From 446c073f886c28ff389a8aeb6542f08f0ed8c3d0 Mon Sep 17 00:00:00 2001 From: Kirill Terekhov Date: Fri, 17 Apr 2015 15:11:04 -0700 Subject: [PATCH] Sync changes Fixed bug in auto-differentiation that would calculate incorrect weights for derivatives in ad_val(). Adapted several geometrical and adjacency retrieval algorithms to handle broken conformity of the grid during modification, more attention to the problem is needed. Added visual studio debugger helpers. Fixed several issues with openmp support. Added openmp parallelization of BiCGStab(L) solver. Fixed issues that files overwritten with MPI_File* functionality may contain previous information. Fixed a bug in OctreeCutcell, added another example for it. Fixed a bug in collective operations in mesh class. --- .gitignore | 1 + CMakeLists.txt | 7 + Debuggers/VisualStudio/autoexp.dat | 87 ++ Debuggers/VisualStudio/readme.txt | 7 + Debuggers/readme.txt | 1 + autodiff.cpp | 2 +- cell.cpp | 34 +- examples/MatSolve/main.cpp | 5 +- examples/OctreeCutcell/CMakeLists.txt | 4 +- examples/OctreeCutcell/main.cpp | 19 +- examples/OctreeCutcell/obj.cpp | 12 +- examples/OctreeCutcell/octgrid.cpp | 22 +- examples/OctreeCutcell/octgrid.h | 2 +- .../OctreeCutcell/oil_obj2/mat/layer1.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer1.obj | 155 +++ .../OctreeCutcell/oil_obj2/mat/layer10.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer10.obj | 163 +++ .../OctreeCutcell/oil_obj2/mat/layer11.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer11.obj | 163 +++ .../OctreeCutcell/oil_obj2/mat/layer12.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer12.obj | 182 ++++ .../OctreeCutcell/oil_obj2/mat/layer13.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer13.obj | 144 +++ .../OctreeCutcell/oil_obj2/mat/layer14.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer14.obj | 144 +++ .../OctreeCutcell/oil_obj2/mat/layer15.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer15.obj | 163 +++ .../OctreeCutcell/oil_obj2/mat/layer16.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer16.obj | 163 +++ .../OctreeCutcell/oil_obj2/mat/layer17.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer17.obj | 144 +++ .../OctreeCutcell/oil_obj2/mat/layer18.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer18.obj | 182 ++++ .../OctreeCutcell/oil_obj2/mat/layer19.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer19.obj | 144 +++ .../OctreeCutcell/oil_obj2/mat/layer2.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer2.obj | 163 +++ .../OctreeCutcell/oil_obj2/mat/layer3.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer3.obj | 163 +++ .../OctreeCutcell/oil_obj2/mat/layer4.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer4.obj | 144 +++ .../OctreeCutcell/oil_obj2/mat/layer5.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer5.obj | 163 +++ .../OctreeCutcell/oil_obj2/mat/layer6.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer6.obj | 155 +++ .../OctreeCutcell/oil_obj2/mat/layer7.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer7.obj | 166 +++ .../OctreeCutcell/oil_obj2/mat/layer8.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer8.obj | 144 +++ .../OctreeCutcell/oil_obj2/mat/layer9.mtl | 9 + .../OctreeCutcell/oil_obj2/mat/layer9.obj | 144 +++ .../OctreeCutcell/oil_obj2/proj/layer0.mtl | 9 + .../OctreeCutcell/oil_obj2/proj/layer0.obj | 576 +++++++++++ .../OctreeCutcell/oil_obj2/proj/layer1.mtl | 9 + .../OctreeCutcell/oil_obj2/proj/layer1.obj | 720 +++++++++++++ .../OctreeCutcell/oil_obj2/proj/layer2.mtl | 9 + .../OctreeCutcell/oil_obj2/proj/layer2.obj | 720 +++++++++++++ .../OctreeCutcell/oil_obj2/proj/layer3.mtl | 9 + .../OctreeCutcell/oil_obj2/proj/layer3.obj | 720 +++++++++++++ geometry.cpp | 165 +-- inmost_mesh.h | 19 +- inmost_solver.h | 14 +- mesh.cpp | 4 +- mesh_file.cpp | 4 + mesh_parallel.cpp | 18 +- modify.cpp | 2 +- solver.cpp | 230 +++-- solver_bcgsl.hpp | 948 +++++++++++------- solver_ddpqiluc2.cpp | 111 +- solver_ilu2.hpp | 37 +- tests/solver_test000/CMakeLists.txt | 11 + tests/solver_test000/main.cpp | 1 + tests/solver_test001/main.cpp | 4 +- tests/solver_test002/CMakeLists.txt | 12 +- tests/solver_test002/main.cpp | 1 + 75 files changed, 7069 insertions(+), 647 deletions(-) create mode 100644 Debuggers/VisualStudio/autoexp.dat create mode 100644 Debuggers/VisualStudio/readme.txt create mode 100644 Debuggers/readme.txt create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer1.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer1.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer10.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer10.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer11.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer11.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer12.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer12.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer13.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer13.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer14.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer14.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer15.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer15.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer16.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer16.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer17.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer17.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer18.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer18.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer19.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer19.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer2.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer2.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer3.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer3.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer4.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer4.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer5.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer5.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer6.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer6.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer7.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer7.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer8.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer8.obj create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer9.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/mat/layer9.obj create mode 100644 examples/OctreeCutcell/oil_obj2/proj/layer0.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/proj/layer0.obj create mode 100644 examples/OctreeCutcell/oil_obj2/proj/layer1.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/proj/layer1.obj create mode 100644 examples/OctreeCutcell/oil_obj2/proj/layer2.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/proj/layer2.obj create mode 100644 examples/OctreeCutcell/oil_obj2/proj/layer3.mtl create mode 100644 examples/OctreeCutcell/oil_obj2/proj/layer3.obj diff --git a/.gitignore b/.gitignore index 763f9e4..0d96310 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ tests/solver_test001/matrices solver_mtiluc2.cpp solver_mtiluc2.hpp +solver_mtilu2.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f59b8a..4eb8c66 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,13 @@ else() set(HAVE_SOLVER_MPTILUC2 FALSE) endif() +if( EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/solver_mtilu2.hpp" ) + add_definitions(-DHAVE_SOLVER_MPTILU2) + set(HAVE_SOLVER_MPTILU2 TRUE) + list(APPEND SOURCE solver_mtilu2.hpp) +else() + set(HAVE_SOLVER_MPTILU2 FALSE) +endif() add_library(inmost STATIC ${SOURCE} ${HEADER}) diff --git a/Debuggers/VisualStudio/autoexp.dat b/Debuggers/VisualStudio/autoexp.dat new file mode 100644 index 0000000..3c5166a --- /dev/null +++ b/Debuggers/VisualStudio/autoexp.dat @@ -0,0 +1,87 @@ + +;------------------------------------------------------------------------------ +; INMOST::interval +;------------------------------------------------------------------------------ + +INMOST::interval<*,*>{ + preview ( + #( + "[", + $e.end_index-$e.beg_index, + "](", + #array( + expr: $e.array[$i+$e.beg_index], + size: $e.end_index-$e.beg_index + ), + ")" + ) + ) + children ( + #( + #([size] : $e.end_index-$e.beg_index), + #([first] : $e.beg_index), + #([last] : $e.end_index), + #array( + expr: $e.array[$i+$e.beg_index], + size: $e.end_index-$e.beg_index + ) + ) + ) +} + +;------------------------------------------------------------------------------ +; INMOST::dynarray +;------------------------------------------------------------------------------ + +INMOST::dynarray<*,*>{ + preview ( + #( + "[", + $e.pend-$e.pbegin, + "](", + #array( + expr: $e.pbegin[$i], + size: $e.pend-$e.pbegin + ), + ")" + ) + ) + children ( + #( + #([size] : $e.pend-$e.pbegin), + #([capacity] : $e.preserved-$e.pbegin), + #array( + expr: $e.pbegin[$i], + size: $e.pend-$e.pbegin + ) + ) + ) +} + +;------------------------------------------------------------------------------ +; INMOST::shell +;------------------------------------------------------------------------------ + +INMOST::shell<*>{ + preview ( + #( + "[", + *$e.m_size, + "](", + #array( + expr: (*$e.m_arr)[$i], + size: *$e.m_size + ), + ")" + ) + ) + children ( + #( + #([size] : *$e.m_size), + #array( + expr: (*$e.m_arr)[$i], + size: *$e.m_size + ) + ) + ) +} diff --git a/Debuggers/VisualStudio/readme.txt b/Debuggers/VisualStudio/readme.txt new file mode 100644 index 0000000..cb57eba --- /dev/null +++ b/Debuggers/VisualStudio/readme.txt @@ -0,0 +1,7 @@ +Copy and paste contents of autoexp.dat into +C:\Program Files (x86)\Microsoft Visual Studio 10.0\Common7\Packages\Debugger\autoexp.dat +or the directry where Visual Studio was installed. + +In future capabilities will grove. + +Tested with Visual studio 2010. diff --git a/Debuggers/readme.txt b/Debuggers/readme.txt new file mode 100644 index 0000000..102ba10 --- /dev/null +++ b/Debuggers/readme.txt @@ -0,0 +1 @@ +This folder contains helper instructions to facilitate representation of custom classes in debuggers. \ No newline at end of file diff --git a/autodiff.cpp b/autodiff.cpp index 985a974..0d79bf9 100644 --- a/autodiff.cpp +++ b/autodiff.cpp @@ -294,7 +294,7 @@ namespace INMOST return ret*var.coef; case AD_VAL: lval = DerivativePrecompute(*var.left, e, values, user_data); - rval = Evaluate(*var.left,e,user_data); + rval = Evaluate(*var.right,e,user_data); values.push_back(rval); return lval*var.coef; } diff --git a/cell.cpp b/cell.cpp index 1fca91a..7e75d54 100644 --- a/cell.cpp +++ b/cell.cpp @@ -341,17 +341,22 @@ namespace INMOST { adj_type const & lc = m->LowConn(GetHandle()); aret.reserve(lc.size()); + if( lc.size() < 1 ) return aret; HandleType q = lc[0]; //edge 0 adj_type const & qlc = m->LowConn(q); - aret.push_back(qlc[0]); //node 0 - aret.push_back(qlc[1]); //node 1 + if( qlc.size() > 0 ) aret.push_back(qlc[0]); //node 0 + if( qlc.size() > 1 ) aret.push_back(qlc[1]); //node 1 + if( lc.size() < 2 ) return aret; HandleType r = lc[1]; //edge 1 adj_type const & rlc = m->LowConn(r); - if( aret.data()[0] == rlc[0] || aret.data()[0] == rlc[1] ) + if( aret.size() == 2 && rlc.size() == 2 ) { - HandleType temp = aret.data()[0]; - aret.data()[0] = aret.data()[1]; - aret.data()[1] = temp; + if( aret.data()[0] == rlc[0] || aret.data()[0] == rlc[1] ) + { + HandleType temp = aret.data()[0]; + aret.data()[0] = aret.data()[1]; + aret.data()[1] = temp; + } } adj_type::size_type it = 1, iend = lc.size()-1; while(it < iend) //loop over edges @@ -392,22 +397,27 @@ namespace INMOST adj_type const & lc = m->LowConn(GetHandle()); aret.reserve(lc.size()); i = m->getNext(lc.data(),static_cast(lc.size()),i,hm); + if( i == lc.size() ) return aret; HandleType q = lc[i]; //edge 0 adj_type const & qlc = m->LowConn(q); k = m->getNext(qlc.data(),static_cast(qlc.size()),k,hm); - aret.push_back(qlc[k]); //node 0 + if( k != qlc.size() ) aret.push_back(qlc[k]); //node 0 k = m->getNext(qlc.data(),static_cast(qlc.size()),k,hm); - aret.push_back(qlc[k]); //node 1 + if( k != qlc.size() ) aret.push_back(qlc[k]); //node 1 i = m->getNext(lc.data(),static_cast(lc.size()),i,hm); + if( i == lc.size() ) return aret; HandleType r = lc[i]; //edge 1 adj_type const & rlc = m->LowConn(r); k1 = m->getNext(rlc.data(),static_cast(rlc.size()),k1,hm); k2 = m->getNext(rlc.data(),static_cast(rlc.size()),k1,hm); - if( aret.data()[0] == rlc[k1] || aret.data()[0] == rlc[k2] ) + if( k1 != rlc.size() && k2 != rlc.size() && aret.size() == 2 ) { - HandleType temp = aret.data()[0]; - aret.data()[0] = aret.data()[1]; - aret.data()[1] = temp; + if( aret.data()[0] == rlc[k1] || aret.data()[0] == rlc[k2] ) + { + HandleType temp = aret.data()[0]; + aret.data()[0] = aret.data()[1]; + aret.data()[1] = temp; + } } adj_type::size_type it = 1, iend = lc.size()-1; while(it < iend) if( !m->GetMarker(lc[it],hm) ) //loop over edges diff --git a/examples/MatSolve/main.cpp b/examples/MatSolve/main.cpp index 0570c68..a669ad6 100644 --- a/examples/MatSolve/main.cpp +++ b/examples/MatSolve/main.cpp @@ -30,6 +30,7 @@ int main(int argc, char ** argv) case 6: type = Solver::Trilinos_ML; break; case 7: type = Solver::ANI; break; case 8: type = Solver::INNER_MPTILUC; break; + case 9: type = Solver::INNER_MPTILU2; break; } Solver::Initialize(&argc,&argv,argc > 4 ? argv[4] : NULL); // Initialize the linear solver in accordance with args { @@ -81,8 +82,8 @@ int main(int argc, char ** argv) s.SetParameterEnum("adapt_ddpq_tolerance",0); s.SetParameterReal("drop_tolerance",1.0e-2); - s.SetParameterReal("reuse_tolerance",1.0e-3); - s.SetParameterReal("ddpq_tolerance",0.4); + s.SetParameterReal("reuse_tolerance",1.0e-4); + s.SetParameterReal("ddpq_tolerance",0.0); s.SetParameterEnum("condition_estimation",1); diff --git a/examples/OctreeCutcell/CMakeLists.txt b/examples/OctreeCutcell/CMakeLists.txt index 253db0c..1586d03 100644 --- a/examples/OctreeCutcell/CMakeLists.txt +++ b/examples/OctreeCutcell/CMakeLists.txt @@ -46,5 +46,5 @@ else(OPENGL_FOUND) message("OpenGL not found") endif(OPENGL_FOUND) -install(TARGETS OctreeCutcell EXPORT inmost-targets RUNTIME DESTINATION bin) - +install(TARGETS OctreeCutcell EXPORT inmost-targets RUNTIME DESTINATION bin/OctreeCutcell) +install(DIRECTORY Obj EXPORT inmost-targets DESTINATION bin/OctreeCutcell) diff --git a/examples/OctreeCutcell/main.cpp b/examples/OctreeCutcell/main.cpp index b178897..8c43d3e 100644 --- a/examples/OctreeCutcell/main.cpp +++ b/examples/OctreeCutcell/main.cpp @@ -843,22 +843,33 @@ int main(int argc, char ** argv) InitObj(); { std::vector layers; - for(int i = 0; i < 3; i++) + for(int i = 1; i <= 3; i++) { std::stringstream name; - name << "Obj/bound" << i+1 << ".obj"; + //name << "Obj/bound" << i+1 << ".obj"; + name << "oil_obj2/proj/layer" << i << ".obj"; layers.push_back(name.str()); } make_proj.ReadLayers(layers); } { std::vector layers; - for(int i = 0; i < 6; i++) + for(int i = 1; i <= 19; i++) + { + std::stringstream name; + //name << "Obj/rt" << i+1 << ".obj"; + name << "oil_obj2/mat/layer" << i << ".obj"; + layers.push_back(name.str()); + } + /* + for(int i = 1; i <= 3; i++) { std::stringstream name; - name << "Obj/rt" << i+1 << ".obj"; + //name << "Obj/rt" << i+1 << ".obj"; + name << "oil_obj2/proj/layer" << i << ".obj"; layers.push_back(name.str()); } + */ get_type.ReadLayers(layers); } diff --git a/examples/OctreeCutcell/obj.cpp b/examples/OctreeCutcell/obj.cpp index 76bd08e..b39918a 100644 --- a/examples/OctreeCutcell/obj.cpp +++ b/examples/OctreeCutcell/obj.cpp @@ -22,7 +22,7 @@ char * nowarn2 = NULL; //~ extern Projection make_proj; -bool swapyz = false; +bool swapyz = true; struct ObjVertex @@ -544,6 +544,7 @@ int ReadObj(char * file) } n = objs + cur; strcpy(n->filename,file); + printf("Reading %s\n",file); // nowarn2 = getcwd(n->filedir,1024); FILE * f = fopen(file,"r"); if( f == NULL ) @@ -805,6 +806,14 @@ int ReadObj(char * file) &n->verts[n->nverts].v[0], &n->verts[n->nverts].v[1], &n->verts[n->nverts].v[2]); + + n->verts[n->nverts].v[0] /= 99.5; + n->verts[n->nverts].v[1] /= 32; + n->verts[n->nverts].v[2] /= 99.5; + n->verts[n->nverts].v[0] -= 0.001; + n->verts[n->nverts].v[1] -= 0.001; + n->verts[n->nverts].v[2] += 0.001; + n->verts[n->nverts].v[2] *= -1; if( swapyz ) { double temp = n->verts[n->nverts].v[1]; @@ -955,6 +964,7 @@ int ReadObj(char * file) if( n->minz > n->pols[i].minz ) n->minz = n->pols[i].minz; } + printf("bounds: %g:%g %g:%g %g:%g\n",n->minx,n->maxx,n->miny,n->maxy,n->minz,n->maxz); kdtree_build(n->t,n->p,n->npols,0); /* double center[3]; diff --git a/examples/OctreeCutcell/octgrid.cpp b/examples/OctreeCutcell/octgrid.cpp index d818dfc..2f574c7 100644 --- a/examples/OctreeCutcell/octgrid.cpp +++ b/examples/OctreeCutcell/octgrid.cpp @@ -156,22 +156,22 @@ int cellAround(struct grid * g, int m, int side, int neighbours[1<<(DIM-1)]) void vertDestroyINMOST(struct grid *g, int m) { //printf("%s\n",__FUNCTION__); - if( g->verts[m].mv.isValid() && !g->verts[m].mv.Hidden() ) + if( g->verts[m].mv != InvalidHandle() ) { - g->verts[m].mv->Delete(); + g->mesh->Delete(g->verts[m].mv); } } void vertCreateINMOST(struct grid * g, int m) { Storage::real xyz[3]; - if( g->verts[m].mv.isValid() ) return; + if( g->verts[m].mv != InvalidHandle() ) return; vertGetCoord(g,m,xyz); g->transformation(xyz); - g->verts[m].mv = g->mesh->CreateNode(xyz); - g->verts[m].mv->SetMarker(g->octree_node); + g->verts[m].mv = g->mesh->CreateNode(xyz)->GetHandle(); + g->mesh->SetMarker(g->verts[m].mv,g->octree_node); //g->verts[m].mv->Integer(g->new_marker) = 1; - g->vert_to_INMOST(g,m,g->verts[m].mv); + g->vert_to_INMOST(g,m,Node(g->mesh,g->verts[m].mv)); } void cellDestroyINMOST(struct grid * g, int m) @@ -299,13 +299,13 @@ void cellGetFaceVerts(struct grid * g, int m, int side, int * nverts, Node verts for(i = 0; i < 4; i++) { mid[1+faces[0]] = false; - verts[(faces[1+faces[0]] = (*nverts))] = g->verts[g->cells[m].vertexes[nvf[side][i]]].mv; + verts[(faces[1+faces[0]] = (*nverts))] = Node(g->mesh,g->verts[g->cells[m].vertexes[nvf[side][i]]].mv); faces[0]++; (*nverts)++; if( (middle = vertGetMiddle(g,m,side,i)) != -1) { mid[1+faces[0]] = true; - verts[(faces[1+faces[0]] = (*nverts))] = g->verts[middle].mv; + verts[(faces[1+faces[0]] = (*nverts))] = Node(g->mesh,g->verts[middle].mv); faces[0]++; (*nverts)++; @@ -1204,7 +1204,7 @@ void cellCreateINMOST(struct grid * g, int m, bool print = false) centers[0] = matcenter(mat0d,&n0->Coords()[0],0); centers[1] = matcenter(mat2d,&n2->Coords()[0],0); //make iterations to find correct position of the node - int max = 8; + int max = 16; int found = 0; for(int q = 0; q < max; q++) { @@ -2357,7 +2357,7 @@ exit_work: if( !success ) for(j = 0; j < l; j++) if( node_in_face[j].isValid() ) centers.push_back(matcenter(node_in_face[j]->IntegerArray(g->materials),&node_in_face[j]->Coords()[0])); //make iterations to find correct position of the node - int max = 4; + int max = 8; Storage::real div = 1.0/centers.size(); for(int q = 0; q < max; q++) { @@ -3485,7 +3485,7 @@ void cellInit(struct grid * g, int cell, int parent) void vertInit(struct grid * g, int vert) { int i; - g->verts[vert].mv = InvalidNode(); + g->verts[vert].mv = InvalidHandle(); for(i = 0; i < 1<verts[vert].env[i] = -1; } diff --git a/examples/OctreeCutcell/octgrid.h b/examples/OctreeCutcell/octgrid.h index ae0da3e..ccb534e 100644 --- a/examples/OctreeCutcell/octgrid.h +++ b/examples/OctreeCutcell/octgrid.h @@ -65,7 +65,7 @@ struct vert { int busy; int env[1< nodes = Element(this,e)->getNodes(); - Storage::real c[3]; - vec_diff(nodes[0]->Coords().data(),nodes[1]->Coords().data(),c,mdim); - *ret = vec_len(c,mdim); + if( nodes.size() > 1 ) + { + Storage::real c[3]; + vec_diff(nodes[0]->Coords().data(),nodes[1]->Coords().data(),c,mdim); + *ret = vec_len(c,mdim); + } + else *ret = 0; //~ if( isnan(*ret) || fabs(*ret) < 1e-15 ) throw -1; break; } case 2: //area of face { ElementArray nodes = Element(this,e)->getNodes(); - real x[3] = {0,0,0}; - Storage::real_array x0 = nodes[0].Coords(); - for(ElementArray::size_type i = 1; i < nodes.size()-1; i++) + if( nodes.size() > 2 ) { - Storage::real_array v1 = nodes[i].Coords(); - Storage::real_array v2 = nodes[i+1].Coords(); - if( mdim == 3 ) + real x[3] = {0,0,0}; + Storage::real_array x0 = nodes[0].Coords(); + for(ElementArray::size_type i = 1; i < nodes.size()-1; i++) { - x[0] += (v1[1]-x0[1])*(v2[2]-x0[2]) - (v1[2]-x0[2])*(v2[1]-x0[1]); - x[1] += (v1[2]-x0[2])*(v2[0]-x0[0]) - (v1[0]-x0[0])*(v2[2]-x0[2]); + Storage::real_array v1 = nodes[i].Coords(); + Storage::real_array v2 = nodes[i+1].Coords(); + if( mdim == 3 ) + { + x[0] += (v1[1]-x0[1])*(v2[2]-x0[2]) - (v1[2]-x0[2])*(v2[1]-x0[1]); + x[1] += (v1[2]-x0[2])*(v2[0]-x0[0]) - (v1[0]-x0[0])*(v2[2]-x0[2]); + } + x[2] += (v1[0]-x0[0])*(v2[1]-x0[1]) - (v1[1]-x0[1])*(v2[0]-x0[0]); } - x[2] += (v1[0]-x0[0])*(v2[1]-x0[1]) - (v1[1]-x0[1])*(v2[0]-x0[0]); - } - *ret = sqrt(x[0]*x[0]+x[1]*x[1]+x[2]*x[2])*0.5; + *ret = sqrt(x[0]*x[0]+x[1]*x[1]+x[2]*x[2])*0.5; + } else *ret = 0; //~ if( isnan(*ret) || fabs(*ret) < 1e-15 ) throw -1; break; } @@ -596,24 +603,27 @@ namespace INMOST } *ret /= 6.0; */ - Storage::real fcnt[3], fnrm[3];// , area; - for(ElementArray::size_type i = 0; i < faces.size(); i++) + if( faces.size() > 3 ) { - faces[i]->Centroid(fcnt); - faces[i]->OrientedNormal(me,fnrm); - /* - area = sqrt(vec_dot_product(fnrm,fnrm,mdim)); - if( area > 0 ) + Storage::real fcnt[3], fnrm[3];// , area; + for(ElementArray::size_type i = 0; i < faces.size(); i++) { - fnrm[0] /= area; - fnrm[1] /= area; - fnrm[2] /= area; - *ret += vec_dot_product(fcnt,fnrm,mdim) * area; + faces[i]->Centroid(fcnt); + faces[i]->OrientedNormal(me,fnrm); + /* + area = sqrt(vec_dot_product(fnrm,fnrm,mdim)); + if( area > 0 ) + { + fnrm[0] /= area; + fnrm[1] /= area; + fnrm[2] /= area; + *ret += vec_dot_product(fcnt,fnrm,mdim) * area; + } + */ + *ret += vec_dot_product(fcnt,fnrm,mdim); } - */ - *ret += vec_dot_product(fcnt,fnrm,mdim); + *ret /= 3.0; } - *ret /= 3.0; /* if( *ret < 0 || (*ret) != (*ret) ) { @@ -759,10 +769,18 @@ namespace INMOST if( edim == 1 ) { ElementArray n = Element(this,e)->getNodes(); - Storage::real_array a = n[0].Coords(); - Storage::real_array b = n[1].Coords(); - for(integer j = 0; j < dim; j++) - ret[j] = (a[j] + b[j])*0.5; + if( n.size() == 2 ) + { + Storage::real_array a = n[0].Coords(); + Storage::real_array b = n[1].Coords(); + for(integer j = 0; j < dim; j++) + ret[j] = (a[j] + b[j])*0.5; + } + else if( n.size() == 1 ) + { + Storage::real_array a = n[0].Coords(); + for(integer j = 0; j < dim; j++) ret[j] = a[j]; + } } else if( edim == 2 ) { @@ -776,37 +794,49 @@ namespace INMOST //~ } //~ else //~ { - Storage::real_array x0 = nodes[0].Coords(); - for(ElementArray::size_type i = 1; i < nodes.size()-1; i++) + if( nodes.size() > 2 ) { - Storage::real_array v1 = nodes[i].Coords(); - Storage::real_array v2 = nodes[i+1].Coords(); - if( mdim == 3 ) + Storage::real_array x0 = nodes[0].Coords(); + for(ElementArray::size_type i = 1; i < nodes.size()-1; i++) { - x[0] += (v1[1]-x0[1])*(v2[2]-x0[2]) - (v1[2]-x0[2])*(v2[1]-x0[1]); - x[1] += (v1[2]-x0[2])*(v2[0]-x0[0]) - (v1[0]-x0[0])*(v2[2]-x0[2]); + Storage::real_array v1 = nodes[i].Coords(); + Storage::real_array v2 = nodes[i+1].Coords(); + if( mdim == 3 ) + { + x[0] += (v1[1]-x0[1])*(v2[2]-x0[2]) - (v1[2]-x0[2])*(v2[1]-x0[1]); + x[1] += (v1[2]-x0[2])*(v2[0]-x0[0]) - (v1[0]-x0[0])*(v2[2]-x0[2]); + } + x[2] += (v1[0]-x0[0])*(v2[1]-x0[1]) - (v1[1]-x0[1])*(v2[0]-x0[0]); } - x[2] += (v1[0]-x0[0])*(v2[1]-x0[1]) - (v1[1]-x0[1])*(v2[0]-x0[0]); + s = sqrt(x[0]*x[0]+x[1]*x[1]+x[2]*x[2]); + x[0] /= s; x[1] /= s; x[2] /= s; //here we obtain the unit normal + //~ } + //here we compute the center + Storage::real_array v0 = nodes[0].Coords(); + for(ElementArray::size_type j = 1; j < nodes.size()-1; j++) + { + Storage::real_array v1 = nodes[j].Coords(); + Storage::real_array v2 = nodes[j+1].Coords(); + for(integer k = 0; k < mdim; k++) + { + x1[k] = v0[k] - v1[k]; + x2[k] = v0[k] - v2[k]; + } + d = det3v(x1,x2,x); //here we use unit normal + for(integer k = 0; k < mdim; k++) + ret[k] += d*(v0[k]+v1[k]+v2[k]); + } + for(integer k = 0; k < mdim; k++) ret[k] /= 3.0 * s; } - s = sqrt(x[0]*x[0]+x[1]*x[1]+x[2]*x[2]); - x[0] /= s; x[1] /= s; x[2] /= s; //here we obtain the unit normal - //~ } - //here we compute the center - Storage::real_array v0 = nodes[0].Coords(); - for(ElementArray::size_type j = 1; j < nodes.size()-1; j++) + else { - Storage::real_array v1 = nodes[j].Coords(); - Storage::real_array v2 = nodes[j+1].Coords(); - for(integer k = 0; k < mdim; k++) + for(ElementArray::size_type i = 0; i < nodes.size(); i++) { - x1[k] = v0[k] - v1[k]; - x2[k] = v0[k] - v2[k]; + Storage::real_array c = nodes[i].Coords(); + for(integer k = 0; k < mdim; k++) ret[k] += c[k]; } - d = det3v(x1,x2,x); //here we use unit normal - for(integer k = 0; k < mdim; k++) - ret[k] += d*(v0[k]+v1[k]+v2[k]); + for(integer k = 0; k < mdim; k++) ret[k] /= static_cast(nodes.size()); } - for(integer k = 0; k < mdim; k++) ret[k] /= 3.0 * s; } else if( edim == 3 ) { @@ -875,19 +905,22 @@ namespace INMOST else if( edim == 1 )//&& mdim == 2 ) { ElementArray nodes = Element(this,e)->getNodes(); - Storage::real_array a = nodes[0].Coords(); - Storage::real_array b = nodes[1].Coords(); - ret[0] = b[1] - a[1]; - ret[1] = a[0] - b[0]; - Storage::real l = sqrt(ret[0]*ret[0]+ret[1]*ret[1]); - if( l ) + if( nodes.size() > 1 ) { - ret[0] /= l; - ret[1] /= l; + Storage::real_array a = nodes[0].Coords(); + Storage::real_array b = nodes[1].Coords(); + ret[0] = b[1] - a[1]; + ret[1] = a[0] - b[0]; + Storage::real l = sqrt(ret[0]*ret[0]+ret[1]*ret[1]); + if( l ) + { + ret[0] /= l; + ret[1] /= l; + } + l = sqrt((a[0]-b[0])*(a[0]-b[0])+(a[1]-b[1])*(a[1]-b[1])); + ret[0] *= l; + ret[1] *= l; } - l = sqrt((a[0]-b[0])*(a[0]-b[0])+(a[1]-b[1])*(a[1]-b[1])); - ret[0] *= l; - ret[1] *= l; } } break; diff --git a/inmost_mesh.h b/inmost_mesh.h index d6b9b34..d408bfa 100644 --- a/inmost_mesh.h +++ b/inmost_mesh.h @@ -9,7 +9,6 @@ #define __NDT 3 #define __NET 6 - namespace INMOST { @@ -101,7 +100,7 @@ namespace INMOST static const TopologyCheck PROHIBIT_POLYGON = 0x00100000; //done//allow only known types of elements: Tet,Quad, Line static const TopologyCheck PROHIBIT_MULTIPOLYGON = 0x00200000; //done//(needs NEED_TEST_CLOSURE) produce error if faces of cell are not closed, or cell is folded static const TopologyCheck PROHIBIT_POLYHEDRON = 0x00400000; //done//allow only known types of elements: Tet,Hex,Prism,Pyramid - static const TopologyCheck FACE_EDGES_ORDER = 0x00800000; //not implemented(implement CheckEdgeOrder,FixEdgeOrder, class Cell,Face)//edges of the face should form one closed loop + static const TopologyCheck FACE_EDGES_ORDER = 0x00800000; //done//edges of the face should form one closed loop static const TopologyCheck PROHIBIT_CONCAVE_FACE = 0x01000000; //not implemented//don't allow concave face static const TopologyCheck PROHIBIT_CONCAVE_CELL = 0x02000000; //not implemented//don't allow concave cell static const TopologyCheck PROHIBIT_NONSTAR_FACE = 0x04000000; //not implemented//don't allow non-star shaped face @@ -2039,7 +2038,13 @@ namespace INMOST /// Set a marker on the element represented by handle. /// @param h element handle /// @param n stores byte number and byte bit mask that represent marker - void SetMarker (HandleType h,MarkerType n) {static_cast(MGetDenseLink(h,MarkersTag()))[n >> MarkerShift] |= static_cast(n & MarkerMask);} + void SetMarker (HandleType h,MarkerType n) + { +#if defined(USE_OMP) +#pragma omp atomic +#endif + static_cast(MGetDenseLink(h,MarkersTag()))[n >> MarkerShift] |= static_cast(n & MarkerMask); + } /// Set a marker on the set of handles. /// @param h set of handles /// @param n number of handles @@ -2053,7 +2058,13 @@ namespace INMOST /// Remove the marker from the element. /// @param h element handle /// @param n stores byte number and byte bit mask that represent marker - void RemMarker (HandleType h,MarkerType n) {static_cast(MGetDenseLink(h,MarkersTag()))[n >> MarkerShift] &= ~static_cast(n & MarkerMask);} + void RemMarker (HandleType h,MarkerType n) + { +#if defined(USE_OMP) +#pragma omp atomic +#endif + static_cast(MGetDenseLink(h,MarkersTag()))[n >> MarkerShift] &= ~static_cast(n & MarkerMask); + } /// Remove the marker from the set of handles. /// @param h set of handles /// @param n number of handles diff --git a/inmost_solver.h b/inmost_solver.h index 0df504f..18f7670 100644 --- a/inmost_solver.h +++ b/inmost_solver.h @@ -44,6 +44,7 @@ namespace INMOST INNER_ILU2, ///< inner Solver based on BiCGStab(L) solver with second order ILU factorization as preconditioner. INNER_DDPQILUC, ///< inner Solver based on BiCGStab(L) solver with second order Crout-ILU with inversed-based condition estimation and unsymmetric reordering for diagonal dominance as preconditioner. INNER_MPTILUC, ///< inner Solver based on BiCGStab(L) solver with second order Crout-ILU with inversed-based condition estimation and maximum product transversal reordering as preconditioner. + INNER_MPTILU2, ///< inner Solver based on BiCGStab(L) solver with second order ILU and maximum product transversal reordering as preconditioner. Trilinos_Aztec, ///< external Solver AztecOO from Trilinos package Trilinos_Belos, ///< external Solver Belos from Trilinos package, currently without preconditioner Trilinos_ML, ///< external Solver AztecOO with ML preconditioner @@ -131,8 +132,9 @@ namespace INMOST //~ void BeginSequentialCode() {for(int i = 0; i < rank; i++) MPI_Barrier(comm);} //~ void EndSequentialCode() {for(int i = rank; i < size; i++) MPI_Barrier(comm);} - /// Get the scalar product of the specified interval of the distributed vector. - INMOST_DATA_REAL_TYPE ScalarProd(Vector const & left, Vector const & right, INMOST_DATA_ENUM_TYPE index_begin, INMOST_DATA_ENUM_TYPE index_end) const; + // Get the scalar product of the specified interval of the distributed vector. + // Conflicts with OpenMP, should not be used in future + //void ScalarProd(Vector const & left, Vector const & right, INMOST_DATA_ENUM_TYPE index_begin, INMOST_DATA_ENUM_TYPE index_end, INMOST_DATA_REAL_TYPE & sum) const; }; /// Distributed vector class. @@ -181,7 +183,7 @@ namespace INMOST /// Get the communicator which the vector is associated with. INMOST_MPI_Comm GetCommunicator() const {return comm;} - + void Swap(Solver::Vector & other) {data.swap(other.data); name.swap(other.name); std::swap(is_parallel,other.is_parallel); std::swap(comm,other.comm);} /// Save the distributed vector to a single data file using parallel MPI I/O. @@ -350,6 +352,12 @@ namespace INMOST /// that allow for the modification of individual entries. /// @param size New size of the row. void Resize(INMOST_DATA_ENUM_TYPE size) {data.resize(size);} + + void Print() + { + for(iterator it = Begin(); it != End(); ++it) std::cout << "(" << it->first << "," << it->second << ") "; + std::cout << std::endl; + } }; diff --git a/mesh.cpp b/mesh.cpp index 80b58e4..c84cdfe 100644 --- a/mesh.cpp +++ b/mesh.cpp @@ -1528,7 +1528,7 @@ namespace INMOST void Mesh::UntieElement(integer etypenum, integer ID) { #if defined(USE_OMP) -#pragma omp critical [links_interraction] +#pragma omp critical (links_interraction) #endif { integer ADDR = links[etypenum][ID]; @@ -1547,7 +1547,7 @@ namespace INMOST { integer ID = -1, ADDR = -1; #if defined(USE_OMP) -#pragma omp critical [links_interraction] +#pragma omp critical (links_interraction) #endif { INMOST_DATA_ENUM_TYPE old_size = GetArrayCapacity(etypenum), new_size; diff --git a/mesh_file.cpp b/mesh_file.cpp index 7bea28e..8a13c84 100644 --- a/mesh_file.cpp +++ b/mesh_file.cpp @@ -4626,6 +4626,10 @@ safe_output: { MPI_File fh; MPI_Status stat; + REPORT_MPI(ierr = MPI_File_open(GetCommunicator(),const_cast(File.c_str()), MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh)); + if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); + REPORT_MPI(ierr = MPI_File_close(&fh)); + if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); REPORT_MPI(ierr = MPI_File_open(GetCommunicator(),const_cast(File.c_str()),MPI_MODE_CREATE | MPI_MODE_WRONLY,MPI_INFO_NULL,&fh)); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); if( GetProcessorRank() == 0 ) diff --git a/mesh_parallel.cpp b/mesh_parallel.cpp index 5be1b0e..b0710cf 100644 --- a/mesh_parallel.cpp +++ b/mesh_parallel.cpp @@ -368,7 +368,8 @@ namespace INMOST void Mesh::Integrate(Storage::real * input, Storage::integer size) { #if defined(USE_MPI) - static dynarray temp(size); + static dynarray temp; + temp.resize(size); memcpy(temp.data(),input,sizeof(Storage::real)*size); MPI_Allreduce(temp.data(),input,size,INMOST_MPI_DATA_REAL_TYPE,MPI_SUM,comm); #else//USE_MPI @@ -379,7 +380,8 @@ namespace INMOST void Mesh::Integrate(Storage::integer * input, Storage::integer size) { #if defined(USE_MPI) - static dynarray temp(size); + static dynarray temp; + temp.resize(size); memcpy(temp.data(),input,sizeof(Storage::integer)*size); MPI_Allreduce(temp.data(),input,size,INMOST_MPI_DATA_INTEGER_TYPE,MPI_SUM,comm); #else//USE_MPI @@ -440,7 +442,8 @@ namespace INMOST void Mesh::AggregateMax(Storage::real * input, Storage::integer size) { #if defined(USE_MPI) - static dynarray temp(size); + static dynarray temp; + temp.resize(size); memcpy(temp.data(),input,sizeof(Storage::real)*size); MPI_Allreduce(temp.data(),input,size,INMOST_MPI_DATA_REAL_TYPE,MPI_MAX,comm); #else//USE_MPI @@ -451,7 +454,8 @@ namespace INMOST void Mesh::AggregateMax(Storage::integer * input, Storage::integer size) { #if defined(USE_MPI) - static dynarray temp(size); + static dynarray temp; + temp.resize(size); memcpy(temp.data(),input,sizeof(Storage::integer)*size); MPI_Allreduce(temp.data(),input,size,INMOST_MPI_DATA_INTEGER_TYPE,MPI_MAX,comm); #else//USE_MPI @@ -484,7 +488,8 @@ namespace INMOST void Mesh::AggregateMin(Storage::real * input, Storage::integer size) { #if defined(USE_MPI) - static dynarray temp(size); + static dynarray temp; + temp.resize(size); memcpy(temp.data(),input,sizeof(Storage::real)*size); MPI_Allreduce(temp.data(),input,size,INMOST_MPI_DATA_REAL_TYPE,MPI_MIN,comm); #else//USE_MPI @@ -495,7 +500,8 @@ namespace INMOST void Mesh::AggregateMin(Storage::integer * input, Storage::integer size) { #if defined(USE_MPI) - static dynarray temp(size); + static dynarray temp; + temp.resize(size); memcpy(temp.data(),input,sizeof(Storage::integer)*size); MPI_Allreduce(temp.data(),input,size,INMOST_MPI_DATA_INTEGER_TYPE,MPI_MIN,comm); #else//USE_MPI diff --git a/modify.cpp b/modify.cpp index 211705c..86e0810 100644 --- a/modify.cpp +++ b/modify.cpp @@ -2128,7 +2128,7 @@ public: bool Mesh::Delete(HandleType h) { - if(Hide(h)) + if(!New(h) && Hide(h)) { if( GetElementType() != CELL ) //mark all elements that rely on this that they should be deleted { diff --git a/solver.cpp b/solver.cpp index ae5d32d..8c804aa 100644 --- a/solver.cpp +++ b/solver.cpp @@ -7,6 +7,9 @@ #if defined(HAVE_SOLVER_MPTILUC2) #include "solver_mtiluc2.hpp" #endif +#if defined(HAVE_SOLVER_MPTILU2) +#include "solver_mtilu2.hpp" +#endif #include "solver_bcgsl.hpp" #include #include @@ -35,6 +38,8 @@ #include "Teuchos_XMLParameterListHelpers.hpp" #endif +//#define USE_OMP + #define KSOLVER BCGSL_solver //#define KSOLVER BCGS_solver @@ -84,10 +89,15 @@ namespace INMOST { #if defined(USE_MPI) if( GetSize() == 1 ) return; - int ierr = 0; - dynarray temp(num); - memcpy(temp.data(),inout,sizeof(INMOST_DATA_REAL_TYPE)*num); - GUARD_MPI(MPI_Allreduce(temp.data(),inout,num,INMOST_MPI_DATA_REAL_TYPE,MPI_SUM,comm)); +#if defined(USE_OMP) +#pragma omp single +#endif + { + int ierr = 0; + dynarray temp(num); + memcpy(temp.data(),inout,sizeof(INMOST_DATA_REAL_TYPE)*num); + GUARD_MPI(MPI_Allreduce(temp.data(),inout,num,INMOST_MPI_DATA_REAL_TYPE,MPI_SUM,comm)); + } #else (void) inout; (void) num; @@ -207,11 +217,15 @@ namespace INMOST INMOST_DATA_ENUM_TYPE i = LinkedList.begin()->first, k = 0; while( i != EOL ) { - r.GetIndex(k) = i-1; - r.GetValue(k) = LinkedList[i].second; + if( LinkedList[i].second ) + { + r.GetIndex(k) = i-1; + r.GetValue(k) = LinkedList[i].second; + ++k; + } i = LinkedList[i].first; - ++k; } + r.Resize(k); } void Solver::OrderInfo::PrepareMatrix(Matrix & m, INMOST_DATA_ENUM_TYPE overlap) @@ -829,42 +843,47 @@ namespace INMOST //std::cout << __FUNCTION__ << " start" << std::endl; #if defined(USE_MPI) if( GetSize() == 1 ) return; - //use MPI_Put/MPI_Get to update vector - assert(x.isParallel()); //the vector was prepared - INMOST_DATA_ENUM_TYPE i, j = 1, k, l = 0; - int ierr; - for(i = 0; i < vector_exchange_recv[0]; i++) - { - //std::cout << GetRank() << " MPI_Irecv size " << vector_exchange_recv[j+1] << " dest " << vector_exchange_recv[j] << " tag " << vector_exchange_recv[j]*size+rank << std::endl; - GUARD_MPI(MPI_Irecv(&recv_storage[l],vector_exchange_recv[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_recv[j],vector_exchange_recv[j]*size+rank,comm,&recv_requests[i])); - l += vector_exchange_recv[j+1]; - j += vector_exchange_recv[j+1] + 2; - } - j = 1, l = 0; - for(i = 0; i < vector_exchange_send[0]; i++) - { - //std::cout << GetRank() << " MPI_Isend size " << vector_exchange_send[j+1] << " dest " << vector_exchange_send[j] << " tag " << rank*size+vector_exchange_send[j] << std::endl; - for(k = 0; k < vector_exchange_send[j+1]; k++) - send_storage[l+k] = x[vector_exchange_send[k+j+2]]; - GUARD_MPI(MPI_Isend(&send_storage[l],vector_exchange_send[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_send[j],rank*size+vector_exchange_send[j],comm,&send_requests[i])); - l += vector_exchange_send[j+1]; - j += vector_exchange_send[j+1] + 2; - } - if( vector_exchange_recv[0] > 0 ) +#if defined(USE_OMP) +#pragma omp single +#endif { - GUARD_MPI(MPI_Waitall(static_cast(recv_requests.size()),&recv_requests[0],MPI_STATUSES_IGNORE)); - j = 1, l = 0; + //use MPI_Put/MPI_Get to update vector + assert(x.isParallel()); //the vector was prepared + INMOST_DATA_ENUM_TYPE i, j = 1, k, l = 0; + int ierr; for(i = 0; i < vector_exchange_recv[0]; i++) { - for(k = 0; k < vector_exchange_recv[j+1]; k++) - x[vector_exchange_recv[k+j+2]] = recv_storage[l+k]; + //std::cout << GetRank() << " MPI_Irecv size " << vector_exchange_recv[j+1] << " dest " << vector_exchange_recv[j] << " tag " << vector_exchange_recv[j]*size+rank << std::endl; + GUARD_MPI(MPI_Irecv(&recv_storage[l],vector_exchange_recv[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_recv[j],vector_exchange_recv[j]*size+rank,comm,&recv_requests[i])); l += vector_exchange_recv[j+1]; j += vector_exchange_recv[j+1] + 2; } - } - if( vector_exchange_send[0] > 0 ) - { - GUARD_MPI(MPI_Waitall(static_cast(send_requests.size()),&send_requests[0],MPI_STATUSES_IGNORE)); + j = 1, l = 0; + for(i = 0; i < vector_exchange_send[0]; i++) + { + //std::cout << GetRank() << " MPI_Isend size " << vector_exchange_send[j+1] << " dest " << vector_exchange_send[j] << " tag " << rank*size+vector_exchange_send[j] << std::endl; + for(k = 0; k < vector_exchange_send[j+1]; k++) + send_storage[l+k] = x[vector_exchange_send[k+j+2]]; + GUARD_MPI(MPI_Isend(&send_storage[l],vector_exchange_send[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_send[j],rank*size+vector_exchange_send[j],comm,&send_requests[i])); + l += vector_exchange_send[j+1]; + j += vector_exchange_send[j+1] + 2; + } + if( vector_exchange_recv[0] > 0 ) + { + GUARD_MPI(MPI_Waitall(static_cast(recv_requests.size()),&recv_requests[0],MPI_STATUSES_IGNORE)); + j = 1, l = 0; + for(i = 0; i < vector_exchange_recv[0]; i++) + { + for(k = 0; k < vector_exchange_recv[j+1]; k++) + x[vector_exchange_recv[k+j+2]] = recv_storage[l+k]; + l += vector_exchange_recv[j+1]; + j += vector_exchange_recv[j+1] + 2; + } + } + if( vector_exchange_send[0] > 0 ) + { + GUARD_MPI(MPI_Waitall(static_cast(send_requests.size()),&send_requests[0],MPI_STATUSES_IGNORE)); + } } #else (void) x; @@ -876,66 +895,75 @@ namespace INMOST //std::cout << __FUNCTION__ << " start" << std::endl; #if defined(USE_MPI) if( GetSize() == 1 ) return; - //use MPI_Put/MPI_Get to update vector - assert(x.isParallel()); //the vector was prepared - INMOST_DATA_ENUM_TYPE i, j = 1, k, l = 0; - int ierr; - for(i = 0; i < vector_exchange_send[0]; i++) - { - //std::cout << GetRank() << " MPI_Irecv size " << vector_exchange_send[j+1] << " dest " << vector_exchange_send[j] << " tag " << vector_exchange_send[j]*size+rank << std::endl; - GUARD_MPI(MPI_Irecv(&send_storage[l],vector_exchange_send[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_send[j],vector_exchange_send[j]*size+rank,comm,&send_requests[i])); - l += vector_exchange_send[j+1]; - j += vector_exchange_send[j+1] + 2; - } - j = 1, l = 0; - for(i = 0; i < vector_exchange_recv[0]; i++) - { - for(k = 0; k < vector_exchange_recv[j+1]; k++) - recv_storage[l+k] = x[vector_exchange_recv[k+j+2]]; - //std::cout << GetRank() << " MPI_Isend size " << vector_exchange_recv[j+1] << " dest " << vector_exchange_recv[j] << " tag " << rank*size+vector_exchange_recv[j] << std::endl; - GUARD_MPI(MPI_Isend(&recv_storage[l],vector_exchange_recv[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_recv[j],rank*size+vector_exchange_recv[j],comm,&recv_requests[i])); - l += vector_exchange_recv[j+1]; - j += vector_exchange_recv[j+1] + 2; - } - if( vector_exchange_send[0] > 0 ) +#if defined(USE_OMP) +#pragma omp single +#endif { - //std::cout << GetRank() << " Waitall send " << send_requests.size() << std::endl; - GUARD_MPI(MPI_Waitall(static_cast(send_requests.size()),&send_requests[0],MPI_STATUSES_IGNORE)); - j = 1, l = 0; + //use MPI_Put/MPI_Get to update vector + assert(x.isParallel()); //the vector was prepared + INMOST_DATA_ENUM_TYPE i, j = 1, k, l = 0; + int ierr; for(i = 0; i < vector_exchange_send[0]; i++) { - for(k = 0; k < vector_exchange_send[j+1]; k++) - x[vector_exchange_send[k+j+2]] += send_storage[l+k]; + //std::cout << GetRank() << " MPI_Irecv size " << vector_exchange_send[j+1] << " dest " << vector_exchange_send[j] << " tag " << vector_exchange_send[j]*size+rank << std::endl; + GUARD_MPI(MPI_Irecv(&send_storage[l],vector_exchange_send[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_send[j],vector_exchange_send[j]*size+rank,comm,&send_requests[i])); l += vector_exchange_send[j+1]; j += vector_exchange_send[j+1] + 2; } - } - if( vector_exchange_recv[0] > 0 ) - { - //std::cout << GetRank() << " Waitall recv " << recv_requests.size() << std::endl; - GUARD_MPI(MPI_Waitall(static_cast(recv_requests.size()),&recv_requests[0],MPI_STATUSES_IGNORE)); + j = 1, l = 0; + for(i = 0; i < vector_exchange_recv[0]; i++) + { + for(k = 0; k < vector_exchange_recv[j+1]; k++) + recv_storage[l+k] = x[vector_exchange_recv[k+j+2]]; + //std::cout << GetRank() << " MPI_Isend size " << vector_exchange_recv[j+1] << " dest " << vector_exchange_recv[j] << " tag " << rank*size+vector_exchange_recv[j] << std::endl; + GUARD_MPI(MPI_Isend(&recv_storage[l],vector_exchange_recv[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_recv[j],rank*size+vector_exchange_recv[j],comm,&recv_requests[i])); + l += vector_exchange_recv[j+1]; + j += vector_exchange_recv[j+1] + 2; + } + if( vector_exchange_send[0] > 0 ) + { + //std::cout << GetRank() << " Waitall send " << send_requests.size() << std::endl; + GUARD_MPI(MPI_Waitall(static_cast(send_requests.size()),&send_requests[0],MPI_STATUSES_IGNORE)); + j = 1, l = 0; + for(i = 0; i < vector_exchange_send[0]; i++) + { + for(k = 0; k < vector_exchange_send[j+1]; k++) + x[vector_exchange_send[k+j+2]] += send_storage[l+k]; + l += vector_exchange_send[j+1]; + j += vector_exchange_send[j+1] + 2; + } + } + if( vector_exchange_recv[0] > 0 ) + { + //std::cout << GetRank() << " Waitall recv " << recv_requests.size() << std::endl; + GUARD_MPI(MPI_Waitall(static_cast(recv_requests.size()),&recv_requests[0],MPI_STATUSES_IGNORE)); + } } #else (void) x; #endif //std::cout << __FUNCTION__ << " end" << std::endl; } - - INMOST_DATA_REAL_TYPE Solver::OrderInfo::ScalarProd(Vector const & left, Vector const & right, INMOST_DATA_ENUM_TYPE index_begin, INMOST_DATA_ENUM_TYPE index_end) const + /* + void Solver::OrderInfo::ScalarProd(Vector const & left, Vector const & right, INMOST_DATA_ENUM_TYPE index_begin, INMOST_DATA_ENUM_TYPE index_end, INMOST_DATA_REAL_TYPE & sum) const { - INMOST_DATA_REAL_TYPE ret = 0; - for(INMOST_DATA_ENUM_TYPE i = index_begin; i < index_end; ++i) - ret += left[i]*right[i]; - Integrate(&ret,1); - return ret; + INMOST_DATA_INTEGER_TYPE ibeg = index_begin, iend = index_end; +#if defined(USE_OMP) +#pragma omp for reduction(+:sum) +#endif + for(INMOST_DATA_INTEGER_TYPE i = ibeg; i < iend; ++i) + { + sum += left[i]*right[i]; + } + Integrate(&sum,1); } - + */ INMOST_DATA_REAL_TYPE Solver::Row::RowVec(Solver::Vector & x) const { INMOST_DATA_REAL_TYPE ret = 0; - INMOST_DATA_ENUM_TYPE end = Size(),i; - for(i = 0; i < end; i++) ret = ret + x[GetIndex(i)]*GetValue(i); + INMOST_DATA_ENUM_TYPE end = Size(); + for(INMOST_DATA_ENUM_TYPE i = 0; i < end; i++) ret = ret + x[GetIndex(i)]*GetValue(i); return ret; } @@ -1142,6 +1170,10 @@ namespace INMOST int ierr; MPI_File fh; MPI_Status stat; + ierr = MPI_File_open(GetCommunicator(),const_cast(file.c_str()), MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); + if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); + ierr = MPI_File_close(&fh); + if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); ierr = MPI_File_open(GetCommunicator(),const_cast(file.c_str()),MPI_MODE_WRONLY | MPI_MODE_CREATE,MPI_INFO_NULL,&fh); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); if( rank == 0 ) @@ -1151,12 +1183,10 @@ namespace INMOST //header << "% is written by INMOST" << std::endl; //header << "% by MPI_File_* api" << std::endl; header << vecsize << std::endl; - std::string header_data(header.str()); - ierr = MPI_File_write_shared(fh,&header_data[0],static_cast(header_data.size()),MPI_CHAR,&stat); + ierr = MPI_File_write_shared(fh,const_cast(header.str().c_str()),static_cast(header.str().size()),MPI_CHAR,&stat); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); } - std::string local_data(rhs.str()); - ierr = MPI_File_write_ordered(fh,&local_data[0],static_cast(local_data.size()),MPI_CHAR,&stat); + ierr = MPI_File_write_ordered(fh,const_cast(rhs.str().c_str()),static_cast(rhs.str().size()),MPI_CHAR,&stat); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); ierr = MPI_File_close(&fh); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); @@ -1226,6 +1256,10 @@ namespace INMOST int ierr; MPI_File fh; MPI_Status stat; + ierr = MPI_File_open(GetCommunicator(),const_cast(file.c_str()), MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); + if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); + ierr = MPI_File_close(&fh); + if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); ierr = MPI_File_open(GetCommunicator(),const_cast(file.c_str()),MPI_MODE_WRONLY | MPI_MODE_CREATE,MPI_INFO_NULL,&fh); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); if( rank == 0 ) @@ -1236,12 +1270,11 @@ namespace INMOST header << "% is written by INMOST" << std::endl; header << "% by MPI_File_* api" << std::endl; header << matsize << " " << matsize << " " << nonzero << std::endl; - std::string header_data(header.str()); - ierr = MPI_File_write_shared(fh,&header_data[0],static_cast(header_data.size()),MPI_CHAR,&stat); + //std::string header_data(header.str()); + ierr = MPI_File_write_shared(fh,const_cast(header.str().c_str()),static_cast(header.str().size()),MPI_CHAR,&stat); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); } - std::string local_data(mtx.str()); - ierr = MPI_File_write_ordered(fh,&local_data[0],static_cast(local_data.size()),MPI_CHAR,&stat); + ierr = MPI_File_write_ordered(fh,const_cast(mtx.str().c_str()),static_cast(mtx.str().size()),MPI_CHAR,&stat); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); ierr = MPI_File_close(&fh); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); @@ -1505,7 +1538,7 @@ namespace INMOST solver_data = static_cast(problem); } #endif - if( _pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC) + if( _pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC || _pack == INNER_MPTILU2) { Method * prec; if (_pack == INNER_ILU2) @@ -1533,6 +1566,15 @@ namespace INMOST #else std::cout << "Sorry, maximum product transverse condition estimation crout-ilu preconditioner is not included in this release" << std::endl; prec = NULL; +#endif + } + else if( _pack == INNER_MPTILU2 ) + { +#if defined(__SOLVER_MTILU2__) + prec = new MTILU2_preconditioner(info); +#else + std::cout << "Sorry, maximum product transverse ilu2 preconditioner is not included in this release" << std::endl; + prec = NULL; #endif } solver_data = new KSOLVER(prec, info); @@ -1580,7 +1622,7 @@ namespace INMOST throw - 1; //You should not really want to copy solver's information } #endif - if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC) + if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC || _pack == INNER_MPTILU2) { throw - 1; //You should not really want to copy solver's information } @@ -1631,7 +1673,7 @@ namespace INMOST } } #endif - if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC) + if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC || _pack == INNER_MPTILU2) { if( matrix_data != NULL ) { @@ -1719,7 +1761,7 @@ namespace INMOST throw - 1; //You should not really want to copy solver's information } #endif - if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC) + if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC || _pack == INNER_MPTILU2) { throw - 1; //You should not really want to copy solver's information } @@ -1975,7 +2017,7 @@ namespace INMOST ok = true; } #endif - if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC) + if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC || _pack == INNER_MPTILU2) { Solver::Matrix * mat = new Solver::Matrix(A); @@ -2000,7 +2042,7 @@ namespace INMOST { sol->EnumParameter(":estimator") = preconditioner_condition_estimation; } - else sol->EnumParameter(":fill") = static_cast(preconditioner_fill_level); + else if( _pack == INNER_ILU2 ) sol->EnumParameter(":fill") = static_cast(preconditioner_fill_level); if( sizeof(KSOLVER) == sizeof(BCGSL_solver) ) sol->EnumParameter("levels") = solver_gmres_substeps; @@ -2388,7 +2430,7 @@ namespace INMOST } #endif - if(_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC) + if(_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC || _pack == INNER_MPTILU2) { IterativeMethod * sol = static_cast(solver_data); diff --git a/solver_bcgsl.hpp b/solver_bcgsl.hpp index db8aaba..5565049 100644 --- a/solver_bcgsl.hpp +++ b/solver_bcgsl.hpp @@ -9,12 +9,15 @@ #include "inmost_solver.h" -//#define PSEUDOINVERSE // same trick as in petsc with pseudoinverse +//#define CONVEX_COMBINATION +#define PSEUDOINVERSE // same trick as in petsc with pseudoinverse //#define USE_LAPACK_SVD // use lapack's dgesvd routine instead of built-in svdnxn //#if !defined(NDEBUG) #define REPORT_RESIDUAL //#endif +//#define USE_OMP + namespace INMOST { @@ -37,9 +40,14 @@ namespace INMOST else result = 0.0; return(result); } - int svdnxn(INMOST_DATA_REAL_TYPE * a,INMOST_DATA_REAL_TYPE * u, INMOST_DATA_REAL_TYPE *w, INMOST_DATA_REAL_TYPE * v, const int n) + int svdnxn(INMOST_DATA_REAL_TYPE * pa,INMOST_DATA_REAL_TYPE * pu, INMOST_DATA_REAL_TYPE *pw, INMOST_DATA_REAL_TYPE * pv, const int n) { - memcpy(u,a,sizeof(INMOST_DATA_REAL_TYPE)*n*n); + shell a(pa,n*n); + shell u(pu,n*n); + shell v(pv,n*n); + shell w(pw,n); + std::copy(a.begin(),a.end(),u.begin()); + //memcpy(u,a,sizeof(INMOST_DATA_REAL_TYPE)*n*n); int flag, i, its, j, jj, k, l, nm; INMOST_DATA_REAL_TYPE c, f, h, s, x, y, z; INMOST_DATA_REAL_TYPE anorm = 0.0, g = 0.0, scale = 0.0; @@ -476,10 +484,10 @@ namespace INMOST info->PrepareVector(r_tilde); info->PrepareVector(x0); info->PrepareVector(t); - tau = new INMOST_DATA_REAL_TYPE[l * 5 + l*l]; - sigma = tau + l*l; - gamma = sigma + l; - theta1 = gamma + l; + tau = new INMOST_DATA_REAL_TYPE[l * 3 + (l+1)*(l+1) + (l+1)*2]; + sigma = tau + (l+1)*(l+1); + gamma = sigma + l+1; + theta1 = gamma + l+1; theta2 = theta1 + l; theta3 = theta2 + l; u = new Solver::Vector[l * 2 + 2]; @@ -553,7 +561,7 @@ namespace INMOST } else { - Alink->MatVec(1.0,t,0,Output); + Alink->MatVec(1.0,Input,0,Output); info->Update(Output); } } @@ -561,8 +569,9 @@ namespace INMOST { assert(isInitialized()); INMOST_DATA_ENUM_TYPE vbeg,vend, vlocbeg, vlocend; + INMOST_DATA_INTEGER_TYPE ivbeg, ivend, ivlocbeg, ivlocend; INMOST_DATA_REAL_TYPE rho0 = 1, rho1, alpha = 0, beta, omega = 1, eta; - INMOST_DATA_REAL_TYPE resid0, resid, rhs_norm;//, temp[2]; + INMOST_DATA_REAL_TYPE resid0, resid, rhs_norm, tau_sum, sigma_sum;//, temp[2]; iters = 0; info->PrepareVector(SOL); info->PrepareVector(RHS); @@ -572,8 +581,11 @@ namespace INMOST if( prec != NULL ) prec->ReplaceRHS(RHS); info->GetLocalRegion(info->GetRank(),vlocbeg,vlocend); info->GetVectorRegion(vbeg,vend); - - //rhs_norm = info->ScalarProd(RHS,RHS,vlocbeg,vlocend); + ivbeg = vbeg; + ivend = vend; + ivlocbeg = vlocbeg; + ivlocend = vlocend; + //info->ScalarProd(RHS,RHS,vlocbeg,vlocend,rhs_norm); rhs_norm = 1; //r[0] = b std::copy(RHS.Begin(),RHS.End(),r[0].Begin()); @@ -586,8 +598,18 @@ namespace INMOST } std::copy(r[0].Begin(),r[0].End(),r_tilde.Begin()); // r_tilde = r[0] std::fill(u[0].Begin(),u[0].End(),0); // u[0] = 0 - resid = info->ScalarProd(r[0],r[0],vlocbeg,vlocend); //resid = dot(r[0],r[0]) - for(INMOST_DATA_ENUM_TYPE k = vbeg; k != vend; k++) // r_tilde = r[0] / dot(r[0],r[0]) + resid = 0; +#if defined(USE_OMP) +#pragma omp parallel for reduction(+:resid) +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + resid += r[0][k]*r[0][k]; + info->Integrate(&resid,1); + //info->ScalarProd(r[0],r[0],vlocbeg,vlocend,resid); //resid = dot(r[0],r[0]) +#if defined(USE_OMP) +#pragma omp parallel for +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) // r_tilde = r[0] / dot(r[0],r[0]) r_tilde[k] /= resid; last_resid = resid = resid0 = sqrt(resid/rhs_norm); //resid = sqrt(dot(r[0],r[0]) last_it = 0; @@ -608,328 +630,511 @@ namespace INMOST reason = "initial solution satisfy tolerances"; goto exit; } - - long double tt, ts, tp; - while( true ) + bool halt = false; +#if defined(USE_OMP) +#pragma omp parallel +#endif { - ts = tp = 0; - rho0 = -omega*rho0; - - tt = Timer(); - for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) + long double tt, ts, tp; + while( !halt ) { - rho1 = info->ScalarProd(r[j],r_tilde,vlocbeg,vlocend); // rho1 = dot(r[j],r_tilde) - beta = alpha * (rho1/rho0); + ts = tp = 0; - if( fabs(beta) > 1.0e+100 ) +#if defined(USE_OMP) +#pragma omp single +#endif { - //std::cout << "alpha " << alpha << " rho1 " << rho1 << " rho0 " << rho0 << " beta " << beta << std::endl; - reason = "multiplier(1) is too large"; - goto exit; + rho0 = -omega*rho0; } - - if( beta != beta ) + + tt = Timer(); + for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) { - reason = "multiplier(1) is NaN"; - goto exit; - } +#if defined(USE_OMP) +#pragma omp single +#endif + rho1 = 0; +#if defined(USE_OMP) +#pragma omp for reduction(+:rho1) +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + rho1+= r[j][k]*r_tilde[k]; + info->Integrate(&rho1,1); + //info->ScalarProd(r[j],r_tilde,vlocbeg,vlocend,rho1); // rho1 = dot(r[j],r_tilde) +#if defined(USE_OMP) +#pragma omp single +#endif + { + beta = alpha * (rho1/rho0); + } - rho0 = rho1; - for(INMOST_DATA_ENUM_TYPE i = 0; i < j+1; i++) - for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) - u[i][k] = r[i][k] - beta*u[i][k]; + if( fabs(beta) > 1.0e+100 ) + { + //std::cout << "alpha " << alpha << " rho1 " << rho1 << " rho0 " << rho0 << " beta " << beta << std::endl; +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "multiplier(1) is too large"; + halt = true; + break; + } - ApplyOperator(u[j],u[j+1]); // u[j+1] = A*R*u[j] - eta = info->ScalarProd(u[j+1],r_tilde,vlocbeg,vlocend); //eta = dot(u[j+1],r_tilde) - - alpha = rho0 / eta; + if( beta != beta ) + { +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "multiplier(1) is NaN"; + halt = true; + break; + } +#if defined(USE_OMP) +#pragma omp single +#endif + { + rho0 = rho1; + } + for(INMOST_DATA_ENUM_TYPE i = 0; i < j+1; i++) + { +#if defined(USE_OMP) +#pragma omp for +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) + u[i][k] = r[i][k] - beta*u[i][k]; + } - if( fabs(alpha) > 1.0e+100 ) - { - reason = "multiplier(2) is too large"; - goto exit; - } - if( alpha != alpha ) - { - reason = "multiplier(2) is NaN"; - goto exit; - } - for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) - SOL[k] += alpha*u[0][k]; + ApplyOperator(u[j],u[j+1]); // u[j+1] = A*R*u[j] +#if defined(USE_OMP) +#pragma omp single +#endif + eta = 0; +#if defined(USE_OMP) +#pragma omp for reduction(+:eta) +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + eta += u[j+1][k]*r_tilde[k]; + info->Integrate(&eta,1); + //info->ScalarProd(u[j+1],r_tilde,vlocbeg,vlocend,eta); //eta = dot(u[j+1],r_tilde) - for(INMOST_DATA_ENUM_TYPE i = 0; i < j+1; i++) - for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) //r[i] = r[i] - alpha * u[i+1] - r[i][k] -= alpha*u[i+1][k]; +#if defined(USE_OMP) +#pragma omp single +#endif + alpha = rho0 / eta; + + if( fabs(alpha) > 1.0e+100 ) + { +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "multiplier(2) is too large"; + halt = true; + break; + } + if( alpha != alpha ) + { +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "multiplier(2) is NaN"; + halt = true; + break; + } + +#if defined(USE_OMP) +#pragma omp for +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) + SOL[k] += alpha*u[0][k]; + + for(INMOST_DATA_ENUM_TYPE i = 0; i < j+1; i++) + { +#if defined(USE_OMP) +#pragma omp for +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) //r[i] = r[i] - alpha * u[i+1] + r[i][k] -= alpha*u[i+1][k]; + } - resid = info->ScalarProd(r[0],r[0],vlocbeg,vlocend); // resid = dot(r[j],r[j]) - resid = sqrt(resid/rhs_norm); // resid = sqrt(dot(r[j],r[j])) +#if defined(USE_OMP) +#pragma omp single +#endif + resid = 0; +#if defined(USE_OMP) +#pragma omp for reduction(+:resid) +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + resid += r[0][k]*r[0][k]; + info->Integrate(&resid,1); + //info->ScalarProd(r[0],r[0],vlocbeg,vlocend,resid); // resid = dot(r[j],r[j]) +#if defined(USE_OMP) +#pragma omp single +#endif + resid = sqrt(resid/rhs_norm); // resid = sqrt(dot(r[j],r[j])) - if( resid < atol || resid < rtol*resid0 ) - { - reason = "early exit in bi-cg block"; - last_resid = resid; - goto exit; + if( resid < atol || resid < rtol*resid0 ) + { +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "early exit in bi-cg block"; + last_resid = resid; + halt = true; + break; + } + ApplyOperator(r[j],r[j+1]); // r[j+1] = A*R*r[j] } - - - ApplyOperator(r[j],r[j+1]); // r[j+1] = A*R*r[j] - - } - - for(INMOST_DATA_ENUM_TYPE j = 1; j < l+1; j++) - { - for(INMOST_DATA_ENUM_TYPE m = 1; m < j+1; m++) + if( halt ) break; + INMOST_DATA_ENUM_TYPE size = l; +#if defined(CONVEX_COMBINATION) + size = l+1; +#endif + // Penalization for convex combination for update below + for(INMOST_DATA_ENUM_TYPE j = 1; j < l+1; j++) { - tau[(m-1) + (j-1)*l] = 0; - for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k) - tau[(m-1) + (j-1)*l] += r[j][k]*r[m][k]; - tau[(j-1) + (m-1)*l] = tau[(m-1) + (j-1)*l]; + for(INMOST_DATA_ENUM_TYPE m = 1; m < j+1; m++) + { +#if defined(USE_OMP) +#pragma omp single +#endif + tau_sum = 0.0; +#if defined(USE_OMP) +#pragma omp for reduction(+:tau_sum) +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + tau_sum += r[j][k]*r[m][k]; +#if defined(USE_OMP) +#pragma omp single +#endif + tau[(j-1) + (m-1)*size] = tau[(m-1) + (j-1)*size] = tau_sum; + } +#if defined(USE_OMP) +#pragma omp single +#endif + sigma_sum = 0; +#if defined(USE_OMP) +#pragma omp for reduction(+:sigma_sum) +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + sigma_sum += r[0][k]*r[j][k]; +#if defined(USE_OMP) +#pragma omp single +#endif + sigma[j-1] = sigma_sum; } - sigma[j-1] = 0; - for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k) - sigma[j-1] += r[0][k]*r[j][k]; - } - info->Integrate(tau,l*l+l); //sigma is updated with tau +#if defined(CONVEX_COMBINATION) + INMOST_DATA_REAL_TYPE lagrangian = 0.0; + for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) lagrangian += tau[j+size*j]; + sigma[l] = lagrangian; + tau[(l+1)*(l+1)-1] = 0.0; + for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) + { + tau[l + j*(l+1)] = -lagrangian; + tau[l*(l+1)+j] = lagrangian; + } +#endif + info->Integrate(tau,size*size+size); //sigma is updated with tau +#if defined(USE_OMP) +#pragma omp single +#endif #if defined(PSEUDOINVERSE) - { - int dgesvd_info = 0; + { + int dgesvd_info = 0; +#if defined(USE_LAPACK_SVD) + char c = 'A'; + INMOST_DATA_REAL_TYPE U[128*128], V[128*128], w[128]; + INMOST_DATA_REAL_TYPE work[5*128]; + int lwork = 5*128; + int n = static_cast(size); + dgesvd_(&c,&c,&n,&n,tau,&n,w,U,&n,V,&n,work,&lwork,&dgesvd_info); +#else + /* + char c = 'A'; + INMOST_DATA_REAL_TYPE U2[128*128], V2[128*128], w2[128], tau2[128*128]; + INMOST_DATA_REAL_TYPE work[5*128]; + int lwork = 5*128; + int n = l; + memcpy(tau2,tau,sizeof(INMOST_DATA_REAL_TYPE)*l*l); + dgesvd_(&c,&c,&n,&n,tau2,&n,w2,U2,&n,V2,&n,work,&lwork,&dgesvd_info); + printf("dgesvd\n"); + printf("w\n"); + for(int q = 0; q < l; ++q) printf("%g ",w2[q]); + printf("\nU\n"); + for(int q = 0; q < l*l; ++q) + { + printf("%g ",U2[q]); + if( (q+1)%l == 0 ) printf("\n"); + } + printf("V\n"); + for(int q = 0; q < l*l; ++q) + { + printf("%g ",V2[q]); + if( (q+1)%l == 0 ) printf("\n"); + } + */ + INMOST_DATA_REAL_TYPE U[128*128], V[128*128], w[128]; + dgesvd_info = svdnxn(tau,U,w,V,size); + //for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) w[j] = S[j*l+j]; + /* + printf("svdnxn\n"); + printf("w\n"); + for(int q = 0; q < l; ++q) printf("%g ",w[q]); + printf("\nU\n"); + for(int q = 0; q < l*l; ++q) + { + printf("%g ",U[q]); + if( (q+1)%l == 0 ) printf("\n"); + } + printf("V\n"); + for(int q = 0; q < l*l; ++q) + { + printf("%g ",V[q]); + if( (q+1)%l == 0 ) printf("\n"); + } + */ +#endif + /* + printf("w "); + for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) printf("%20g ",w[j]); + printf("\n"); + printf("U\n"); + for(INMOST_DATA_ENUM_TYPE j = 0; j < l*l; j++) + { + printf("%20g ",U[j]); + if( (j+1) % l == 0 ) printf("\n"); + } + printf("\n"); -#if defined(USE_LAPACK_SVD) - char c = 'A'; - INMOST_DATA_REAL_TYPE U[128*128], V[128*128], w[128]; - INMOST_DATA_REAL_TYPE work[5*128]; - int lwork = 5*128; - int n = l; - dgesvd_(&c,&c,&n,&n,tau,&n,w,U,&n,V,&n,work,&lwork,&dgesvd_info); + printf("VT\n"); + for(INMOST_DATA_ENUM_TYPE j = 0; j < l*l; j++) + { + printf("%20g ",V[j]); + if( (j+1) % l == 0 ) printf("\n"); + } + printf("\n"); + */ + if( dgesvd_info != 0 ) + { + printf("(%s:%d) dgesvd %d\n",__FILE__,__LINE__,dgesvd_info); + exit(-1); + } + + INMOST_DATA_REAL_TYPE maxw = w[0], tol; + for(INMOST_DATA_ENUM_TYPE j = 1; j < size; j++) if(w[j]>maxw) maxw = w[j]; + tol = size*maxw*1.0e-14; + memset(gamma,0,sizeof(INMOST_DATA_REAL_TYPE)*size); + for(INMOST_DATA_ENUM_TYPE j = 0; j < size; j++) + { + if( w[j] > tol ) + { + INMOST_DATA_REAL_TYPE sum = 0; + for(INMOST_DATA_ENUM_TYPE k = 0; k < size; ++k) + sum += sigma[k]*U[j*size+k]; + for(INMOST_DATA_ENUM_TYPE k = 0; k < size; ++k) + gamma[k] += sum/w[j]*V[k*size+j]; + } + } + } + + //svdnxn(tau,U,S,V,l); + //INMOST_DATA_REAL_TYPE inv_tau[64]; + //pseudoinverse(tau,inv_tau,l); + //matmul(inv_tau,sigma,gamma,l,l,1); #else - /* - char c = 'A'; - INMOST_DATA_REAL_TYPE U2[128*128], V2[128*128], w2[128], tau2[128*128]; - INMOST_DATA_REAL_TYPE work[5*128]; - int lwork = 5*128; - int n = l; - memcpy(tau2,tau,sizeof(INMOST_DATA_REAL_TYPE)*l*l); - dgesvd_(&c,&c,&n,&n,tau2,&n,w2,U2,&n,V2,&n,work,&lwork,&dgesvd_info); - printf("dgesvd\n"); - printf("w\n"); - for(int q = 0; q < l; ++q) printf("%g ",w2[q]); - printf("\nU\n"); - for(int q = 0; q < l*l; ++q) { - printf("%g ",U2[q]); - if( (q+1)%l == 0 ) printf("\n"); + int order[128]; + int row = solvenxn(tau,gamma,sigma,size,order); + /* + double sum = 0.0; + for(int j = 0; j < l; ++j) + { + sum += gamma[j]; + std::cout << gamma[j] << " "; + } + std::cout << "sum: " << sum; + //std::cout << " lagrangian: " << gamma[l]; + std::cout << std::endl; + */ + if( row != 0 ) + { + std::cout << "breakdown on row " << row << std::endl; + reason = "breakdown in matrix inversion in polynomial part"; + break; + } } - printf("V\n"); - for(int q = 0; q < l*l; ++q) +#endif +#if defined(USE_OMP) +#pragma omp single +#endif + omega = gamma[l-1]; + if( fabs(omega) > 1.0e+100 ) { - printf("%g ",V2[q]); - if( (q+1)%l == 0 ) printf("\n"); +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "multiplier(3) is too large"; + halt = true; + break; } - */ - INMOST_DATA_REAL_TYPE U[128*128], V[128*128], w[128]; - dgesvd_info = svdnxn(tau,U,w,V,l); - //for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) w[j] = S[j*l+j]; - /* - printf("svdnxn\n"); - printf("w\n"); - for(int q = 0; q < l; ++q) printf("%g ",w[q]); - printf("\nU\n"); - for(int q = 0; q < l*l; ++q) + if( omega != omega ) { - printf("%g ",U[q]); - if( (q+1)%l == 0 ) printf("\n"); +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "multiplier(3) is NaN"; + halt = true; + break; } - printf("V\n"); - for(int q = 0; q < l*l; ++q) + for(INMOST_DATA_ENUM_TYPE j = 1; j < l+1; ++j) { - printf("%g ",V[q]); - if( (q+1)%l == 0 ) printf("\n"); +#if defined(USE_OMP) +#pragma omp for +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) + { + u[0][k] -= gamma[j-1]*u[j][k]; + SOL[k] += gamma[j-1]*r[j-1][k]; + r[0][k] -= gamma[j-1]*r[j][k]; + } } - */ -#endif + + /* - printf("w "); - for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) printf("%20g ",w[j]); - printf("\n"); - - printf("U\n"); - for(INMOST_DATA_ENUM_TYPE j = 0; j < l*l; j++) + for(INMOST_DATA_ENUM_TYPE j = 1; j < l+1; j++) { - printf("%20g ",U[j]); - if( (j+1) % l == 0 ) printf("\n"); + for(INMOST_DATA_ENUM_TYPE i = 1; i < j; i++) + { + tau[i-1 + (j-1)*l] = 0; + for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k) + tau[i-1 + (j-1)*l] += r[j][k]*r[i][k]; + info->Integrate(&tau[i-1 + (j-1)*l],1); + tau[i-1 + (j-1)*l] /= sigma[i-1]; + for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) + r[j][k] -= tau[i-1 + (j-1)*l]*r[i][k]; + } + INMOST_DATA_REAL_TYPE temp[2] = {0,0}; + for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k) + { + temp[0] += r[j][k]*r[j][k]; + temp[1] += r[0][k]*r[j][k]; + } + info->Integrate(temp,2); + sigma[j-1] = temp[0];//+1.0e-35; //REVIEW + theta2[j-1] = temp[1]/sigma[j-1]; } - printf("\n"); - - printf("VT\n"); - for(INMOST_DATA_ENUM_TYPE j = 0; j < l*l; j++) + omega = theta1[l-1] = theta2[l-1]; + for(INMOST_DATA_ENUM_TYPE j = l-1; j > 0; j--) + { + eta = 0; + for(INMOST_DATA_ENUM_TYPE i = j+1; i < l+1; i++) + eta += tau[j-1 + (i-1)*l] * theta1[i-1]; + theta1[j-1] = theta2[j-1] - eta; + } + for(INMOST_DATA_ENUM_TYPE j = 1; j < l; j++) + { + eta = 0; + for(INMOST_DATA_ENUM_TYPE i = j+1; i < l; i++) + eta += tau[j-1 + (i-1)*l] * theta1[i]; + theta3[j-1] = theta1[j] + eta; + } + for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) { - printf("%20g ",V[j]); - if( (j+1) % l == 0 ) printf("\n"); + SOL[k] += theta1[0]*r[0][k]; + r[0][k] -= theta2[l-1]*r[l][k]; + u[0][k] -= theta1[l-1]*u[l][k]; + } + for(INMOST_DATA_ENUM_TYPE j = 1; j < l; j++) + { + for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) + { + u[0][k] -= theta1[j-1]*u[j][k]; + SOL[k] += theta3[j-1]*r[j][k]; + r[0][k] -= theta2[j-1]*r[j][k]; + } } - printf("\n"); */ - if( dgesvd_info != 0 ) + last_it = i+1; { - printf("(%s:%d) dgesvd %d\n",__FILE__,__LINE__,dgesvd_info); - exit(-1); +#if defined(USE_OMP) +#pragma omp single +#endif + resid = 0; +#if defined(USE_OMP) +#pragma omp for reduction(+:resid) +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + resid += r[0][k]*r[0][k]; + info->Integrate(&resid,1); + //info->ScalarProd(r[0],r[0],vlocbeg,vlocend,resid); +#if defined(USE_OMP) +#pragma omp single +#endif + resid = sqrt(resid/rhs_norm); } - - INMOST_DATA_REAL_TYPE maxw = w[0], tol; - for(INMOST_DATA_ENUM_TYPE j = 1; j < l; j++) if(w[j]>maxw) maxw = w[j]; - tol = l*maxw*1.0e-14; - memset(gamma,0,sizeof(INMOST_DATA_REAL_TYPE)*l); - for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) + tt = Timer() - tt; +#if defined(REPORT_RESIDUAL) + if( info->GetRank() == 0 ) { - if( w[j] > tol ) + //std::cout << "iter " << last_it << " residual " << resid << " time " << tt << " matvec " << ts*0.5/l << " precond " << tp*0.5/l << std::endl; + //std::cout << "iter " << last_it << " resid " << resid << "\r"; + //printf("iter %3d resid %12g | %12g relative %12g | %12g\r", last_it, resid, atol, resid / resid0, rtol); +#if defined(USE_OMP) +#pragma omp single +#endif { - INMOST_DATA_REAL_TYPE sum = 0; - for(INMOST_DATA_ENUM_TYPE k = 0; k < l; ++k) - sum += sigma[k]*U[j*l+k]; - for(INMOST_DATA_ENUM_TYPE k = 0; k < l; ++k) - gamma[k] += sum/w[j]*V[k*l+j]; + printf("iter %3d resid %12g | %g\r", last_it, resid, atol); + fflush(stdout); } } - } - - //svdnxn(tau,U,S,V,l); - //INMOST_DATA_REAL_TYPE inv_tau[64]; - //pseudoinverse(tau,inv_tau,l); - //matmul(inv_tau,sigma,gamma,l,l,1); -#else - int order[128]; - int row = solvenxn(tau,gamma,sigma,l,order); - if( row != 0 ) - { - std::cout << "breakdown on row " << row << std::endl; - reason = "breakdown in matrix inversion in polynomial part"; - break; - } #endif - omega = gamma[l-1]; - if( fabs(omega) > 1.0e+100 ) - { - reason = "multiplier(3) is too large"; - goto exit; - } - if( omega != omega ) - { - reason = "multiplier(3) is NaN"; - goto exit; - } - for(INMOST_DATA_ENUM_TYPE j = 1; j < l+1; ++j) - { - for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) + last_resid = resid; + if( resid != resid ) { - u[0][k] -= gamma[j-1]*u[j][k]; - SOL[k] += gamma[j-1]*r[j-1][k]; - r[0][k] -= gamma[j-1]*r[j][k]; +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "residual is NAN"; + break; } - } - - - /* - for(INMOST_DATA_ENUM_TYPE j = 1; j < l+1; j++) - { - for(INMOST_DATA_ENUM_TYPE i = 1; i < j; i++) + if( resid < atol ) { - tau[i-1 + (j-1)*l] = 0; - for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k) - tau[i-1 + (j-1)*l] += r[j][k]*r[i][k]; - info->Integrate(&tau[i-1 + (j-1)*l],1); - tau[i-1 + (j-1)*l] /= sigma[i-1]; - for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) - r[j][k] -= tau[i-1 + (j-1)*l]*r[i][k]; +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "converged due to absolute tolerance"; + break; } - INMOST_DATA_REAL_TYPE temp[2] = {0,0}; - for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k) + if( resid < rtol*resid0 ) { - temp[0] += r[j][k]*r[j][k]; - temp[1] += r[0][k]*r[j][k]; +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "converged due to relative tolerance"; + break; } - info->Integrate(temp,2); - sigma[j-1] = temp[0];//+1.0e-35; //REVIEW - theta2[j-1] = temp[1]/sigma[j-1]; - } - omega = theta1[l-1] = theta2[l-1]; - for(INMOST_DATA_ENUM_TYPE j = l-1; j > 0; j--) - { - eta = 0; - for(INMOST_DATA_ENUM_TYPE i = j+1; i < l+1; i++) - eta += tau[j-1 + (i-1)*l] * theta1[i-1]; - theta1[j-1] = theta2[j-1] - eta; - } - for(INMOST_DATA_ENUM_TYPE j = 1; j < l; j++) - { - eta = 0; - for(INMOST_DATA_ENUM_TYPE i = j+1; i < l; i++) - eta += tau[j-1 + (i-1)*l] * theta1[i]; - theta3[j-1] = theta1[j] + eta; - } - for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) - { - SOL[k] += theta1[0]*r[0][k]; - r[0][k] -= theta2[l-1]*r[l][k]; - u[0][k] -= theta1[l-1]*u[l][k]; - } - for(INMOST_DATA_ENUM_TYPE j = 1; j < l; j++) - { - for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) + if( resid > divtol ) { - u[0][k] -= theta1[j-1]*u[j][k]; - SOL[k] += theta3[j-1]*r[j][k]; - r[0][k] -= theta2[j-1]*r[j][k]; +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "diverged due to divergence tolerance"; + break; } - } - */ - last_it = i+1; - { - resid = info->ScalarProd(r[0],r[0],vlocbeg,vlocend); - resid = sqrt(resid/rhs_norm); - } - tt = Timer() - tt; -#if defined(REPORT_RESIDUAL) - if( info->GetRank() == 0 ) - { - //std::cout << "iter " << last_it << " residual " << resid << " time " << tt << " matvec " << ts*0.5/l << " precond " << tp*0.5/l << std::endl; - //std::cout << "iter " << last_it << " resid " << resid << "\r"; - //printf("iter %3d resid %12g | %12g relative %12g | %12g\r", last_it, resid, atol, resid / resid0, rtol); - printf("iter %3d resid %12g | %g\r", last_it, resid, atol); - fflush(stdout); - } + if( i == maxits ) + { +#if defined(USE_OMP) +#pragma omp single #endif - last_resid = resid; - if( resid != resid ) - { - reason = "residual is NAN"; - break; - } - if( resid < atol ) - { - reason = "converged due to absolute tolerance"; - break; - } - if( resid < rtol*resid0 ) - { - reason = "converged due to relative tolerance"; - break; - } - if( resid > divtol ) - { - reason = "diverged due to divergence tolerance"; - break; - } - if( i == maxits ) - { - reason = "reached maximum iteration number"; - break; + reason = "reached maximum iteration number"; + break; + } + i++; } - i++; } exit: if (prec != NULL) @@ -937,7 +1142,10 @@ exit: prec->Solve(SOL, r_tilde); //undo right preconditioner std::copy(r_tilde.Begin(), r_tilde.End(), SOL.Begin()); } - for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k) //undo shift +#if defined(USE_OMP) +#pragma omp parallel for +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) //undo shift SOL[k] += x0[k]; //info->RestoreMatrix(A); info->RestoreVector(SOL); @@ -1061,11 +1269,10 @@ exit: INMOST_DATA_INTEGER_TYPE ivbeg,ivend, ivlocbeg, ivlocend; INMOST_DATA_REAL_TYPE rho = 1, alpha = 1, beta, omega = 1; INMOST_DATA_REAL_TYPE resid0, resid, temp[2]; - bool is_parallel = info->GetSize() > 1; info->PrepareVector(SOL); info->PrepareVector(RHS); - if( is_parallel ) info->Update(SOL); - if( is_parallel ) info->Update(RHS); + info->Update(SOL); + info->Update(RHS); if (prec != NULL)prec->ReplaceSOL(SOL); if (prec != NULL)prec->ReplaceRHS(RHS); info->GetLocalRegion(info->GetRank(),vlocbeg,vlocend); @@ -1080,9 +1287,12 @@ exit: std::fill(p.Begin(),p.End(),0.0); { resid = 0; - for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k != vlocend; k++) +#if defined(USE_OMP) +#pragma omp parallel for +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; k++) resid += r[k]*r[k]; - if( is_parallel ) info->Integrate(&resid,1); + info->Integrate(&resid,1); } last_resid = resid = resid0 = sqrt(resid); last_it = 0; @@ -1104,12 +1314,9 @@ exit: #pragma omp parallel #endif { - long double tt, ts, tp, ttt; INMOST_DATA_ENUM_TYPE i = 0; while(true) { - ts = tp = 0; - tt = Timer(); { /* if( fabs(rho) < 1.0e-31 ) @@ -1126,30 +1333,41 @@ exit: } */ //std::cout << "rho " << rho << " alpha " << alpha << " omega " << omega << " beta " << 1.0 /rho * alpha / omega << std::endl; - beta = 1.0 /rho * alpha / omega; - rho = 0; +#if defined(USE_OMP) +#pragma omp single +#endif + { + beta = 1.0 /rho * alpha / omega; + rho = 0; + } #if defined(USE_OMP) #pragma omp for reduction(+:rho) #endif - for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; k++) + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) rho += r0[k]*r[k]; - if( is_parallel ) - { + info->Integrate(&rho,1); + //info->ScalarProd(r0,r,ivlocbeg,ivlocend,rho); #if defined(USE_OMP) #pragma omp single #endif - info->Integrate(&rho,1); + { + beta *= rho; } - beta *= rho; if( fabs(beta) > 1.0e+100 ) { //std::cout << "rho " << rho << " alpha " << alpha << " omega " << omega << " beta " << 1.0 /rho * alpha / omega << std::endl; +#if defined(USE_OMP) +#pragma omp single +#endif reason = "multiplier(1) is too large"; break; } if( beta != beta ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "multiplier(1) is NaN"; break; } @@ -1161,62 +1379,57 @@ exit: for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) p[k] = r[k] + beta*(p[k] - omega*v[k]); //global indexes r, p, v } + { - ttt = Timer(); - if (prec != NULL) + if (prec != NULL) { -#if defined(USE_OMP) -#pragma omp single -#endif prec->Solve(p, y); - } - tp += Timer() - ttt; - if( is_parallel ) - { -#if defined(USE_OMP) -#pragma omp single -#endif info->Update(y); + Alink->MatVec(1,y,0,v); // global multiplication, y should be updated, v probably needs an update + info->Update(v); } - ttt = Timer(); - Alink->MatVec(1,y,0,v); // global multiplication, y should be updated, v probably needs an update - ts += Timer() - ttt; - if( is_parallel ) + else { -#if defined(USE_OMP) -#pragma omp single -#endif + Alink->MatVec(1,p,0,v); // global multiplication, y should be updated, v probably needs an update info->Update(v); } } { +#if defined(USE_OMP) +#pragma omp single +#endif alpha = 0; #if defined(USE_OMP) #pragma omp for reduction(+:alpha) #endif - for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; k++) + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) alpha += r0[k]*v[k]; - if( is_parallel ) - { + info->Integrate(&alpha,1); + //info->ScalarProd(r0,v,ivlocbeg,ivlocend,alpha); #if defined(USE_OMP) #pragma omp single #endif - info->Integrate(&alpha,1); + { + if( alpha == 0 && rho == 0 ) + alpha = 0; + else + alpha = rho / alpha; //local indexes, r0, v } - if( alpha == 0 && rho == 0 ) - alpha = 0; - else - alpha = rho / alpha; //local indexes, r0, v - if( fabs(alpha) > 1.0e+100 ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "multiplier(2) is too large"; //std::cout << "alpha " << alpha << " rho " << rho << std::endl; break; } if( alpha != alpha ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "multiplier(2) is NaN"; //std::cout << "alpha " << alpha << " rho " << rho << std::endl; break; @@ -1232,35 +1445,21 @@ exit: } { - ttt = Timer(); - if (prec != NULL) + if (prec != NULL) { -#if defined(USE_OMP) -#pragma omp single -#endif prec->Solve(s, z); - } - tp += Timer() - ttt; - if( is_parallel ) - { -#if defined(USE_OMP) -#pragma omp single -#endif info->Update(z); + Alink->MatVec(1.0,z,0,t); // global multiplication, z should be updated, t probably needs an update + info->Update(t); } - ttt = Timer(); - Alink->MatVec(1.0,z,0,t); // global multiplication, z should be updated, t probably needs an update - ts += Timer() - ttt; - if( is_parallel ) + else { -#if defined(USE_OMP) -#pragma omp single -#endif + Alink->MatVec(1.0,s,0,t); // global multiplication, z should be updated, t probably needs an update info->Update(t); } } + { - temp[0] = temp[1] = 0; #if defined(USE_OMP) #pragma omp for reduction(+:tempa) reduction(+:tempb) @@ -1272,13 +1471,7 @@ exit: } temp[0] = tempa; temp[1] = tempb; - if( is_parallel ) - { -#if defined(USE_OMP) -#pragma omp single -#endif - info->Integrate(temp,2); - } + info->Integrate(temp,2); /* if (fabs(temp[1]) < 1.0e-35) { @@ -1286,19 +1479,30 @@ exit: } */ //omega = temp[0] / (temp[1] + (temp[1] < 0.0 ? -1.0e-10 : 1.0e-10)); //local indexes t, s - if( temp[0] == 0 && temp[1] == 0 ) - omega = 0; - else - omega = temp[0] / temp[1]; +#if defined(USE_OMP) +#pragma omp single +#endif + { + if( temp[0] == 0 && temp[1] == 0 ) + omega = 0; + else + omega = temp[0] / temp[1]; + } if( fabs(omega) > 1.0e+100 ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "multiplier(3) is too large"; //std::cout << "alpha " << alpha << " rho " << rho << std::endl; break; } if( omega != omega ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "multiplier(3) is NaN"; //std::cout << "alpha " << alpha << " rho " << rho << std::endl; break; @@ -1319,23 +1523,23 @@ exit: r[k] = s[k] - omega * t[k]; // global indexes r, s, t } last_it = i+1; - { - resid = 0; + //info->ScalarProd(r,r,ivlocbeg,ivlocend,resid); +#if defined(USE_OMP) +#pragma omp single +#endif + resid = 0; #if defined(USE_OMP) #pragma omp for reduction(+:resid) #endif - for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; k++) - resid += r[k]*r[k]; - if( is_parallel ) - { + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + resid += r[k]*r[k]; #if defined(USE_OMP) #pragma omp single #endif - info->Integrate(&resid,1); - } + { + info->Integrate(&resid,1); resid = sqrt(resid); } - tt = Timer() - tt; #if defined(REPORT_RESIDUAL) if( info->GetRank() == 0 ) { @@ -1354,26 +1558,41 @@ exit: last_resid = resid; if( resid != resid ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "residual is NAN"; break; } if( resid > divtol ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "diverged due to divergence tolerance"; break; } if( resid < atol ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "converged due to absolute tolerance"; break; } if( resid < rtol*resid0 ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "converged due to relative tolerance"; break; } if( i == maxits ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "reached maximum iteration number"; break; } @@ -1391,6 +1610,7 @@ exit: bool ReplaceSOL(Solver::Vector & SOL) {(void)SOL; return true; } Method * Duplicate() { return new BCGS_solver(*this);} std::string GetReason() {return reason;} + }; } diff --git a/solver_ddpqiluc2.cpp b/solver_ddpqiluc2.cpp index f734a76..dd8b74e 100644 --- a/solver_ddpqiluc2.cpp +++ b/solver_ddpqiluc2.cpp @@ -13,8 +13,8 @@ //#define REPORT_ILU //#undef REPORT_ILU //#define REPORT_ILU_PROGRESS -#define REPORT_ILU_END -#define REPORT_ILU_SUMMARY +//#define REPORT_ILU_END +//#define REPORT_ILU_SUMMARY //#undef REPORT_ILU_PROGRESS //#define REPORT_SCHUR using namespace INMOST; @@ -456,7 +456,7 @@ using namespace INMOST; A_Entries[j++] = Solver::Row::make_entry(r->first, r->second); } A_Address[k].last = j; - assert(A_Address[k].Size() != 0); //singular matrix + //assert(A_Address[k].Size() != 0); //singular matrix } INMOST_DATA_REAL_TYPE Anorm, Enorm, Fnorm; INMOST_DATA_ENUM_TYPE nzE,nzF; @@ -3362,60 +3362,65 @@ swap_algorithm: { assert(&input != &output); // - INMOST_DATA_ENUM_TYPE k, level, mobeg, moend, vbeg, vend; - info->GetOverlapRegion(info->GetRank(), mobeg, moend); - info->GetVectorRegion(vbeg, vend); - for (k = mobeg; k < moend; k++) temp[k] = input[k]; - //the original matrix A was separated by multilevel algorithm to the following form - // | B F | - // A = | | - // | E C | - // In order to apply solve phase on this matrix, we consider the matrix in following sense: - // | I 0 | | B 0 | | I B^{-1} F | - // A = | | | | | | = L D U - // | E B^{-1} I | | 0 S | | 0 I | - // where S = C - E B^{-1} F - // consider the system - // | B F | | u | | f | - // | | | | = | | - // | E C | | y | | g | - // then solution is obtained by steps: - // 1) ~f = B^{-1} f - // 2) ~g = g - E ~f - // 3) y = S^{-1} ~g - // 4) u = ~f - B^{-1} F y - //first reorder input as prescribed by P - // ~y = Pt y - for (k = vbeg; k < mobeg; k++) output[k] = 0;//Restrict additive schwartz (maybe do it outside?) - for (k = mobeg; k < moend; ++k) output[k] = temp[ddP[k]]; - for (k = moend; k < vend; k++) output[k] = 0;//Restrict additive schwartz (maybe do it outside?) - - level = 0; - //perform recursively first two steps of solve phase - // outer V-cycle - - while(level < level_size.size()) level = Descend(level, output, output); +#if defined(USE_OMP) +#pragma omp single +#endif + { + INMOST_DATA_ENUM_TYPE k, level, mobeg, moend, vbeg, vend; + info->GetOverlapRegion(info->GetRank(), mobeg, moend); + info->GetVectorRegion(vbeg, vend); + for (k = mobeg; k < moend; k++) temp[k] = input[k]; + //the original matrix A was separated by multilevel algorithm to the following form + // | B F | + // A = | | + // | E C | + // In order to apply solve phase on this matrix, we consider the matrix in following sense: + // | I 0 | | B 0 | | I B^{-1} F | + // A = | | | | | | = L D U + // | E B^{-1} I | | 0 S | | 0 I | + // where S = C - E B^{-1} F + // consider the system + // | B F | | u | | f | + // | | | | = | | + // | E C | | y | | g | + // then solution is obtained by steps: + // 1) ~f = B^{-1} f + // 2) ~g = g - E ~f + // 3) y = S^{-1} ~g + // 4) u = ~f - B^{-1} F y + //first reorder input as prescribed by P + // ~y = Pt y + for (k = vbeg; k < mobeg; k++) output[k] = 0;//Restrict additive schwartz (maybe do it outside?) + for (k = mobeg; k < moend; ++k) output[k] = temp[ddP[k]]; + for (k = moend; k < vend; k++) output[k] = 0;//Restrict additive schwartz (maybe do it outside?) + + level = 0; + //perform recursively first two steps of solve phase + // outer V-cycle + + while(level < level_size.size()) level = Descend(level, output, output); - // W-cycle, should do ApplyB between moves - //level = Ascend(level, output); - //level = Ascend(level, output); - //level = Descend(level, output); - //level = Descend(level, output); - - //on the last recursion level third step was accomplished - //finish last step in unwinding recursion + // W-cycle, should do ApplyB between moves + //level = Ascend(level, output); + //level = Ascend(level, output); + //level = Descend(level, output); + //level = Descend(level, output); + + //on the last recursion level third step was accomplished + //finish last step in unwinding recursion - while (level > 0) level = Ascend(level, output, output); + while (level > 0) level = Ascend(level, output, output); - //System was solved with reordered matrix - // PAQ x = y - // AQ x = Pt y - // Q x = A^{-1} Pt y <- we have now - // x = Qt A^{-1} Pt y - //reorder output by Q - for (k = mobeg; k < moend; ++k) temp[ddQ[k]] = output[k]; - for (k = mobeg; k < moend; ++k) output[k] = temp[k]; + //System was solved with reordered matrix + // PAQ x = y + // AQ x = Pt y + // Q x = A^{-1} Pt y <- we have now + // x = Qt A^{-1} Pt y + //reorder output by Q + for (k = mobeg; k < moend; ++k) temp[ddQ[k]] = output[k]; + for (k = mobeg; k < moend; ++k) output[k] = temp[k]; + } //May assemble partition of unity instead of restriction before accumulation //assembly should be done instead of initialization info->Accumulate(output); diff --git a/solver_ilu2.hpp b/solver_ilu2.hpp index c987f8a..e6f36fc 100644 --- a/solver_ilu2.hpp +++ b/solver_ilu2.hpp @@ -490,23 +490,28 @@ public: bool Solve(Solver::Vector & input, Solver::Vector & output) { assert(isInitialized()); - INMOST_DATA_ENUM_TYPE mobeg, moend, r, k, vbeg,vend; //, end; - info->GetOverlapRegion(info->GetRank(),mobeg,moend); - info->GetVectorRegion(vbeg,vend); - for(k = vbeg; k < mobeg; k++) output[k] = 0; //Restrict additive schwartz (maybe do it outside?) - for(k = mobeg; k < moend; k++) output[k] = input[k]; - for(k = moend; k < vend; k++) output[k] = 0; //Restrict additive schwartz (maybe do it outside?) - for(k = mobeg; k < moend; k++) //iterate over L part - { - for(r = iu[k]-1; r > ilu[k]; r--) - output[k] -= luv[r-1]*output[lui[r-1]]; - output[k] *= luv[iu[k]-1]; - } - for(k = moend; k > mobeg; k--) //iterate over U part +#if defined(USE_OMP) +#pragma omp single +#endif { - for(r = iu[k-1]+1; r < ilu[k]; r++) - output[k-1] -= luv[r]*output[lui[r]]; - output[k-1] *= luv[iu[k-1]]; + INMOST_DATA_ENUM_TYPE mobeg, moend, r, k, vbeg,vend; //, end; + info->GetOverlapRegion(info->GetRank(),mobeg,moend); + info->GetVectorRegion(vbeg,vend); + for(k = vbeg; k < mobeg; k++) output[k] = 0; //Restrict additive schwartz (maybe do it outside?) + for(k = mobeg; k < moend; k++) output[k] = input[k]; + for(k = moend; k < vend; k++) output[k] = 0; //Restrict additive schwartz (maybe do it outside?) + for(k = mobeg; k < moend; k++) //iterate over L part + { + for(r = iu[k]-1; r > ilu[k]; r--) + output[k] -= luv[r-1]*output[lui[r-1]]; + output[k] *= luv[iu[k]-1]; + } + for(k = moend; k > mobeg; k--) //iterate over U part + { + for(r = iu[k-1]+1; r < ilu[k]; r++) + output[k-1] -= luv[r]*output[lui[r]]; + output[k-1] *= luv[iu[k-1]]; + } } //May assemble partition of unity instead of restriction before accumulation //assembly should be done instead of initialization diff --git a/tests/solver_test000/CMakeLists.txt b/tests/solver_test000/CMakeLists.txt index 4e4c6cc..c5af267 100644 --- a/tests/solver_test000/CMakeLists.txt +++ b/tests/solver_test000/CMakeLists.txt @@ -41,6 +41,11 @@ if( HAVE_SOLVER_MPTILUC2 ) add_test(NAME solver_test000_serial_inner_mptiluc COMMAND $ 0 8) endif() +if( HAVE_SOLVER_MPTILU2 ) +add_test(NAME solver_test000_serial_inner_mptilu2 COMMAND $ 0 9) +endif() + + if(USE_SOLVER_PETSC) add_test(NAME solver_test000_serial_petsc COMMAND $ 0 2) endif() @@ -72,6 +77,12 @@ add_test(NAME solver_test000_parallel_permute1_inner_mptiluc COMMAND ${MPIEXE add_test(NAME solver_test000_parallel_permute2_inner_mptiluc COMMAND ${MPIEXEC} -np 4 $ 2 1) endif() +if( HAVE_SOLVER_MPTILU2 ) +add_test(NAME solver_test000_parallel_normal_inner_mptilu2 COMMAND ${MPIEXEC} -np 4 $ 0 9) +add_test(NAME solver_test000_parallel_permute1_inner_mptilu2 COMMAND ${MPIEXEC} -np 4 $ 1 9) +add_test(NAME solver_test000_parallel_permute2_inner_mptilu2 COMMAND ${MPIEXEC} -np 4 $ 2 9) +endif() + if(USE_SOLVER_PETSC) add_test(NAME solver_test000_parallel_normal_petsc COMMAND ${MPIEXEC} -np 4 $ 0 2) #add_test(NAME solver_test000_parallel_permute1_petsc COMMAND ${MPIEXEC} -np 4 $ 1 2) diff --git a/tests/solver_test000/main.cpp b/tests/solver_test000/main.cpp index 78fea9e..6c385c0 100644 --- a/tests/solver_test000/main.cpp +++ b/tests/solver_test000/main.cpp @@ -41,6 +41,7 @@ int main(int argc,char ** argv) case 6: type = Solver::Trilinos_Belos; break; case 7: type = Solver::ANI; break; case 8: type = Solver::INNER_MPTILUC; break; + case 9: type = Solver::INNER_MPTILU2; break; } { diff --git a/tests/solver_test001/main.cpp b/tests/solver_test001/main.cpp index 003a4e7..a0d92c8 100644 --- a/tests/solver_test001/main.cpp +++ b/tests/solver_test001/main.cpp @@ -66,8 +66,8 @@ int main(int argc, char ** argv) s.SetParameterReal("relative_tolerance",1.0e-9); s.SetParameterReal("absolute_tolerance",1.0e-16); s.SetParameterEnum("rescale_iterations",8); - s.SetParameterReal("drop_tolerance",1.0e-4); - s.SetParameterReal("reuse_tolerance",1.0e-8); + s.SetParameterReal("drop_tolerance",1.0e-2); + s.SetParameterReal("reuse_tolerance",1.0e-4); /* diff --git a/tests/solver_test002/CMakeLists.txt b/tests/solver_test002/CMakeLists.txt index aa99f48..e519cf9 100644 --- a/tests/solver_test002/CMakeLists.txt +++ b/tests/solver_test002/CMakeLists.txt @@ -38,8 +38,12 @@ endif() add_test(NAME solver_test002_serial_inner_ilu2 COMMAND $ 0 20) add_test(NAME solver_test002_serial_inner_ddpqiluc COMMAND $ 1 20) if(HAVE_SOLVER_MPTILUC2) -add_test(NAME solver_test002_serial_inner_mptiluc COMMAND $ 1 20) +add_test(NAME solver_test002_serial_inner_mptiluc COMMAND $ 8 20) endif() +if(HAVE_SOLVER_MPTILU2) +add_test(NAME solver_test002_serial_inner_mptilu2 COMMAND $ 9 20) +endif() + if(USE_SOLVER_PETSC) add_test(NAME solver_test002_serial_petsc COMMAND $ 2 20) endif() @@ -60,8 +64,12 @@ if( EXISTS ${MPIEXEC} ) add_test(NAME solver_test002_parallel_inner_ilu2 COMMAND ${MPIEXEC} -np 4 $ 0 20) add_test(NAME solver_test002_parallel_inner_ddpqiluc COMMAND ${MPIEXEC} -np 4 $ 1 20) if(HAVE_SOLVER_MPTILUC2) -add_test(NAME solver_test002_parallel_inner_mptiluc COMMAND ${MPIEXEC} -np 4 $ 1 20) +add_test(NAME solver_test002_parallel_inner_mptiluc COMMAND ${MPIEXEC} -np 4 $ 8 20) endif() +if(HAVE_SOLVER_MPTILU2) +add_test(NAME solver_test002_parallel_inner_mptilu2 COMMAND ${MPIEXEC} -np 4 $ 9 20) +endif() + if(USE_SOLVER_PETSC) add_test(NAME solver_test002_parallel_petsc COMMAND ${MPIEXEC} -np 4 $ 2 20) endif() diff --git a/tests/solver_test002/main.cpp b/tests/solver_test002/main.cpp index f3bd225..994ad66 100644 --- a/tests/solver_test002/main.cpp +++ b/tests/solver_test002/main.cpp @@ -102,6 +102,7 @@ int main(int argc, char ** argv) case 6: type = Solver::Trilinos_ML; break; case 7: type = Solver::ANI; break; case 8: type = Solver::INNER_MPTILUC; break; + case 9: type = Solver::INNER_MPTILU2; break; } int n = atoi(argv[2]); Solver::Initialize(&argc,&argv,argc > 3 ? argv[3] : NULL); // Initialize the linear solver in accordance with args -- 2.26.2