diff --git a/.gitignore b/.gitignore index 763f9e43858bf1c9efdd0221437ab51b7507a1ee..0d963102fe43c1d7cd6b622ba9bbac980ee4f3a0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ tests/solver_test001/matrices solver_mtiluc2.cpp solver_mtiluc2.hpp +solver_mtilu2.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f59b8a9d6ff82b5e632f2a424679aff94c7ed2f..4eb8c66e26f150b892e020fdbcb6d21eeccf7225 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,13 @@ else() set(HAVE_SOLVER_MPTILUC2 FALSE) endif() +if( EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/solver_mtilu2.hpp" ) + add_definitions(-DHAVE_SOLVER_MPTILU2) + set(HAVE_SOLVER_MPTILU2 TRUE) + list(APPEND SOURCE solver_mtilu2.hpp) +else() + set(HAVE_SOLVER_MPTILU2 FALSE) +endif() add_library(inmost STATIC ${SOURCE} ${HEADER}) diff --git a/Debuggers/VisualStudio/autoexp.dat b/Debuggers/VisualStudio/autoexp.dat new file mode 100644 index 0000000000000000000000000000000000000000..3c5166ae8bc1a93e02d36ed2bbaafc595c73e955 --- /dev/null +++ b/Debuggers/VisualStudio/autoexp.dat @@ -0,0 +1,87 @@ + +;------------------------------------------------------------------------------ +; INMOST::interval +;------------------------------------------------------------------------------ + +INMOST::interval<*,*>{ + preview ( + #( + "[", + $e.end_index-$e.beg_index, + "](", + #array( + expr: $e.array[$i+$e.beg_index], + size: $e.end_index-$e.beg_index + ), + ")" + ) + ) + children ( + #( + #([size] : $e.end_index-$e.beg_index), + #([first] : $e.beg_index), + #([last] : $e.end_index), + #array( + expr: $e.array[$i+$e.beg_index], + size: $e.end_index-$e.beg_index + ) + ) + ) +} + +;------------------------------------------------------------------------------ +; INMOST::dynarray +;------------------------------------------------------------------------------ + +INMOST::dynarray<*,*>{ + preview ( + #( + "[", + $e.pend-$e.pbegin, + "](", + #array( + expr: $e.pbegin[$i], + size: $e.pend-$e.pbegin + ), + ")" + ) + ) + children ( + #( + #([size] : $e.pend-$e.pbegin), + #([capacity] : $e.preserved-$e.pbegin), + #array( + expr: $e.pbegin[$i], + size: $e.pend-$e.pbegin + ) + ) + ) +} + +;------------------------------------------------------------------------------ +; INMOST::shell +;------------------------------------------------------------------------------ + +INMOST::shell<*>{ + preview ( + #( + "[", + *$e.m_size, + "](", + #array( + expr: (*$e.m_arr)[$i], + size: *$e.m_size + ), + ")" + ) + ) + children ( + #( + #([size] : *$e.m_size), + #array( + expr: (*$e.m_arr)[$i], + size: *$e.m_size + ) + ) + ) +} diff --git a/Debuggers/VisualStudio/readme.txt b/Debuggers/VisualStudio/readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..cb57ebaffc549aa38a4a075e4dd4da21b0b7a594 --- /dev/null +++ b/Debuggers/VisualStudio/readme.txt @@ -0,0 +1,7 @@ +Copy and paste contents of autoexp.dat into +C:\Program Files (x86)\Microsoft Visual Studio 10.0\Common7\Packages\Debugger\autoexp.dat +or the directry where Visual Studio was installed. + +In future capabilities will grove. + +Tested with Visual studio 2010. diff --git a/Debuggers/readme.txt b/Debuggers/readme.txt new file mode 100644 index 0000000000000000000000000000000000000000..102ba10a8bc912a2f588459288059d8c329bae12 --- /dev/null +++ b/Debuggers/readme.txt @@ -0,0 +1 @@ +This folder contains helper instructions to facilitate representation of custom classes in debuggers. \ No newline at end of file diff --git a/autodiff.cpp b/autodiff.cpp index 985a974f60d8e4246d40191d1249194ee2dd4684..0d79bf99001339e1f8d12880ba2ce31635edca4a 100644 --- a/autodiff.cpp +++ b/autodiff.cpp @@ -294,7 +294,7 @@ namespace INMOST return ret*var.coef; case AD_VAL: lval = DerivativePrecompute(*var.left, e, values, user_data); - rval = Evaluate(*var.left,e,user_data); + rval = Evaluate(*var.right,e,user_data); values.push_back(rval); return lval*var.coef; } diff --git a/cell.cpp b/cell.cpp index 1fca91a4f19abfde0ea5594b2584c08178fde749..7e75d54b43f839575c9c99c61e5c3b7592dd10fe 100644 --- a/cell.cpp +++ b/cell.cpp @@ -341,17 +341,22 @@ namespace INMOST { adj_type const & lc = m->LowConn(GetHandle()); aret.reserve(lc.size()); + if( lc.size() < 1 ) return aret; HandleType q = lc[0]; //edge 0 adj_type const & qlc = m->LowConn(q); - aret.push_back(qlc[0]); //node 0 - aret.push_back(qlc[1]); //node 1 + if( qlc.size() > 0 ) aret.push_back(qlc[0]); //node 0 + if( qlc.size() > 1 ) aret.push_back(qlc[1]); //node 1 + if( lc.size() < 2 ) return aret; HandleType r = lc[1]; //edge 1 adj_type const & rlc = m->LowConn(r); - if( aret.data()[0] == rlc[0] || aret.data()[0] == rlc[1] ) + if( aret.size() == 2 && rlc.size() == 2 ) { - HandleType temp = aret.data()[0]; - aret.data()[0] = aret.data()[1]; - aret.data()[1] = temp; + if( aret.data()[0] == rlc[0] || aret.data()[0] == rlc[1] ) + { + HandleType temp = aret.data()[0]; + aret.data()[0] = aret.data()[1]; + aret.data()[1] = temp; + } } adj_type::size_type it = 1, iend = lc.size()-1; while(it < iend) //loop over edges @@ -392,22 +397,27 @@ namespace INMOST adj_type const & lc = m->LowConn(GetHandle()); aret.reserve(lc.size()); i = m->getNext(lc.data(),static_cast(lc.size()),i,hm); + if( i == lc.size() ) return aret; HandleType q = lc[i]; //edge 0 adj_type const & qlc = m->LowConn(q); k = m->getNext(qlc.data(),static_cast(qlc.size()),k,hm); - aret.push_back(qlc[k]); //node 0 + if( k != qlc.size() ) aret.push_back(qlc[k]); //node 0 k = m->getNext(qlc.data(),static_cast(qlc.size()),k,hm); - aret.push_back(qlc[k]); //node 1 + if( k != qlc.size() ) aret.push_back(qlc[k]); //node 1 i = m->getNext(lc.data(),static_cast(lc.size()),i,hm); + if( i == lc.size() ) return aret; HandleType r = lc[i]; //edge 1 adj_type const & rlc = m->LowConn(r); k1 = m->getNext(rlc.data(),static_cast(rlc.size()),k1,hm); k2 = m->getNext(rlc.data(),static_cast(rlc.size()),k1,hm); - if( aret.data()[0] == rlc[k1] || aret.data()[0] == rlc[k2] ) + if( k1 != rlc.size() && k2 != rlc.size() && aret.size() == 2 ) { - HandleType temp = aret.data()[0]; - aret.data()[0] = aret.data()[1]; - aret.data()[1] = temp; + if( aret.data()[0] == rlc[k1] || aret.data()[0] == rlc[k2] ) + { + HandleType temp = aret.data()[0]; + aret.data()[0] = aret.data()[1]; + aret.data()[1] = temp; + } } adj_type::size_type it = 1, iend = lc.size()-1; while(it < iend) if( !m->GetMarker(lc[it],hm) ) //loop over edges diff --git a/examples/MatSolve/main.cpp b/examples/MatSolve/main.cpp index 0570c68592233d81b9be501b4405e0c3b9f65a34..a669ad63b97719a2f8ba95502928489a4d00d1da 100644 --- a/examples/MatSolve/main.cpp +++ b/examples/MatSolve/main.cpp @@ -30,6 +30,7 @@ int main(int argc, char ** argv) case 6: type = Solver::Trilinos_ML; break; case 7: type = Solver::ANI; break; case 8: type = Solver::INNER_MPTILUC; break; + case 9: type = Solver::INNER_MPTILU2; break; } Solver::Initialize(&argc,&argv,argc > 4 ? argv[4] : NULL); // Initialize the linear solver in accordance with args { @@ -81,8 +82,8 @@ int main(int argc, char ** argv) s.SetParameterEnum("adapt_ddpq_tolerance",0); s.SetParameterReal("drop_tolerance",1.0e-2); - s.SetParameterReal("reuse_tolerance",1.0e-3); - s.SetParameterReal("ddpq_tolerance",0.4); + s.SetParameterReal("reuse_tolerance",1.0e-4); + s.SetParameterReal("ddpq_tolerance",0.0); s.SetParameterEnum("condition_estimation",1); diff --git a/examples/OctreeCutcell/CMakeLists.txt b/examples/OctreeCutcell/CMakeLists.txt index 253db0c994e5987e8925ee797d050d380f1db39e..1586d03f7f55a1d1e102981b46712266d3d8856b 100644 --- a/examples/OctreeCutcell/CMakeLists.txt +++ b/examples/OctreeCutcell/CMakeLists.txt @@ -46,5 +46,5 @@ else(OPENGL_FOUND) message("OpenGL not found") endif(OPENGL_FOUND) -install(TARGETS OctreeCutcell EXPORT inmost-targets RUNTIME DESTINATION bin) - +install(TARGETS OctreeCutcell EXPORT inmost-targets RUNTIME DESTINATION bin/OctreeCutcell) +install(DIRECTORY Obj EXPORT inmost-targets DESTINATION bin/OctreeCutcell) diff --git a/examples/OctreeCutcell/main.cpp b/examples/OctreeCutcell/main.cpp index b178897d9023f7aee43f732d7b4e588c64f835e3..8c43d3e2334d61bb363f541592ae57b843796940 100644 --- a/examples/OctreeCutcell/main.cpp +++ b/examples/OctreeCutcell/main.cpp @@ -843,22 +843,33 @@ int main(int argc, char ** argv) InitObj(); { std::vector layers; - for(int i = 0; i < 3; i++) + for(int i = 1; i <= 3; i++) { std::stringstream name; - name << "Obj/bound" << i+1 << ".obj"; + //name << "Obj/bound" << i+1 << ".obj"; + name << "oil_obj2/proj/layer" << i << ".obj"; layers.push_back(name.str()); } make_proj.ReadLayers(layers); } { std::vector layers; - for(int i = 0; i < 6; i++) + for(int i = 1; i <= 19; i++) + { + std::stringstream name; + //name << "Obj/rt" << i+1 << ".obj"; + name << "oil_obj2/mat/layer" << i << ".obj"; + layers.push_back(name.str()); + } + /* + for(int i = 1; i <= 3; i++) { std::stringstream name; - name << "Obj/rt" << i+1 << ".obj"; + //name << "Obj/rt" << i+1 << ".obj"; + name << "oil_obj2/proj/layer" << i << ".obj"; layers.push_back(name.str()); } + */ get_type.ReadLayers(layers); } diff --git a/examples/OctreeCutcell/obj.cpp b/examples/OctreeCutcell/obj.cpp index 76bd08e1d3beb9546ed792065aa5572754d9e9f5..b39918acc304177801661334312e0603f6abb291 100644 --- a/examples/OctreeCutcell/obj.cpp +++ b/examples/OctreeCutcell/obj.cpp @@ -22,7 +22,7 @@ char * nowarn2 = NULL; //~ extern Projection make_proj; -bool swapyz = false; +bool swapyz = true; struct ObjVertex @@ -544,6 +544,7 @@ int ReadObj(char * file) } n = objs + cur; strcpy(n->filename,file); + printf("Reading %s\n",file); // nowarn2 = getcwd(n->filedir,1024); FILE * f = fopen(file,"r"); if( f == NULL ) @@ -805,6 +806,14 @@ int ReadObj(char * file) &n->verts[n->nverts].v[0], &n->verts[n->nverts].v[1], &n->verts[n->nverts].v[2]); + + n->verts[n->nverts].v[0] /= 99.5; + n->verts[n->nverts].v[1] /= 32; + n->verts[n->nverts].v[2] /= 99.5; + n->verts[n->nverts].v[0] -= 0.001; + n->verts[n->nverts].v[1] -= 0.001; + n->verts[n->nverts].v[2] += 0.001; + n->verts[n->nverts].v[2] *= -1; if( swapyz ) { double temp = n->verts[n->nverts].v[1]; @@ -955,6 +964,7 @@ int ReadObj(char * file) if( n->minz > n->pols[i].minz ) n->minz = n->pols[i].minz; } + printf("bounds: %g:%g %g:%g %g:%g\n",n->minx,n->maxx,n->miny,n->maxy,n->minz,n->maxz); kdtree_build(n->t,n->p,n->npols,0); /* double center[3]; diff --git a/examples/OctreeCutcell/octgrid.cpp b/examples/OctreeCutcell/octgrid.cpp index d818dfc90a7ae8a711f5495f00a8dd160e71a318..2f574c79334db9b7238e4905a12a3c10b334a59a 100644 --- a/examples/OctreeCutcell/octgrid.cpp +++ b/examples/OctreeCutcell/octgrid.cpp @@ -156,22 +156,22 @@ int cellAround(struct grid * g, int m, int side, int neighbours[1<<(DIM-1)]) void vertDestroyINMOST(struct grid *g, int m) { //printf("%s\n",__FUNCTION__); - if( g->verts[m].mv.isValid() && !g->verts[m].mv.Hidden() ) + if( g->verts[m].mv != InvalidHandle() ) { - g->verts[m].mv->Delete(); + g->mesh->Delete(g->verts[m].mv); } } void vertCreateINMOST(struct grid * g, int m) { Storage::real xyz[3]; - if( g->verts[m].mv.isValid() ) return; + if( g->verts[m].mv != InvalidHandle() ) return; vertGetCoord(g,m,xyz); g->transformation(xyz); - g->verts[m].mv = g->mesh->CreateNode(xyz); - g->verts[m].mv->SetMarker(g->octree_node); + g->verts[m].mv = g->mesh->CreateNode(xyz)->GetHandle(); + g->mesh->SetMarker(g->verts[m].mv,g->octree_node); //g->verts[m].mv->Integer(g->new_marker) = 1; - g->vert_to_INMOST(g,m,g->verts[m].mv); + g->vert_to_INMOST(g,m,Node(g->mesh,g->verts[m].mv)); } void cellDestroyINMOST(struct grid * g, int m) @@ -299,13 +299,13 @@ void cellGetFaceVerts(struct grid * g, int m, int side, int * nverts, Node verts for(i = 0; i < 4; i++) { mid[1+faces[0]] = false; - verts[(faces[1+faces[0]] = (*nverts))] = g->verts[g->cells[m].vertexes[nvf[side][i]]].mv; + verts[(faces[1+faces[0]] = (*nverts))] = Node(g->mesh,g->verts[g->cells[m].vertexes[nvf[side][i]]].mv); faces[0]++; (*nverts)++; if( (middle = vertGetMiddle(g,m,side,i)) != -1) { mid[1+faces[0]] = true; - verts[(faces[1+faces[0]] = (*nverts))] = g->verts[middle].mv; + verts[(faces[1+faces[0]] = (*nverts))] = Node(g->mesh,g->verts[middle].mv); faces[0]++; (*nverts)++; @@ -1204,7 +1204,7 @@ void cellCreateINMOST(struct grid * g, int m, bool print = false) centers[0] = matcenter(mat0d,&n0->Coords()[0],0); centers[1] = matcenter(mat2d,&n2->Coords()[0],0); //make iterations to find correct position of the node - int max = 8; + int max = 16; int found = 0; for(int q = 0; q < max; q++) { @@ -2357,7 +2357,7 @@ exit_work: if( !success ) for(j = 0; j < l; j++) if( node_in_face[j].isValid() ) centers.push_back(matcenter(node_in_face[j]->IntegerArray(g->materials),&node_in_face[j]->Coords()[0])); //make iterations to find correct position of the node - int max = 4; + int max = 8; Storage::real div = 1.0/centers.size(); for(int q = 0; q < max; q++) { @@ -3485,7 +3485,7 @@ void cellInit(struct grid * g, int cell, int parent) void vertInit(struct grid * g, int vert) { int i; - g->verts[vert].mv = InvalidNode(); + g->verts[vert].mv = InvalidHandle(); for(i = 0; i < 1<verts[vert].env[i] = -1; } diff --git a/examples/OctreeCutcell/octgrid.h b/examples/OctreeCutcell/octgrid.h index ae0da3e2e2fec2c2c950727a4fbdcdc03e664152..ccb534eaeeb58620c71b602d3edd3f274a48c903 100644 --- a/examples/OctreeCutcell/octgrid.h +++ b/examples/OctreeCutcell/octgrid.h @@ -65,7 +65,7 @@ struct vert { int busy; int env[1< nodes = Element(this,e)->getNodes(); - Storage::real c[3]; - vec_diff(nodes[0]->Coords().data(),nodes[1]->Coords().data(),c,mdim); - *ret = vec_len(c,mdim); + if( nodes.size() > 1 ) + { + Storage::real c[3]; + vec_diff(nodes[0]->Coords().data(),nodes[1]->Coords().data(),c,mdim); + *ret = vec_len(c,mdim); + } + else *ret = 0; //~ if( isnan(*ret) || fabs(*ret) < 1e-15 ) throw -1; break; } case 2: //area of face { ElementArray nodes = Element(this,e)->getNodes(); - real x[3] = {0,0,0}; - Storage::real_array x0 = nodes[0].Coords(); - for(ElementArray::size_type i = 1; i < nodes.size()-1; i++) + if( nodes.size() > 2 ) { - Storage::real_array v1 = nodes[i].Coords(); - Storage::real_array v2 = nodes[i+1].Coords(); - if( mdim == 3 ) + real x[3] = {0,0,0}; + Storage::real_array x0 = nodes[0].Coords(); + for(ElementArray::size_type i = 1; i < nodes.size()-1; i++) { - x[0] += (v1[1]-x0[1])*(v2[2]-x0[2]) - (v1[2]-x0[2])*(v2[1]-x0[1]); - x[1] += (v1[2]-x0[2])*(v2[0]-x0[0]) - (v1[0]-x0[0])*(v2[2]-x0[2]); + Storage::real_array v1 = nodes[i].Coords(); + Storage::real_array v2 = nodes[i+1].Coords(); + if( mdim == 3 ) + { + x[0] += (v1[1]-x0[1])*(v2[2]-x0[2]) - (v1[2]-x0[2])*(v2[1]-x0[1]); + x[1] += (v1[2]-x0[2])*(v2[0]-x0[0]) - (v1[0]-x0[0])*(v2[2]-x0[2]); + } + x[2] += (v1[0]-x0[0])*(v2[1]-x0[1]) - (v1[1]-x0[1])*(v2[0]-x0[0]); } - x[2] += (v1[0]-x0[0])*(v2[1]-x0[1]) - (v1[1]-x0[1])*(v2[0]-x0[0]); - } - *ret = sqrt(x[0]*x[0]+x[1]*x[1]+x[2]*x[2])*0.5; + *ret = sqrt(x[0]*x[0]+x[1]*x[1]+x[2]*x[2])*0.5; + } else *ret = 0; //~ if( isnan(*ret) || fabs(*ret) < 1e-15 ) throw -1; break; } @@ -596,24 +603,27 @@ namespace INMOST } *ret /= 6.0; */ - Storage::real fcnt[3], fnrm[3];// , area; - for(ElementArray::size_type i = 0; i < faces.size(); i++) + if( faces.size() > 3 ) { - faces[i]->Centroid(fcnt); - faces[i]->OrientedNormal(me,fnrm); - /* - area = sqrt(vec_dot_product(fnrm,fnrm,mdim)); - if( area > 0 ) + Storage::real fcnt[3], fnrm[3];// , area; + for(ElementArray::size_type i = 0; i < faces.size(); i++) { - fnrm[0] /= area; - fnrm[1] /= area; - fnrm[2] /= area; - *ret += vec_dot_product(fcnt,fnrm,mdim) * area; + faces[i]->Centroid(fcnt); + faces[i]->OrientedNormal(me,fnrm); + /* + area = sqrt(vec_dot_product(fnrm,fnrm,mdim)); + if( area > 0 ) + { + fnrm[0] /= area; + fnrm[1] /= area; + fnrm[2] /= area; + *ret += vec_dot_product(fcnt,fnrm,mdim) * area; + } + */ + *ret += vec_dot_product(fcnt,fnrm,mdim); } - */ - *ret += vec_dot_product(fcnt,fnrm,mdim); + *ret /= 3.0; } - *ret /= 3.0; /* if( *ret < 0 || (*ret) != (*ret) ) { @@ -759,10 +769,18 @@ namespace INMOST if( edim == 1 ) { ElementArray n = Element(this,e)->getNodes(); - Storage::real_array a = n[0].Coords(); - Storage::real_array b = n[1].Coords(); - for(integer j = 0; j < dim; j++) - ret[j] = (a[j] + b[j])*0.5; + if( n.size() == 2 ) + { + Storage::real_array a = n[0].Coords(); + Storage::real_array b = n[1].Coords(); + for(integer j = 0; j < dim; j++) + ret[j] = (a[j] + b[j])*0.5; + } + else if( n.size() == 1 ) + { + Storage::real_array a = n[0].Coords(); + for(integer j = 0; j < dim; j++) ret[j] = a[j]; + } } else if( edim == 2 ) { @@ -776,37 +794,49 @@ namespace INMOST //~ } //~ else //~ { - Storage::real_array x0 = nodes[0].Coords(); - for(ElementArray::size_type i = 1; i < nodes.size()-1; i++) + if( nodes.size() > 2 ) { - Storage::real_array v1 = nodes[i].Coords(); - Storage::real_array v2 = nodes[i+1].Coords(); - if( mdim == 3 ) + Storage::real_array x0 = nodes[0].Coords(); + for(ElementArray::size_type i = 1; i < nodes.size()-1; i++) { - x[0] += (v1[1]-x0[1])*(v2[2]-x0[2]) - (v1[2]-x0[2])*(v2[1]-x0[1]); - x[1] += (v1[2]-x0[2])*(v2[0]-x0[0]) - (v1[0]-x0[0])*(v2[2]-x0[2]); + Storage::real_array v1 = nodes[i].Coords(); + Storage::real_array v2 = nodes[i+1].Coords(); + if( mdim == 3 ) + { + x[0] += (v1[1]-x0[1])*(v2[2]-x0[2]) - (v1[2]-x0[2])*(v2[1]-x0[1]); + x[1] += (v1[2]-x0[2])*(v2[0]-x0[0]) - (v1[0]-x0[0])*(v2[2]-x0[2]); + } + x[2] += (v1[0]-x0[0])*(v2[1]-x0[1]) - (v1[1]-x0[1])*(v2[0]-x0[0]); } - x[2] += (v1[0]-x0[0])*(v2[1]-x0[1]) - (v1[1]-x0[1])*(v2[0]-x0[0]); + s = sqrt(x[0]*x[0]+x[1]*x[1]+x[2]*x[2]); + x[0] /= s; x[1] /= s; x[2] /= s; //here we obtain the unit normal + //~ } + //here we compute the center + Storage::real_array v0 = nodes[0].Coords(); + for(ElementArray::size_type j = 1; j < nodes.size()-1; j++) + { + Storage::real_array v1 = nodes[j].Coords(); + Storage::real_array v2 = nodes[j+1].Coords(); + for(integer k = 0; k < mdim; k++) + { + x1[k] = v0[k] - v1[k]; + x2[k] = v0[k] - v2[k]; + } + d = det3v(x1,x2,x); //here we use unit normal + for(integer k = 0; k < mdim; k++) + ret[k] += d*(v0[k]+v1[k]+v2[k]); + } + for(integer k = 0; k < mdim; k++) ret[k] /= 3.0 * s; } - s = sqrt(x[0]*x[0]+x[1]*x[1]+x[2]*x[2]); - x[0] /= s; x[1] /= s; x[2] /= s; //here we obtain the unit normal - //~ } - //here we compute the center - Storage::real_array v0 = nodes[0].Coords(); - for(ElementArray::size_type j = 1; j < nodes.size()-1; j++) + else { - Storage::real_array v1 = nodes[j].Coords(); - Storage::real_array v2 = nodes[j+1].Coords(); - for(integer k = 0; k < mdim; k++) + for(ElementArray::size_type i = 0; i < nodes.size(); i++) { - x1[k] = v0[k] - v1[k]; - x2[k] = v0[k] - v2[k]; + Storage::real_array c = nodes[i].Coords(); + for(integer k = 0; k < mdim; k++) ret[k] += c[k]; } - d = det3v(x1,x2,x); //here we use unit normal - for(integer k = 0; k < mdim; k++) - ret[k] += d*(v0[k]+v1[k]+v2[k]); + for(integer k = 0; k < mdim; k++) ret[k] /= static_cast(nodes.size()); } - for(integer k = 0; k < mdim; k++) ret[k] /= 3.0 * s; } else if( edim == 3 ) { @@ -875,19 +905,22 @@ namespace INMOST else if( edim == 1 )//&& mdim == 2 ) { ElementArray nodes = Element(this,e)->getNodes(); - Storage::real_array a = nodes[0].Coords(); - Storage::real_array b = nodes[1].Coords(); - ret[0] = b[1] - a[1]; - ret[1] = a[0] - b[0]; - Storage::real l = sqrt(ret[0]*ret[0]+ret[1]*ret[1]); - if( l ) + if( nodes.size() > 1 ) { - ret[0] /= l; - ret[1] /= l; + Storage::real_array a = nodes[0].Coords(); + Storage::real_array b = nodes[1].Coords(); + ret[0] = b[1] - a[1]; + ret[1] = a[0] - b[0]; + Storage::real l = sqrt(ret[0]*ret[0]+ret[1]*ret[1]); + if( l ) + { + ret[0] /= l; + ret[1] /= l; + } + l = sqrt((a[0]-b[0])*(a[0]-b[0])+(a[1]-b[1])*(a[1]-b[1])); + ret[0] *= l; + ret[1] *= l; } - l = sqrt((a[0]-b[0])*(a[0]-b[0])+(a[1]-b[1])*(a[1]-b[1])); - ret[0] *= l; - ret[1] *= l; } } break; diff --git a/inmost_mesh.h b/inmost_mesh.h index d6b9b3412b820c318f55b4f878701f71db8224b7..d408bfa2a5e5912f1ffada86efd884e9539a73c8 100644 --- a/inmost_mesh.h +++ b/inmost_mesh.h @@ -9,7 +9,6 @@ #define __NDT 3 #define __NET 6 - namespace INMOST { @@ -101,7 +100,7 @@ namespace INMOST static const TopologyCheck PROHIBIT_POLYGON = 0x00100000; //done//allow only known types of elements: Tet,Quad, Line static const TopologyCheck PROHIBIT_MULTIPOLYGON = 0x00200000; //done//(needs NEED_TEST_CLOSURE) produce error if faces of cell are not closed, or cell is folded static const TopologyCheck PROHIBIT_POLYHEDRON = 0x00400000; //done//allow only known types of elements: Tet,Hex,Prism,Pyramid - static const TopologyCheck FACE_EDGES_ORDER = 0x00800000; //not implemented(implement CheckEdgeOrder,FixEdgeOrder, class Cell,Face)//edges of the face should form one closed loop + static const TopologyCheck FACE_EDGES_ORDER = 0x00800000; //done//edges of the face should form one closed loop static const TopologyCheck PROHIBIT_CONCAVE_FACE = 0x01000000; //not implemented//don't allow concave face static const TopologyCheck PROHIBIT_CONCAVE_CELL = 0x02000000; //not implemented//don't allow concave cell static const TopologyCheck PROHIBIT_NONSTAR_FACE = 0x04000000; //not implemented//don't allow non-star shaped face @@ -2039,7 +2038,13 @@ namespace INMOST /// Set a marker on the element represented by handle. /// @param h element handle /// @param n stores byte number and byte bit mask that represent marker - void SetMarker (HandleType h,MarkerType n) {static_cast(MGetDenseLink(h,MarkersTag()))[n >> MarkerShift] |= static_cast(n & MarkerMask);} + void SetMarker (HandleType h,MarkerType n) + { +#if defined(USE_OMP) +#pragma omp atomic +#endif + static_cast(MGetDenseLink(h,MarkersTag()))[n >> MarkerShift] |= static_cast(n & MarkerMask); + } /// Set a marker on the set of handles. /// @param h set of handles /// @param n number of handles @@ -2053,7 +2058,13 @@ namespace INMOST /// Remove the marker from the element. /// @param h element handle /// @param n stores byte number and byte bit mask that represent marker - void RemMarker (HandleType h,MarkerType n) {static_cast(MGetDenseLink(h,MarkersTag()))[n >> MarkerShift] &= ~static_cast(n & MarkerMask);} + void RemMarker (HandleType h,MarkerType n) + { +#if defined(USE_OMP) +#pragma omp atomic +#endif + static_cast(MGetDenseLink(h,MarkersTag()))[n >> MarkerShift] &= ~static_cast(n & MarkerMask); + } /// Remove the marker from the set of handles. /// @param h set of handles /// @param n number of handles diff --git a/inmost_solver.h b/inmost_solver.h index 0df504f52f23951e2468da4a17ad8fee0fa0f780..18f7670391a636397b2e262e3b6ca2cdca5c010d 100644 --- a/inmost_solver.h +++ b/inmost_solver.h @@ -44,6 +44,7 @@ namespace INMOST INNER_ILU2, ///< inner Solver based on BiCGStab(L) solver with second order ILU factorization as preconditioner. INNER_DDPQILUC, ///< inner Solver based on BiCGStab(L) solver with second order Crout-ILU with inversed-based condition estimation and unsymmetric reordering for diagonal dominance as preconditioner. INNER_MPTILUC, ///< inner Solver based on BiCGStab(L) solver with second order Crout-ILU with inversed-based condition estimation and maximum product transversal reordering as preconditioner. + INNER_MPTILU2, ///< inner Solver based on BiCGStab(L) solver with second order ILU and maximum product transversal reordering as preconditioner. Trilinos_Aztec, ///< external Solver AztecOO from Trilinos package Trilinos_Belos, ///< external Solver Belos from Trilinos package, currently without preconditioner Trilinos_ML, ///< external Solver AztecOO with ML preconditioner @@ -131,8 +132,9 @@ namespace INMOST //~ void BeginSequentialCode() {for(int i = 0; i < rank; i++) MPI_Barrier(comm);} //~ void EndSequentialCode() {for(int i = rank; i < size; i++) MPI_Barrier(comm);} - /// Get the scalar product of the specified interval of the distributed vector. - INMOST_DATA_REAL_TYPE ScalarProd(Vector const & left, Vector const & right, INMOST_DATA_ENUM_TYPE index_begin, INMOST_DATA_ENUM_TYPE index_end) const; + // Get the scalar product of the specified interval of the distributed vector. + // Conflicts with OpenMP, should not be used in future + //void ScalarProd(Vector const & left, Vector const & right, INMOST_DATA_ENUM_TYPE index_begin, INMOST_DATA_ENUM_TYPE index_end, INMOST_DATA_REAL_TYPE & sum) const; }; /// Distributed vector class. @@ -181,7 +183,7 @@ namespace INMOST /// Get the communicator which the vector is associated with. INMOST_MPI_Comm GetCommunicator() const {return comm;} - + void Swap(Solver::Vector & other) {data.swap(other.data); name.swap(other.name); std::swap(is_parallel,other.is_parallel); std::swap(comm,other.comm);} /// Save the distributed vector to a single data file using parallel MPI I/O. @@ -350,6 +352,12 @@ namespace INMOST /// that allow for the modification of individual entries. /// @param size New size of the row. void Resize(INMOST_DATA_ENUM_TYPE size) {data.resize(size);} + + void Print() + { + for(iterator it = Begin(); it != End(); ++it) std::cout << "(" << it->first << "," << it->second << ") "; + std::cout << std::endl; + } }; diff --git a/mesh.cpp b/mesh.cpp index 80b58e4b1c2f324fa0e649c118c912df7fefab21..c84cdfea71b8fcc43f46d6456ddd809a52a5875c 100644 --- a/mesh.cpp +++ b/mesh.cpp @@ -1528,7 +1528,7 @@ namespace INMOST void Mesh::UntieElement(integer etypenum, integer ID) { #if defined(USE_OMP) -#pragma omp critical [links_interraction] +#pragma omp critical (links_interraction) #endif { integer ADDR = links[etypenum][ID]; @@ -1547,7 +1547,7 @@ namespace INMOST { integer ID = -1, ADDR = -1; #if defined(USE_OMP) -#pragma omp critical [links_interraction] +#pragma omp critical (links_interraction) #endif { INMOST_DATA_ENUM_TYPE old_size = GetArrayCapacity(etypenum), new_size; diff --git a/mesh_file.cpp b/mesh_file.cpp index 7bea28efb254dc633f1cdd27a3f003a6ddf77855..8a13c84786d4bf24827e31d51c92719956b722fb 100644 --- a/mesh_file.cpp +++ b/mesh_file.cpp @@ -4626,6 +4626,10 @@ safe_output: { MPI_File fh; MPI_Status stat; + REPORT_MPI(ierr = MPI_File_open(GetCommunicator(),const_cast(File.c_str()), MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh)); + if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); + REPORT_MPI(ierr = MPI_File_close(&fh)); + if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); REPORT_MPI(ierr = MPI_File_open(GetCommunicator(),const_cast(File.c_str()),MPI_MODE_CREATE | MPI_MODE_WRONLY,MPI_INFO_NULL,&fh)); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); if( GetProcessorRank() == 0 ) diff --git a/mesh_parallel.cpp b/mesh_parallel.cpp index 5be1b0e50a88738e05f3d22ceb409e231772c3cd..b0710cf2e17cc5ff6c58ff545a28bd4cdddab6fb 100644 --- a/mesh_parallel.cpp +++ b/mesh_parallel.cpp @@ -368,7 +368,8 @@ namespace INMOST void Mesh::Integrate(Storage::real * input, Storage::integer size) { #if defined(USE_MPI) - static dynarray temp(size); + static dynarray temp; + temp.resize(size); memcpy(temp.data(),input,sizeof(Storage::real)*size); MPI_Allreduce(temp.data(),input,size,INMOST_MPI_DATA_REAL_TYPE,MPI_SUM,comm); #else//USE_MPI @@ -379,7 +380,8 @@ namespace INMOST void Mesh::Integrate(Storage::integer * input, Storage::integer size) { #if defined(USE_MPI) - static dynarray temp(size); + static dynarray temp; + temp.resize(size); memcpy(temp.data(),input,sizeof(Storage::integer)*size); MPI_Allreduce(temp.data(),input,size,INMOST_MPI_DATA_INTEGER_TYPE,MPI_SUM,comm); #else//USE_MPI @@ -440,7 +442,8 @@ namespace INMOST void Mesh::AggregateMax(Storage::real * input, Storage::integer size) { #if defined(USE_MPI) - static dynarray temp(size); + static dynarray temp; + temp.resize(size); memcpy(temp.data(),input,sizeof(Storage::real)*size); MPI_Allreduce(temp.data(),input,size,INMOST_MPI_DATA_REAL_TYPE,MPI_MAX,comm); #else//USE_MPI @@ -451,7 +454,8 @@ namespace INMOST void Mesh::AggregateMax(Storage::integer * input, Storage::integer size) { #if defined(USE_MPI) - static dynarray temp(size); + static dynarray temp; + temp.resize(size); memcpy(temp.data(),input,sizeof(Storage::integer)*size); MPI_Allreduce(temp.data(),input,size,INMOST_MPI_DATA_INTEGER_TYPE,MPI_MAX,comm); #else//USE_MPI @@ -484,7 +488,8 @@ namespace INMOST void Mesh::AggregateMin(Storage::real * input, Storage::integer size) { #if defined(USE_MPI) - static dynarray temp(size); + static dynarray temp; + temp.resize(size); memcpy(temp.data(),input,sizeof(Storage::real)*size); MPI_Allreduce(temp.data(),input,size,INMOST_MPI_DATA_REAL_TYPE,MPI_MIN,comm); #else//USE_MPI @@ -495,7 +500,8 @@ namespace INMOST void Mesh::AggregateMin(Storage::integer * input, Storage::integer size) { #if defined(USE_MPI) - static dynarray temp(size); + static dynarray temp; + temp.resize(size); memcpy(temp.data(),input,sizeof(Storage::integer)*size); MPI_Allreduce(temp.data(),input,size,INMOST_MPI_DATA_INTEGER_TYPE,MPI_MIN,comm); #else//USE_MPI diff --git a/modify.cpp b/modify.cpp index 211705c083bf4ee52dc99224b672f38b7e0fc627..86e081007ae94de9a621ee8c0d84425da1a50202 100644 --- a/modify.cpp +++ b/modify.cpp @@ -2128,7 +2128,7 @@ public: bool Mesh::Delete(HandleType h) { - if(Hide(h)) + if(!New(h) && Hide(h)) { if( GetElementType() != CELL ) //mark all elements that rely on this that they should be deleted { diff --git a/solver.cpp b/solver.cpp index ae5d32db63955b18ec9bc00236f5963bf88e2e01..8c804aaa4ddd21dc674f07370db40af581b8c9e1 100644 --- a/solver.cpp +++ b/solver.cpp @@ -7,6 +7,9 @@ #if defined(HAVE_SOLVER_MPTILUC2) #include "solver_mtiluc2.hpp" #endif +#if defined(HAVE_SOLVER_MPTILU2) +#include "solver_mtilu2.hpp" +#endif #include "solver_bcgsl.hpp" #include #include @@ -35,6 +38,8 @@ #include "Teuchos_XMLParameterListHelpers.hpp" #endif +//#define USE_OMP + #define KSOLVER BCGSL_solver //#define KSOLVER BCGS_solver @@ -84,10 +89,15 @@ namespace INMOST { #if defined(USE_MPI) if( GetSize() == 1 ) return; - int ierr = 0; - dynarray temp(num); - memcpy(temp.data(),inout,sizeof(INMOST_DATA_REAL_TYPE)*num); - GUARD_MPI(MPI_Allreduce(temp.data(),inout,num,INMOST_MPI_DATA_REAL_TYPE,MPI_SUM,comm)); +#if defined(USE_OMP) +#pragma omp single +#endif + { + int ierr = 0; + dynarray temp(num); + memcpy(temp.data(),inout,sizeof(INMOST_DATA_REAL_TYPE)*num); + GUARD_MPI(MPI_Allreduce(temp.data(),inout,num,INMOST_MPI_DATA_REAL_TYPE,MPI_SUM,comm)); + } #else (void) inout; (void) num; @@ -207,11 +217,15 @@ namespace INMOST INMOST_DATA_ENUM_TYPE i = LinkedList.begin()->first, k = 0; while( i != EOL ) { - r.GetIndex(k) = i-1; - r.GetValue(k) = LinkedList[i].second; + if( LinkedList[i].second ) + { + r.GetIndex(k) = i-1; + r.GetValue(k) = LinkedList[i].second; + ++k; + } i = LinkedList[i].first; - ++k; } + r.Resize(k); } void Solver::OrderInfo::PrepareMatrix(Matrix & m, INMOST_DATA_ENUM_TYPE overlap) @@ -829,42 +843,47 @@ namespace INMOST //std::cout << __FUNCTION__ << " start" << std::endl; #if defined(USE_MPI) if( GetSize() == 1 ) return; - //use MPI_Put/MPI_Get to update vector - assert(x.isParallel()); //the vector was prepared - INMOST_DATA_ENUM_TYPE i, j = 1, k, l = 0; - int ierr; - for(i = 0; i < vector_exchange_recv[0]; i++) - { - //std::cout << GetRank() << " MPI_Irecv size " << vector_exchange_recv[j+1] << " dest " << vector_exchange_recv[j] << " tag " << vector_exchange_recv[j]*size+rank << std::endl; - GUARD_MPI(MPI_Irecv(&recv_storage[l],vector_exchange_recv[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_recv[j],vector_exchange_recv[j]*size+rank,comm,&recv_requests[i])); - l += vector_exchange_recv[j+1]; - j += vector_exchange_recv[j+1] + 2; - } - j = 1, l = 0; - for(i = 0; i < vector_exchange_send[0]; i++) - { - //std::cout << GetRank() << " MPI_Isend size " << vector_exchange_send[j+1] << " dest " << vector_exchange_send[j] << " tag " << rank*size+vector_exchange_send[j] << std::endl; - for(k = 0; k < vector_exchange_send[j+1]; k++) - send_storage[l+k] = x[vector_exchange_send[k+j+2]]; - GUARD_MPI(MPI_Isend(&send_storage[l],vector_exchange_send[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_send[j],rank*size+vector_exchange_send[j],comm,&send_requests[i])); - l += vector_exchange_send[j+1]; - j += vector_exchange_send[j+1] + 2; - } - if( vector_exchange_recv[0] > 0 ) +#if defined(USE_OMP) +#pragma omp single +#endif { - GUARD_MPI(MPI_Waitall(static_cast(recv_requests.size()),&recv_requests[0],MPI_STATUSES_IGNORE)); - j = 1, l = 0; + //use MPI_Put/MPI_Get to update vector + assert(x.isParallel()); //the vector was prepared + INMOST_DATA_ENUM_TYPE i, j = 1, k, l = 0; + int ierr; for(i = 0; i < vector_exchange_recv[0]; i++) { - for(k = 0; k < vector_exchange_recv[j+1]; k++) - x[vector_exchange_recv[k+j+2]] = recv_storage[l+k]; + //std::cout << GetRank() << " MPI_Irecv size " << vector_exchange_recv[j+1] << " dest " << vector_exchange_recv[j] << " tag " << vector_exchange_recv[j]*size+rank << std::endl; + GUARD_MPI(MPI_Irecv(&recv_storage[l],vector_exchange_recv[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_recv[j],vector_exchange_recv[j]*size+rank,comm,&recv_requests[i])); l += vector_exchange_recv[j+1]; j += vector_exchange_recv[j+1] + 2; } - } - if( vector_exchange_send[0] > 0 ) - { - GUARD_MPI(MPI_Waitall(static_cast(send_requests.size()),&send_requests[0],MPI_STATUSES_IGNORE)); + j = 1, l = 0; + for(i = 0; i < vector_exchange_send[0]; i++) + { + //std::cout << GetRank() << " MPI_Isend size " << vector_exchange_send[j+1] << " dest " << vector_exchange_send[j] << " tag " << rank*size+vector_exchange_send[j] << std::endl; + for(k = 0; k < vector_exchange_send[j+1]; k++) + send_storage[l+k] = x[vector_exchange_send[k+j+2]]; + GUARD_MPI(MPI_Isend(&send_storage[l],vector_exchange_send[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_send[j],rank*size+vector_exchange_send[j],comm,&send_requests[i])); + l += vector_exchange_send[j+1]; + j += vector_exchange_send[j+1] + 2; + } + if( vector_exchange_recv[0] > 0 ) + { + GUARD_MPI(MPI_Waitall(static_cast(recv_requests.size()),&recv_requests[0],MPI_STATUSES_IGNORE)); + j = 1, l = 0; + for(i = 0; i < vector_exchange_recv[0]; i++) + { + for(k = 0; k < vector_exchange_recv[j+1]; k++) + x[vector_exchange_recv[k+j+2]] = recv_storage[l+k]; + l += vector_exchange_recv[j+1]; + j += vector_exchange_recv[j+1] + 2; + } + } + if( vector_exchange_send[0] > 0 ) + { + GUARD_MPI(MPI_Waitall(static_cast(send_requests.size()),&send_requests[0],MPI_STATUSES_IGNORE)); + } } #else (void) x; @@ -876,66 +895,75 @@ namespace INMOST //std::cout << __FUNCTION__ << " start" << std::endl; #if defined(USE_MPI) if( GetSize() == 1 ) return; - //use MPI_Put/MPI_Get to update vector - assert(x.isParallel()); //the vector was prepared - INMOST_DATA_ENUM_TYPE i, j = 1, k, l = 0; - int ierr; - for(i = 0; i < vector_exchange_send[0]; i++) - { - //std::cout << GetRank() << " MPI_Irecv size " << vector_exchange_send[j+1] << " dest " << vector_exchange_send[j] << " tag " << vector_exchange_send[j]*size+rank << std::endl; - GUARD_MPI(MPI_Irecv(&send_storage[l],vector_exchange_send[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_send[j],vector_exchange_send[j]*size+rank,comm,&send_requests[i])); - l += vector_exchange_send[j+1]; - j += vector_exchange_send[j+1] + 2; - } - j = 1, l = 0; - for(i = 0; i < vector_exchange_recv[0]; i++) - { - for(k = 0; k < vector_exchange_recv[j+1]; k++) - recv_storage[l+k] = x[vector_exchange_recv[k+j+2]]; - //std::cout << GetRank() << " MPI_Isend size " << vector_exchange_recv[j+1] << " dest " << vector_exchange_recv[j] << " tag " << rank*size+vector_exchange_recv[j] << std::endl; - GUARD_MPI(MPI_Isend(&recv_storage[l],vector_exchange_recv[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_recv[j],rank*size+vector_exchange_recv[j],comm,&recv_requests[i])); - l += vector_exchange_recv[j+1]; - j += vector_exchange_recv[j+1] + 2; - } - if( vector_exchange_send[0] > 0 ) +#if defined(USE_OMP) +#pragma omp single +#endif { - //std::cout << GetRank() << " Waitall send " << send_requests.size() << std::endl; - GUARD_MPI(MPI_Waitall(static_cast(send_requests.size()),&send_requests[0],MPI_STATUSES_IGNORE)); - j = 1, l = 0; + //use MPI_Put/MPI_Get to update vector + assert(x.isParallel()); //the vector was prepared + INMOST_DATA_ENUM_TYPE i, j = 1, k, l = 0; + int ierr; for(i = 0; i < vector_exchange_send[0]; i++) { - for(k = 0; k < vector_exchange_send[j+1]; k++) - x[vector_exchange_send[k+j+2]] += send_storage[l+k]; + //std::cout << GetRank() << " MPI_Irecv size " << vector_exchange_send[j+1] << " dest " << vector_exchange_send[j] << " tag " << vector_exchange_send[j]*size+rank << std::endl; + GUARD_MPI(MPI_Irecv(&send_storage[l],vector_exchange_send[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_send[j],vector_exchange_send[j]*size+rank,comm,&send_requests[i])); l += vector_exchange_send[j+1]; j += vector_exchange_send[j+1] + 2; } - } - if( vector_exchange_recv[0] > 0 ) - { - //std::cout << GetRank() << " Waitall recv " << recv_requests.size() << std::endl; - GUARD_MPI(MPI_Waitall(static_cast(recv_requests.size()),&recv_requests[0],MPI_STATUSES_IGNORE)); + j = 1, l = 0; + for(i = 0; i < vector_exchange_recv[0]; i++) + { + for(k = 0; k < vector_exchange_recv[j+1]; k++) + recv_storage[l+k] = x[vector_exchange_recv[k+j+2]]; + //std::cout << GetRank() << " MPI_Isend size " << vector_exchange_recv[j+1] << " dest " << vector_exchange_recv[j] << " tag " << rank*size+vector_exchange_recv[j] << std::endl; + GUARD_MPI(MPI_Isend(&recv_storage[l],vector_exchange_recv[j+1],INMOST_MPI_DATA_REAL_TYPE,vector_exchange_recv[j],rank*size+vector_exchange_recv[j],comm,&recv_requests[i])); + l += vector_exchange_recv[j+1]; + j += vector_exchange_recv[j+1] + 2; + } + if( vector_exchange_send[0] > 0 ) + { + //std::cout << GetRank() << " Waitall send " << send_requests.size() << std::endl; + GUARD_MPI(MPI_Waitall(static_cast(send_requests.size()),&send_requests[0],MPI_STATUSES_IGNORE)); + j = 1, l = 0; + for(i = 0; i < vector_exchange_send[0]; i++) + { + for(k = 0; k < vector_exchange_send[j+1]; k++) + x[vector_exchange_send[k+j+2]] += send_storage[l+k]; + l += vector_exchange_send[j+1]; + j += vector_exchange_send[j+1] + 2; + } + } + if( vector_exchange_recv[0] > 0 ) + { + //std::cout << GetRank() << " Waitall recv " << recv_requests.size() << std::endl; + GUARD_MPI(MPI_Waitall(static_cast(recv_requests.size()),&recv_requests[0],MPI_STATUSES_IGNORE)); + } } #else (void) x; #endif //std::cout << __FUNCTION__ << " end" << std::endl; } - - INMOST_DATA_REAL_TYPE Solver::OrderInfo::ScalarProd(Vector const & left, Vector const & right, INMOST_DATA_ENUM_TYPE index_begin, INMOST_DATA_ENUM_TYPE index_end) const + /* + void Solver::OrderInfo::ScalarProd(Vector const & left, Vector const & right, INMOST_DATA_ENUM_TYPE index_begin, INMOST_DATA_ENUM_TYPE index_end, INMOST_DATA_REAL_TYPE & sum) const { - INMOST_DATA_REAL_TYPE ret = 0; - for(INMOST_DATA_ENUM_TYPE i = index_begin; i < index_end; ++i) - ret += left[i]*right[i]; - Integrate(&ret,1); - return ret; + INMOST_DATA_INTEGER_TYPE ibeg = index_begin, iend = index_end; +#if defined(USE_OMP) +#pragma omp for reduction(+:sum) +#endif + for(INMOST_DATA_INTEGER_TYPE i = ibeg; i < iend; ++i) + { + sum += left[i]*right[i]; + } + Integrate(&sum,1); } - + */ INMOST_DATA_REAL_TYPE Solver::Row::RowVec(Solver::Vector & x) const { INMOST_DATA_REAL_TYPE ret = 0; - INMOST_DATA_ENUM_TYPE end = Size(),i; - for(i = 0; i < end; i++) ret = ret + x[GetIndex(i)]*GetValue(i); + INMOST_DATA_ENUM_TYPE end = Size(); + for(INMOST_DATA_ENUM_TYPE i = 0; i < end; i++) ret = ret + x[GetIndex(i)]*GetValue(i); return ret; } @@ -1142,6 +1170,10 @@ namespace INMOST int ierr; MPI_File fh; MPI_Status stat; + ierr = MPI_File_open(GetCommunicator(),const_cast(file.c_str()), MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); + if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); + ierr = MPI_File_close(&fh); + if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); ierr = MPI_File_open(GetCommunicator(),const_cast(file.c_str()),MPI_MODE_WRONLY | MPI_MODE_CREATE,MPI_INFO_NULL,&fh); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); if( rank == 0 ) @@ -1151,12 +1183,10 @@ namespace INMOST //header << "% is written by INMOST" << std::endl; //header << "% by MPI_File_* api" << std::endl; header << vecsize << std::endl; - std::string header_data(header.str()); - ierr = MPI_File_write_shared(fh,&header_data[0],static_cast(header_data.size()),MPI_CHAR,&stat); + ierr = MPI_File_write_shared(fh,const_cast(header.str().c_str()),static_cast(header.str().size()),MPI_CHAR,&stat); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); } - std::string local_data(rhs.str()); - ierr = MPI_File_write_ordered(fh,&local_data[0],static_cast(local_data.size()),MPI_CHAR,&stat); + ierr = MPI_File_write_ordered(fh,const_cast(rhs.str().c_str()),static_cast(rhs.str().size()),MPI_CHAR,&stat); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); ierr = MPI_File_close(&fh); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); @@ -1226,6 +1256,10 @@ namespace INMOST int ierr; MPI_File fh; MPI_Status stat; + ierr = MPI_File_open(GetCommunicator(),const_cast(file.c_str()), MPI_MODE_CREATE | MPI_MODE_DELETE_ON_CLOSE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh); + if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); + ierr = MPI_File_close(&fh); + if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); ierr = MPI_File_open(GetCommunicator(),const_cast(file.c_str()),MPI_MODE_WRONLY | MPI_MODE_CREATE,MPI_INFO_NULL,&fh); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); if( rank == 0 ) @@ -1236,12 +1270,11 @@ namespace INMOST header << "% is written by INMOST" << std::endl; header << "% by MPI_File_* api" << std::endl; header << matsize << " " << matsize << " " << nonzero << std::endl; - std::string header_data(header.str()); - ierr = MPI_File_write_shared(fh,&header_data[0],static_cast(header_data.size()),MPI_CHAR,&stat); + //std::string header_data(header.str()); + ierr = MPI_File_write_shared(fh,const_cast(header.str().c_str()),static_cast(header.str().size()),MPI_CHAR,&stat); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); } - std::string local_data(mtx.str()); - ierr = MPI_File_write_ordered(fh,&local_data[0],static_cast(local_data.size()),MPI_CHAR,&stat); + ierr = MPI_File_write_ordered(fh,const_cast(mtx.str().c_str()),static_cast(mtx.str().size()),MPI_CHAR,&stat); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); ierr = MPI_File_close(&fh); if( ierr != MPI_SUCCESS ) MPI_Abort(GetCommunicator(),-1); @@ -1505,7 +1538,7 @@ namespace INMOST solver_data = static_cast(problem); } #endif - if( _pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC) + if( _pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC || _pack == INNER_MPTILU2) { Method * prec; if (_pack == INNER_ILU2) @@ -1533,6 +1566,15 @@ namespace INMOST #else std::cout << "Sorry, maximum product transverse condition estimation crout-ilu preconditioner is not included in this release" << std::endl; prec = NULL; +#endif + } + else if( _pack == INNER_MPTILU2 ) + { +#if defined(__SOLVER_MTILU2__) + prec = new MTILU2_preconditioner(info); +#else + std::cout << "Sorry, maximum product transverse ilu2 preconditioner is not included in this release" << std::endl; + prec = NULL; #endif } solver_data = new KSOLVER(prec, info); @@ -1580,7 +1622,7 @@ namespace INMOST throw - 1; //You should not really want to copy solver's information } #endif - if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC) + if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC || _pack == INNER_MPTILU2) { throw - 1; //You should not really want to copy solver's information } @@ -1631,7 +1673,7 @@ namespace INMOST } } #endif - if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC) + if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC || _pack == INNER_MPTILU2) { if( matrix_data != NULL ) { @@ -1719,7 +1761,7 @@ namespace INMOST throw - 1; //You should not really want to copy solver's information } #endif - if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC) + if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC || _pack == INNER_MPTILU2) { throw - 1; //You should not really want to copy solver's information } @@ -1975,7 +2017,7 @@ namespace INMOST ok = true; } #endif - if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC) + if (_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC || _pack == INNER_MPTILU2) { Solver::Matrix * mat = new Solver::Matrix(A); @@ -2000,7 +2042,7 @@ namespace INMOST { sol->EnumParameter(":estimator") = preconditioner_condition_estimation; } - else sol->EnumParameter(":fill") = static_cast(preconditioner_fill_level); + else if( _pack == INNER_ILU2 ) sol->EnumParameter(":fill") = static_cast(preconditioner_fill_level); if( sizeof(KSOLVER) == sizeof(BCGSL_solver) ) sol->EnumParameter("levels") = solver_gmres_substeps; @@ -2388,7 +2430,7 @@ namespace INMOST } #endif - if(_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC) + if(_pack == INNER_ILU2 || _pack == INNER_DDPQILUC || _pack == INNER_MPTILUC || _pack == INNER_MPTILU2) { IterativeMethod * sol = static_cast(solver_data); diff --git a/solver_bcgsl.hpp b/solver_bcgsl.hpp index db8aaba05be2a764a33cab834e7708fc39d0acc8..556504902557551bd44372cfd547236db3949159 100644 --- a/solver_bcgsl.hpp +++ b/solver_bcgsl.hpp @@ -9,12 +9,15 @@ #include "inmost_solver.h" -//#define PSEUDOINVERSE // same trick as in petsc with pseudoinverse +//#define CONVEX_COMBINATION +#define PSEUDOINVERSE // same trick as in petsc with pseudoinverse //#define USE_LAPACK_SVD // use lapack's dgesvd routine instead of built-in svdnxn //#if !defined(NDEBUG) #define REPORT_RESIDUAL //#endif +//#define USE_OMP + namespace INMOST { @@ -37,9 +40,14 @@ namespace INMOST else result = 0.0; return(result); } - int svdnxn(INMOST_DATA_REAL_TYPE * a,INMOST_DATA_REAL_TYPE * u, INMOST_DATA_REAL_TYPE *w, INMOST_DATA_REAL_TYPE * v, const int n) + int svdnxn(INMOST_DATA_REAL_TYPE * pa,INMOST_DATA_REAL_TYPE * pu, INMOST_DATA_REAL_TYPE *pw, INMOST_DATA_REAL_TYPE * pv, const int n) { - memcpy(u,a,sizeof(INMOST_DATA_REAL_TYPE)*n*n); + shell a(pa,n*n); + shell u(pu,n*n); + shell v(pv,n*n); + shell w(pw,n); + std::copy(a.begin(),a.end(),u.begin()); + //memcpy(u,a,sizeof(INMOST_DATA_REAL_TYPE)*n*n); int flag, i, its, j, jj, k, l, nm; INMOST_DATA_REAL_TYPE c, f, h, s, x, y, z; INMOST_DATA_REAL_TYPE anorm = 0.0, g = 0.0, scale = 0.0; @@ -476,10 +484,10 @@ namespace INMOST info->PrepareVector(r_tilde); info->PrepareVector(x0); info->PrepareVector(t); - tau = new INMOST_DATA_REAL_TYPE[l * 5 + l*l]; - sigma = tau + l*l; - gamma = sigma + l; - theta1 = gamma + l; + tau = new INMOST_DATA_REAL_TYPE[l * 3 + (l+1)*(l+1) + (l+1)*2]; + sigma = tau + (l+1)*(l+1); + gamma = sigma + l+1; + theta1 = gamma + l+1; theta2 = theta1 + l; theta3 = theta2 + l; u = new Solver::Vector[l * 2 + 2]; @@ -553,7 +561,7 @@ namespace INMOST } else { - Alink->MatVec(1.0,t,0,Output); + Alink->MatVec(1.0,Input,0,Output); info->Update(Output); } } @@ -561,8 +569,9 @@ namespace INMOST { assert(isInitialized()); INMOST_DATA_ENUM_TYPE vbeg,vend, vlocbeg, vlocend; + INMOST_DATA_INTEGER_TYPE ivbeg, ivend, ivlocbeg, ivlocend; INMOST_DATA_REAL_TYPE rho0 = 1, rho1, alpha = 0, beta, omega = 1, eta; - INMOST_DATA_REAL_TYPE resid0, resid, rhs_norm;//, temp[2]; + INMOST_DATA_REAL_TYPE resid0, resid, rhs_norm, tau_sum, sigma_sum;//, temp[2]; iters = 0; info->PrepareVector(SOL); info->PrepareVector(RHS); @@ -572,8 +581,11 @@ namespace INMOST if( prec != NULL ) prec->ReplaceRHS(RHS); info->GetLocalRegion(info->GetRank(),vlocbeg,vlocend); info->GetVectorRegion(vbeg,vend); - - //rhs_norm = info->ScalarProd(RHS,RHS,vlocbeg,vlocend); + ivbeg = vbeg; + ivend = vend; + ivlocbeg = vlocbeg; + ivlocend = vlocend; + //info->ScalarProd(RHS,RHS,vlocbeg,vlocend,rhs_norm); rhs_norm = 1; //r[0] = b std::copy(RHS.Begin(),RHS.End(),r[0].Begin()); @@ -586,8 +598,18 @@ namespace INMOST } std::copy(r[0].Begin(),r[0].End(),r_tilde.Begin()); // r_tilde = r[0] std::fill(u[0].Begin(),u[0].End(),0); // u[0] = 0 - resid = info->ScalarProd(r[0],r[0],vlocbeg,vlocend); //resid = dot(r[0],r[0]) - for(INMOST_DATA_ENUM_TYPE k = vbeg; k != vend; k++) // r_tilde = r[0] / dot(r[0],r[0]) + resid = 0; +#if defined(USE_OMP) +#pragma omp parallel for reduction(+:resid) +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + resid += r[0][k]*r[0][k]; + info->Integrate(&resid,1); + //info->ScalarProd(r[0],r[0],vlocbeg,vlocend,resid); //resid = dot(r[0],r[0]) +#if defined(USE_OMP) +#pragma omp parallel for +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) // r_tilde = r[0] / dot(r[0],r[0]) r_tilde[k] /= resid; last_resid = resid = resid0 = sqrt(resid/rhs_norm); //resid = sqrt(dot(r[0],r[0]) last_it = 0; @@ -608,328 +630,511 @@ namespace INMOST reason = "initial solution satisfy tolerances"; goto exit; } - - long double tt, ts, tp; - while( true ) + bool halt = false; +#if defined(USE_OMP) +#pragma omp parallel +#endif { - ts = tp = 0; - rho0 = -omega*rho0; - - tt = Timer(); - for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) + long double tt, ts, tp; + while( !halt ) { - rho1 = info->ScalarProd(r[j],r_tilde,vlocbeg,vlocend); // rho1 = dot(r[j],r_tilde) - beta = alpha * (rho1/rho0); + ts = tp = 0; - if( fabs(beta) > 1.0e+100 ) +#if defined(USE_OMP) +#pragma omp single +#endif { - //std::cout << "alpha " << alpha << " rho1 " << rho1 << " rho0 " << rho0 << " beta " << beta << std::endl; - reason = "multiplier(1) is too large"; - goto exit; + rho0 = -omega*rho0; } - - if( beta != beta ) + + tt = Timer(); + for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) { - reason = "multiplier(1) is NaN"; - goto exit; - } +#if defined(USE_OMP) +#pragma omp single +#endif + rho1 = 0; +#if defined(USE_OMP) +#pragma omp for reduction(+:rho1) +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + rho1+= r[j][k]*r_tilde[k]; + info->Integrate(&rho1,1); + //info->ScalarProd(r[j],r_tilde,vlocbeg,vlocend,rho1); // rho1 = dot(r[j],r_tilde) +#if defined(USE_OMP) +#pragma omp single +#endif + { + beta = alpha * (rho1/rho0); + } - rho0 = rho1; - for(INMOST_DATA_ENUM_TYPE i = 0; i < j+1; i++) - for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) - u[i][k] = r[i][k] - beta*u[i][k]; + if( fabs(beta) > 1.0e+100 ) + { + //std::cout << "alpha " << alpha << " rho1 " << rho1 << " rho0 " << rho0 << " beta " << beta << std::endl; +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "multiplier(1) is too large"; + halt = true; + break; + } - ApplyOperator(u[j],u[j+1]); // u[j+1] = A*R*u[j] - eta = info->ScalarProd(u[j+1],r_tilde,vlocbeg,vlocend); //eta = dot(u[j+1],r_tilde) - - alpha = rho0 / eta; + if( beta != beta ) + { +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "multiplier(1) is NaN"; + halt = true; + break; + } +#if defined(USE_OMP) +#pragma omp single +#endif + { + rho0 = rho1; + } + for(INMOST_DATA_ENUM_TYPE i = 0; i < j+1; i++) + { +#if defined(USE_OMP) +#pragma omp for +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) + u[i][k] = r[i][k] - beta*u[i][k]; + } - if( fabs(alpha) > 1.0e+100 ) - { - reason = "multiplier(2) is too large"; - goto exit; - } - if( alpha != alpha ) - { - reason = "multiplier(2) is NaN"; - goto exit; - } - for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) - SOL[k] += alpha*u[0][k]; + ApplyOperator(u[j],u[j+1]); // u[j+1] = A*R*u[j] +#if defined(USE_OMP) +#pragma omp single +#endif + eta = 0; +#if defined(USE_OMP) +#pragma omp for reduction(+:eta) +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + eta += u[j+1][k]*r_tilde[k]; + info->Integrate(&eta,1); + //info->ScalarProd(u[j+1],r_tilde,vlocbeg,vlocend,eta); //eta = dot(u[j+1],r_tilde) - for(INMOST_DATA_ENUM_TYPE i = 0; i < j+1; i++) - for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) //r[i] = r[i] - alpha * u[i+1] - r[i][k] -= alpha*u[i+1][k]; +#if defined(USE_OMP) +#pragma omp single +#endif + alpha = rho0 / eta; + + if( fabs(alpha) > 1.0e+100 ) + { +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "multiplier(2) is too large"; + halt = true; + break; + } + if( alpha != alpha ) + { +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "multiplier(2) is NaN"; + halt = true; + break; + } + +#if defined(USE_OMP) +#pragma omp for +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) + SOL[k] += alpha*u[0][k]; + + for(INMOST_DATA_ENUM_TYPE i = 0; i < j+1; i++) + { +#if defined(USE_OMP) +#pragma omp for +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) //r[i] = r[i] - alpha * u[i+1] + r[i][k] -= alpha*u[i+1][k]; + } - resid = info->ScalarProd(r[0],r[0],vlocbeg,vlocend); // resid = dot(r[j],r[j]) - resid = sqrt(resid/rhs_norm); // resid = sqrt(dot(r[j],r[j])) +#if defined(USE_OMP) +#pragma omp single +#endif + resid = 0; +#if defined(USE_OMP) +#pragma omp for reduction(+:resid) +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + resid += r[0][k]*r[0][k]; + info->Integrate(&resid,1); + //info->ScalarProd(r[0],r[0],vlocbeg,vlocend,resid); // resid = dot(r[j],r[j]) +#if defined(USE_OMP) +#pragma omp single +#endif + resid = sqrt(resid/rhs_norm); // resid = sqrt(dot(r[j],r[j])) - if( resid < atol || resid < rtol*resid0 ) - { - reason = "early exit in bi-cg block"; - last_resid = resid; - goto exit; + if( resid < atol || resid < rtol*resid0 ) + { +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "early exit in bi-cg block"; + last_resid = resid; + halt = true; + break; + } + ApplyOperator(r[j],r[j+1]); // r[j+1] = A*R*r[j] } - - - ApplyOperator(r[j],r[j+1]); // r[j+1] = A*R*r[j] - - } - - for(INMOST_DATA_ENUM_TYPE j = 1; j < l+1; j++) - { - for(INMOST_DATA_ENUM_TYPE m = 1; m < j+1; m++) + if( halt ) break; + INMOST_DATA_ENUM_TYPE size = l; +#if defined(CONVEX_COMBINATION) + size = l+1; +#endif + // Penalization for convex combination for update below + for(INMOST_DATA_ENUM_TYPE j = 1; j < l+1; j++) { - tau[(m-1) + (j-1)*l] = 0; - for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k) - tau[(m-1) + (j-1)*l] += r[j][k]*r[m][k]; - tau[(j-1) + (m-1)*l] = tau[(m-1) + (j-1)*l]; + for(INMOST_DATA_ENUM_TYPE m = 1; m < j+1; m++) + { +#if defined(USE_OMP) +#pragma omp single +#endif + tau_sum = 0.0; +#if defined(USE_OMP) +#pragma omp for reduction(+:tau_sum) +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + tau_sum += r[j][k]*r[m][k]; +#if defined(USE_OMP) +#pragma omp single +#endif + tau[(j-1) + (m-1)*size] = tau[(m-1) + (j-1)*size] = tau_sum; + } +#if defined(USE_OMP) +#pragma omp single +#endif + sigma_sum = 0; +#if defined(USE_OMP) +#pragma omp for reduction(+:sigma_sum) +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + sigma_sum += r[0][k]*r[j][k]; +#if defined(USE_OMP) +#pragma omp single +#endif + sigma[j-1] = sigma_sum; } - sigma[j-1] = 0; - for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k) - sigma[j-1] += r[0][k]*r[j][k]; - } - info->Integrate(tau,l*l+l); //sigma is updated with tau +#if defined(CONVEX_COMBINATION) + INMOST_DATA_REAL_TYPE lagrangian = 0.0; + for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) lagrangian += tau[j+size*j]; + sigma[l] = lagrangian; + tau[(l+1)*(l+1)-1] = 0.0; + for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) + { + tau[l + j*(l+1)] = -lagrangian; + tau[l*(l+1)+j] = lagrangian; + } +#endif + info->Integrate(tau,size*size+size); //sigma is updated with tau +#if defined(USE_OMP) +#pragma omp single +#endif #if defined(PSEUDOINVERSE) - { - int dgesvd_info = 0; + { + int dgesvd_info = 0; +#if defined(USE_LAPACK_SVD) + char c = 'A'; + INMOST_DATA_REAL_TYPE U[128*128], V[128*128], w[128]; + INMOST_DATA_REAL_TYPE work[5*128]; + int lwork = 5*128; + int n = static_cast(size); + dgesvd_(&c,&c,&n,&n,tau,&n,w,U,&n,V,&n,work,&lwork,&dgesvd_info); +#else + /* + char c = 'A'; + INMOST_DATA_REAL_TYPE U2[128*128], V2[128*128], w2[128], tau2[128*128]; + INMOST_DATA_REAL_TYPE work[5*128]; + int lwork = 5*128; + int n = l; + memcpy(tau2,tau,sizeof(INMOST_DATA_REAL_TYPE)*l*l); + dgesvd_(&c,&c,&n,&n,tau2,&n,w2,U2,&n,V2,&n,work,&lwork,&dgesvd_info); + printf("dgesvd\n"); + printf("w\n"); + for(int q = 0; q < l; ++q) printf("%g ",w2[q]); + printf("\nU\n"); + for(int q = 0; q < l*l; ++q) + { + printf("%g ",U2[q]); + if( (q+1)%l == 0 ) printf("\n"); + } + printf("V\n"); + for(int q = 0; q < l*l; ++q) + { + printf("%g ",V2[q]); + if( (q+1)%l == 0 ) printf("\n"); + } + */ + INMOST_DATA_REAL_TYPE U[128*128], V[128*128], w[128]; + dgesvd_info = svdnxn(tau,U,w,V,size); + //for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) w[j] = S[j*l+j]; + /* + printf("svdnxn\n"); + printf("w\n"); + for(int q = 0; q < l; ++q) printf("%g ",w[q]); + printf("\nU\n"); + for(int q = 0; q < l*l; ++q) + { + printf("%g ",U[q]); + if( (q+1)%l == 0 ) printf("\n"); + } + printf("V\n"); + for(int q = 0; q < l*l; ++q) + { + printf("%g ",V[q]); + if( (q+1)%l == 0 ) printf("\n"); + } + */ +#endif + /* + printf("w "); + for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) printf("%20g ",w[j]); + printf("\n"); + printf("U\n"); + for(INMOST_DATA_ENUM_TYPE j = 0; j < l*l; j++) + { + printf("%20g ",U[j]); + if( (j+1) % l == 0 ) printf("\n"); + } + printf("\n"); -#if defined(USE_LAPACK_SVD) - char c = 'A'; - INMOST_DATA_REAL_TYPE U[128*128], V[128*128], w[128]; - INMOST_DATA_REAL_TYPE work[5*128]; - int lwork = 5*128; - int n = l; - dgesvd_(&c,&c,&n,&n,tau,&n,w,U,&n,V,&n,work,&lwork,&dgesvd_info); + printf("VT\n"); + for(INMOST_DATA_ENUM_TYPE j = 0; j < l*l; j++) + { + printf("%20g ",V[j]); + if( (j+1) % l == 0 ) printf("\n"); + } + printf("\n"); + */ + if( dgesvd_info != 0 ) + { + printf("(%s:%d) dgesvd %d\n",__FILE__,__LINE__,dgesvd_info); + exit(-1); + } + + INMOST_DATA_REAL_TYPE maxw = w[0], tol; + for(INMOST_DATA_ENUM_TYPE j = 1; j < size; j++) if(w[j]>maxw) maxw = w[j]; + tol = size*maxw*1.0e-14; + memset(gamma,0,sizeof(INMOST_DATA_REAL_TYPE)*size); + for(INMOST_DATA_ENUM_TYPE j = 0; j < size; j++) + { + if( w[j] > tol ) + { + INMOST_DATA_REAL_TYPE sum = 0; + for(INMOST_DATA_ENUM_TYPE k = 0; k < size; ++k) + sum += sigma[k]*U[j*size+k]; + for(INMOST_DATA_ENUM_TYPE k = 0; k < size; ++k) + gamma[k] += sum/w[j]*V[k*size+j]; + } + } + } + + //svdnxn(tau,U,S,V,l); + //INMOST_DATA_REAL_TYPE inv_tau[64]; + //pseudoinverse(tau,inv_tau,l); + //matmul(inv_tau,sigma,gamma,l,l,1); #else - /* - char c = 'A'; - INMOST_DATA_REAL_TYPE U2[128*128], V2[128*128], w2[128], tau2[128*128]; - INMOST_DATA_REAL_TYPE work[5*128]; - int lwork = 5*128; - int n = l; - memcpy(tau2,tau,sizeof(INMOST_DATA_REAL_TYPE)*l*l); - dgesvd_(&c,&c,&n,&n,tau2,&n,w2,U2,&n,V2,&n,work,&lwork,&dgesvd_info); - printf("dgesvd\n"); - printf("w\n"); - for(int q = 0; q < l; ++q) printf("%g ",w2[q]); - printf("\nU\n"); - for(int q = 0; q < l*l; ++q) { - printf("%g ",U2[q]); - if( (q+1)%l == 0 ) printf("\n"); + int order[128]; + int row = solvenxn(tau,gamma,sigma,size,order); + /* + double sum = 0.0; + for(int j = 0; j < l; ++j) + { + sum += gamma[j]; + std::cout << gamma[j] << " "; + } + std::cout << "sum: " << sum; + //std::cout << " lagrangian: " << gamma[l]; + std::cout << std::endl; + */ + if( row != 0 ) + { + std::cout << "breakdown on row " << row << std::endl; + reason = "breakdown in matrix inversion in polynomial part"; + break; + } } - printf("V\n"); - for(int q = 0; q < l*l; ++q) +#endif +#if defined(USE_OMP) +#pragma omp single +#endif + omega = gamma[l-1]; + if( fabs(omega) > 1.0e+100 ) { - printf("%g ",V2[q]); - if( (q+1)%l == 0 ) printf("\n"); +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "multiplier(3) is too large"; + halt = true; + break; } - */ - INMOST_DATA_REAL_TYPE U[128*128], V[128*128], w[128]; - dgesvd_info = svdnxn(tau,U,w,V,l); - //for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) w[j] = S[j*l+j]; - /* - printf("svdnxn\n"); - printf("w\n"); - for(int q = 0; q < l; ++q) printf("%g ",w[q]); - printf("\nU\n"); - for(int q = 0; q < l*l; ++q) + if( omega != omega ) { - printf("%g ",U[q]); - if( (q+1)%l == 0 ) printf("\n"); +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "multiplier(3) is NaN"; + halt = true; + break; } - printf("V\n"); - for(int q = 0; q < l*l; ++q) + for(INMOST_DATA_ENUM_TYPE j = 1; j < l+1; ++j) { - printf("%g ",V[q]); - if( (q+1)%l == 0 ) printf("\n"); +#if defined(USE_OMP) +#pragma omp for +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) + { + u[0][k] -= gamma[j-1]*u[j][k]; + SOL[k] += gamma[j-1]*r[j-1][k]; + r[0][k] -= gamma[j-1]*r[j][k]; + } } - */ -#endif + + /* - printf("w "); - for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) printf("%20g ",w[j]); - printf("\n"); - - printf("U\n"); - for(INMOST_DATA_ENUM_TYPE j = 0; j < l*l; j++) + for(INMOST_DATA_ENUM_TYPE j = 1; j < l+1; j++) { - printf("%20g ",U[j]); - if( (j+1) % l == 0 ) printf("\n"); + for(INMOST_DATA_ENUM_TYPE i = 1; i < j; i++) + { + tau[i-1 + (j-1)*l] = 0; + for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k) + tau[i-1 + (j-1)*l] += r[j][k]*r[i][k]; + info->Integrate(&tau[i-1 + (j-1)*l],1); + tau[i-1 + (j-1)*l] /= sigma[i-1]; + for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) + r[j][k] -= tau[i-1 + (j-1)*l]*r[i][k]; + } + INMOST_DATA_REAL_TYPE temp[2] = {0,0}; + for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k) + { + temp[0] += r[j][k]*r[j][k]; + temp[1] += r[0][k]*r[j][k]; + } + info->Integrate(temp,2); + sigma[j-1] = temp[0];//+1.0e-35; //REVIEW + theta2[j-1] = temp[1]/sigma[j-1]; } - printf("\n"); - - printf("VT\n"); - for(INMOST_DATA_ENUM_TYPE j = 0; j < l*l; j++) + omega = theta1[l-1] = theta2[l-1]; + for(INMOST_DATA_ENUM_TYPE j = l-1; j > 0; j--) + { + eta = 0; + for(INMOST_DATA_ENUM_TYPE i = j+1; i < l+1; i++) + eta += tau[j-1 + (i-1)*l] * theta1[i-1]; + theta1[j-1] = theta2[j-1] - eta; + } + for(INMOST_DATA_ENUM_TYPE j = 1; j < l; j++) + { + eta = 0; + for(INMOST_DATA_ENUM_TYPE i = j+1; i < l; i++) + eta += tau[j-1 + (i-1)*l] * theta1[i]; + theta3[j-1] = theta1[j] + eta; + } + for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) { - printf("%20g ",V[j]); - if( (j+1) % l == 0 ) printf("\n"); + SOL[k] += theta1[0]*r[0][k]; + r[0][k] -= theta2[l-1]*r[l][k]; + u[0][k] -= theta1[l-1]*u[l][k]; + } + for(INMOST_DATA_ENUM_TYPE j = 1; j < l; j++) + { + for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) + { + u[0][k] -= theta1[j-1]*u[j][k]; + SOL[k] += theta3[j-1]*r[j][k]; + r[0][k] -= theta2[j-1]*r[j][k]; + } } - printf("\n"); */ - if( dgesvd_info != 0 ) + last_it = i+1; { - printf("(%s:%d) dgesvd %d\n",__FILE__,__LINE__,dgesvd_info); - exit(-1); +#if defined(USE_OMP) +#pragma omp single +#endif + resid = 0; +#if defined(USE_OMP) +#pragma omp for reduction(+:resid) +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + resid += r[0][k]*r[0][k]; + info->Integrate(&resid,1); + //info->ScalarProd(r[0],r[0],vlocbeg,vlocend,resid); +#if defined(USE_OMP) +#pragma omp single +#endif + resid = sqrt(resid/rhs_norm); } - - INMOST_DATA_REAL_TYPE maxw = w[0], tol; - for(INMOST_DATA_ENUM_TYPE j = 1; j < l; j++) if(w[j]>maxw) maxw = w[j]; - tol = l*maxw*1.0e-14; - memset(gamma,0,sizeof(INMOST_DATA_REAL_TYPE)*l); - for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) + tt = Timer() - tt; +#if defined(REPORT_RESIDUAL) + if( info->GetRank() == 0 ) { - if( w[j] > tol ) + //std::cout << "iter " << last_it << " residual " << resid << " time " << tt << " matvec " << ts*0.5/l << " precond " << tp*0.5/l << std::endl; + //std::cout << "iter " << last_it << " resid " << resid << "\r"; + //printf("iter %3d resid %12g | %12g relative %12g | %12g\r", last_it, resid, atol, resid / resid0, rtol); +#if defined(USE_OMP) +#pragma omp single +#endif { - INMOST_DATA_REAL_TYPE sum = 0; - for(INMOST_DATA_ENUM_TYPE k = 0; k < l; ++k) - sum += sigma[k]*U[j*l+k]; - for(INMOST_DATA_ENUM_TYPE k = 0; k < l; ++k) - gamma[k] += sum/w[j]*V[k*l+j]; + printf("iter %3d resid %12g | %g\r", last_it, resid, atol); + fflush(stdout); } } - } - - //svdnxn(tau,U,S,V,l); - //INMOST_DATA_REAL_TYPE inv_tau[64]; - //pseudoinverse(tau,inv_tau,l); - //matmul(inv_tau,sigma,gamma,l,l,1); -#else - int order[128]; - int row = solvenxn(tau,gamma,sigma,l,order); - if( row != 0 ) - { - std::cout << "breakdown on row " << row << std::endl; - reason = "breakdown in matrix inversion in polynomial part"; - break; - } #endif - omega = gamma[l-1]; - if( fabs(omega) > 1.0e+100 ) - { - reason = "multiplier(3) is too large"; - goto exit; - } - if( omega != omega ) - { - reason = "multiplier(3) is NaN"; - goto exit; - } - for(INMOST_DATA_ENUM_TYPE j = 1; j < l+1; ++j) - { - for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) + last_resid = resid; + if( resid != resid ) { - u[0][k] -= gamma[j-1]*u[j][k]; - SOL[k] += gamma[j-1]*r[j-1][k]; - r[0][k] -= gamma[j-1]*r[j][k]; +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "residual is NAN"; + break; } - } - - - /* - for(INMOST_DATA_ENUM_TYPE j = 1; j < l+1; j++) - { - for(INMOST_DATA_ENUM_TYPE i = 1; i < j; i++) + if( resid < atol ) { - tau[i-1 + (j-1)*l] = 0; - for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k) - tau[i-1 + (j-1)*l] += r[j][k]*r[i][k]; - info->Integrate(&tau[i-1 + (j-1)*l],1); - tau[i-1 + (j-1)*l] /= sigma[i-1]; - for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) - r[j][k] -= tau[i-1 + (j-1)*l]*r[i][k]; +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "converged due to absolute tolerance"; + break; } - INMOST_DATA_REAL_TYPE temp[2] = {0,0}; - for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k) + if( resid < rtol*resid0 ) { - temp[0] += r[j][k]*r[j][k]; - temp[1] += r[0][k]*r[j][k]; +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "converged due to relative tolerance"; + break; } - info->Integrate(temp,2); - sigma[j-1] = temp[0];//+1.0e-35; //REVIEW - theta2[j-1] = temp[1]/sigma[j-1]; - } - omega = theta1[l-1] = theta2[l-1]; - for(INMOST_DATA_ENUM_TYPE j = l-1; j > 0; j--) - { - eta = 0; - for(INMOST_DATA_ENUM_TYPE i = j+1; i < l+1; i++) - eta += tau[j-1 + (i-1)*l] * theta1[i-1]; - theta1[j-1] = theta2[j-1] - eta; - } - for(INMOST_DATA_ENUM_TYPE j = 1; j < l; j++) - { - eta = 0; - for(INMOST_DATA_ENUM_TYPE i = j+1; i < l; i++) - eta += tau[j-1 + (i-1)*l] * theta1[i]; - theta3[j-1] = theta1[j] + eta; - } - for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) - { - SOL[k] += theta1[0]*r[0][k]; - r[0][k] -= theta2[l-1]*r[l][k]; - u[0][k] -= theta1[l-1]*u[l][k]; - } - for(INMOST_DATA_ENUM_TYPE j = 1; j < l; j++) - { - for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) + if( resid > divtol ) { - u[0][k] -= theta1[j-1]*u[j][k]; - SOL[k] += theta3[j-1]*r[j][k]; - r[0][k] -= theta2[j-1]*r[j][k]; +#if defined(USE_OMP) +#pragma omp single +#endif + reason = "diverged due to divergence tolerance"; + break; } - } - */ - last_it = i+1; - { - resid = info->ScalarProd(r[0],r[0],vlocbeg,vlocend); - resid = sqrt(resid/rhs_norm); - } - tt = Timer() - tt; -#if defined(REPORT_RESIDUAL) - if( info->GetRank() == 0 ) - { - //std::cout << "iter " << last_it << " residual " << resid << " time " << tt << " matvec " << ts*0.5/l << " precond " << tp*0.5/l << std::endl; - //std::cout << "iter " << last_it << " resid " << resid << "\r"; - //printf("iter %3d resid %12g | %12g relative %12g | %12g\r", last_it, resid, atol, resid / resid0, rtol); - printf("iter %3d resid %12g | %g\r", last_it, resid, atol); - fflush(stdout); - } + if( i == maxits ) + { +#if defined(USE_OMP) +#pragma omp single #endif - last_resid = resid; - if( resid != resid ) - { - reason = "residual is NAN"; - break; - } - if( resid < atol ) - { - reason = "converged due to absolute tolerance"; - break; - } - if( resid < rtol*resid0 ) - { - reason = "converged due to relative tolerance"; - break; - } - if( resid > divtol ) - { - reason = "diverged due to divergence tolerance"; - break; - } - if( i == maxits ) - { - reason = "reached maximum iteration number"; - break; + reason = "reached maximum iteration number"; + break; + } + i++; } - i++; } exit: if (prec != NULL) @@ -937,7 +1142,10 @@ exit: prec->Solve(SOL, r_tilde); //undo right preconditioner std::copy(r_tilde.Begin(), r_tilde.End(), SOL.Begin()); } - for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k) //undo shift +#if defined(USE_OMP) +#pragma omp parallel for +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) //undo shift SOL[k] += x0[k]; //info->RestoreMatrix(A); info->RestoreVector(SOL); @@ -1061,11 +1269,10 @@ exit: INMOST_DATA_INTEGER_TYPE ivbeg,ivend, ivlocbeg, ivlocend; INMOST_DATA_REAL_TYPE rho = 1, alpha = 1, beta, omega = 1; INMOST_DATA_REAL_TYPE resid0, resid, temp[2]; - bool is_parallel = info->GetSize() > 1; info->PrepareVector(SOL); info->PrepareVector(RHS); - if( is_parallel ) info->Update(SOL); - if( is_parallel ) info->Update(RHS); + info->Update(SOL); + info->Update(RHS); if (prec != NULL)prec->ReplaceSOL(SOL); if (prec != NULL)prec->ReplaceRHS(RHS); info->GetLocalRegion(info->GetRank(),vlocbeg,vlocend); @@ -1080,9 +1287,12 @@ exit: std::fill(p.Begin(),p.End(),0.0); { resid = 0; - for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k != vlocend; k++) +#if defined(USE_OMP) +#pragma omp parallel for +#endif + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; k++) resid += r[k]*r[k]; - if( is_parallel ) info->Integrate(&resid,1); + info->Integrate(&resid,1); } last_resid = resid = resid0 = sqrt(resid); last_it = 0; @@ -1104,12 +1314,9 @@ exit: #pragma omp parallel #endif { - long double tt, ts, tp, ttt; INMOST_DATA_ENUM_TYPE i = 0; while(true) { - ts = tp = 0; - tt = Timer(); { /* if( fabs(rho) < 1.0e-31 ) @@ -1126,30 +1333,41 @@ exit: } */ //std::cout << "rho " << rho << " alpha " << alpha << " omega " << omega << " beta " << 1.0 /rho * alpha / omega << std::endl; - beta = 1.0 /rho * alpha / omega; - rho = 0; +#if defined(USE_OMP) +#pragma omp single +#endif + { + beta = 1.0 /rho * alpha / omega; + rho = 0; + } #if defined(USE_OMP) #pragma omp for reduction(+:rho) #endif - for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; k++) + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) rho += r0[k]*r[k]; - if( is_parallel ) - { + info->Integrate(&rho,1); + //info->ScalarProd(r0,r,ivlocbeg,ivlocend,rho); #if defined(USE_OMP) #pragma omp single #endif - info->Integrate(&rho,1); + { + beta *= rho; } - beta *= rho; if( fabs(beta) > 1.0e+100 ) { //std::cout << "rho " << rho << " alpha " << alpha << " omega " << omega << " beta " << 1.0 /rho * alpha / omega << std::endl; +#if defined(USE_OMP) +#pragma omp single +#endif reason = "multiplier(1) is too large"; break; } if( beta != beta ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "multiplier(1) is NaN"; break; } @@ -1161,62 +1379,57 @@ exit: for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) p[k] = r[k] + beta*(p[k] - omega*v[k]); //global indexes r, p, v } + { - ttt = Timer(); - if (prec != NULL) + if (prec != NULL) { -#if defined(USE_OMP) -#pragma omp single -#endif prec->Solve(p, y); - } - tp += Timer() - ttt; - if( is_parallel ) - { -#if defined(USE_OMP) -#pragma omp single -#endif info->Update(y); + Alink->MatVec(1,y,0,v); // global multiplication, y should be updated, v probably needs an update + info->Update(v); } - ttt = Timer(); - Alink->MatVec(1,y,0,v); // global multiplication, y should be updated, v probably needs an update - ts += Timer() - ttt; - if( is_parallel ) + else { -#if defined(USE_OMP) -#pragma omp single -#endif + Alink->MatVec(1,p,0,v); // global multiplication, y should be updated, v probably needs an update info->Update(v); } } { +#if defined(USE_OMP) +#pragma omp single +#endif alpha = 0; #if defined(USE_OMP) #pragma omp for reduction(+:alpha) #endif - for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; k++) + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) alpha += r0[k]*v[k]; - if( is_parallel ) - { + info->Integrate(&alpha,1); + //info->ScalarProd(r0,v,ivlocbeg,ivlocend,alpha); #if defined(USE_OMP) #pragma omp single #endif - info->Integrate(&alpha,1); + { + if( alpha == 0 && rho == 0 ) + alpha = 0; + else + alpha = rho / alpha; //local indexes, r0, v } - if( alpha == 0 && rho == 0 ) - alpha = 0; - else - alpha = rho / alpha; //local indexes, r0, v - if( fabs(alpha) > 1.0e+100 ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "multiplier(2) is too large"; //std::cout << "alpha " << alpha << " rho " << rho << std::endl; break; } if( alpha != alpha ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "multiplier(2) is NaN"; //std::cout << "alpha " << alpha << " rho " << rho << std::endl; break; @@ -1232,35 +1445,21 @@ exit: } { - ttt = Timer(); - if (prec != NULL) + if (prec != NULL) { -#if defined(USE_OMP) -#pragma omp single -#endif prec->Solve(s, z); - } - tp += Timer() - ttt; - if( is_parallel ) - { -#if defined(USE_OMP) -#pragma omp single -#endif info->Update(z); + Alink->MatVec(1.0,z,0,t); // global multiplication, z should be updated, t probably needs an update + info->Update(t); } - ttt = Timer(); - Alink->MatVec(1.0,z,0,t); // global multiplication, z should be updated, t probably needs an update - ts += Timer() - ttt; - if( is_parallel ) + else { -#if defined(USE_OMP) -#pragma omp single -#endif + Alink->MatVec(1.0,s,0,t); // global multiplication, z should be updated, t probably needs an update info->Update(t); } } + { - temp[0] = temp[1] = 0; #if defined(USE_OMP) #pragma omp for reduction(+:tempa) reduction(+:tempb) @@ -1272,13 +1471,7 @@ exit: } temp[0] = tempa; temp[1] = tempb; - if( is_parallel ) - { -#if defined(USE_OMP) -#pragma omp single -#endif - info->Integrate(temp,2); - } + info->Integrate(temp,2); /* if (fabs(temp[1]) < 1.0e-35) { @@ -1286,19 +1479,30 @@ exit: } */ //omega = temp[0] / (temp[1] + (temp[1] < 0.0 ? -1.0e-10 : 1.0e-10)); //local indexes t, s - if( temp[0] == 0 && temp[1] == 0 ) - omega = 0; - else - omega = temp[0] / temp[1]; +#if defined(USE_OMP) +#pragma omp single +#endif + { + if( temp[0] == 0 && temp[1] == 0 ) + omega = 0; + else + omega = temp[0] / temp[1]; + } if( fabs(omega) > 1.0e+100 ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "multiplier(3) is too large"; //std::cout << "alpha " << alpha << " rho " << rho << std::endl; break; } if( omega != omega ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "multiplier(3) is NaN"; //std::cout << "alpha " << alpha << " rho " << rho << std::endl; break; @@ -1319,23 +1523,23 @@ exit: r[k] = s[k] - omega * t[k]; // global indexes r, s, t } last_it = i+1; - { - resid = 0; + //info->ScalarProd(r,r,ivlocbeg,ivlocend,resid); +#if defined(USE_OMP) +#pragma omp single +#endif + resid = 0; #if defined(USE_OMP) #pragma omp for reduction(+:resid) #endif - for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; k++) - resid += r[k]*r[k]; - if( is_parallel ) - { + for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; ++k) + resid += r[k]*r[k]; #if defined(USE_OMP) #pragma omp single #endif - info->Integrate(&resid,1); - } + { + info->Integrate(&resid,1); resid = sqrt(resid); } - tt = Timer() - tt; #if defined(REPORT_RESIDUAL) if( info->GetRank() == 0 ) { @@ -1354,26 +1558,41 @@ exit: last_resid = resid; if( resid != resid ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "residual is NAN"; break; } if( resid > divtol ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "diverged due to divergence tolerance"; break; } if( resid < atol ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "converged due to absolute tolerance"; break; } if( resid < rtol*resid0 ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "converged due to relative tolerance"; break; } if( i == maxits ) { +#if defined(USE_OMP) +#pragma omp single +#endif reason = "reached maximum iteration number"; break; } @@ -1391,6 +1610,7 @@ exit: bool ReplaceSOL(Solver::Vector & SOL) {(void)SOL; return true; } Method * Duplicate() { return new BCGS_solver(*this);} std::string GetReason() {return reason;} + }; } diff --git a/solver_ddpqiluc2.cpp b/solver_ddpqiluc2.cpp index f734a76a6093b14dafede8ea744148cc9c09a769..dd8b74e908cad58d223b748ce4139d81244f3ca2 100644 --- a/solver_ddpqiluc2.cpp +++ b/solver_ddpqiluc2.cpp @@ -13,8 +13,8 @@ //#define REPORT_ILU //#undef REPORT_ILU //#define REPORT_ILU_PROGRESS -#define REPORT_ILU_END -#define REPORT_ILU_SUMMARY +//#define REPORT_ILU_END +//#define REPORT_ILU_SUMMARY //#undef REPORT_ILU_PROGRESS //#define REPORT_SCHUR using namespace INMOST; @@ -456,7 +456,7 @@ using namespace INMOST; A_Entries[j++] = Solver::Row::make_entry(r->first, r->second); } A_Address[k].last = j; - assert(A_Address[k].Size() != 0); //singular matrix + //assert(A_Address[k].Size() != 0); //singular matrix } INMOST_DATA_REAL_TYPE Anorm, Enorm, Fnorm; INMOST_DATA_ENUM_TYPE nzE,nzF; @@ -3362,60 +3362,65 @@ swap_algorithm: { assert(&input != &output); // - INMOST_DATA_ENUM_TYPE k, level, mobeg, moend, vbeg, vend; - info->GetOverlapRegion(info->GetRank(), mobeg, moend); - info->GetVectorRegion(vbeg, vend); - for (k = mobeg; k < moend; k++) temp[k] = input[k]; - //the original matrix A was separated by multilevel algorithm to the following form - // | B F | - // A = | | - // | E C | - // In order to apply solve phase on this matrix, we consider the matrix in following sense: - // | I 0 | | B 0 | | I B^{-1} F | - // A = | | | | | | = L D U - // | E B^{-1} I | | 0 S | | 0 I | - // where S = C - E B^{-1} F - // consider the system - // | B F | | u | | f | - // | | | | = | | - // | E C | | y | | g | - // then solution is obtained by steps: - // 1) ~f = B^{-1} f - // 2) ~g = g - E ~f - // 3) y = S^{-1} ~g - // 4) u = ~f - B^{-1} F y - //first reorder input as prescribed by P - // ~y = Pt y - for (k = vbeg; k < mobeg; k++) output[k] = 0;//Restrict additive schwartz (maybe do it outside?) - for (k = mobeg; k < moend; ++k) output[k] = temp[ddP[k]]; - for (k = moend; k < vend; k++) output[k] = 0;//Restrict additive schwartz (maybe do it outside?) - - level = 0; - //perform recursively first two steps of solve phase - // outer V-cycle - - while(level < level_size.size()) level = Descend(level, output, output); +#if defined(USE_OMP) +#pragma omp single +#endif + { + INMOST_DATA_ENUM_TYPE k, level, mobeg, moend, vbeg, vend; + info->GetOverlapRegion(info->GetRank(), mobeg, moend); + info->GetVectorRegion(vbeg, vend); + for (k = mobeg; k < moend; k++) temp[k] = input[k]; + //the original matrix A was separated by multilevel algorithm to the following form + // | B F | + // A = | | + // | E C | + // In order to apply solve phase on this matrix, we consider the matrix in following sense: + // | I 0 | | B 0 | | I B^{-1} F | + // A = | | | | | | = L D U + // | E B^{-1} I | | 0 S | | 0 I | + // where S = C - E B^{-1} F + // consider the system + // | B F | | u | | f | + // | | | | = | | + // | E C | | y | | g | + // then solution is obtained by steps: + // 1) ~f = B^{-1} f + // 2) ~g = g - E ~f + // 3) y = S^{-1} ~g + // 4) u = ~f - B^{-1} F y + //first reorder input as prescribed by P + // ~y = Pt y + for (k = vbeg; k < mobeg; k++) output[k] = 0;//Restrict additive schwartz (maybe do it outside?) + for (k = mobeg; k < moend; ++k) output[k] = temp[ddP[k]]; + for (k = moend; k < vend; k++) output[k] = 0;//Restrict additive schwartz (maybe do it outside?) + + level = 0; + //perform recursively first two steps of solve phase + // outer V-cycle + + while(level < level_size.size()) level = Descend(level, output, output); - // W-cycle, should do ApplyB between moves - //level = Ascend(level, output); - //level = Ascend(level, output); - //level = Descend(level, output); - //level = Descend(level, output); - - //on the last recursion level third step was accomplished - //finish last step in unwinding recursion + // W-cycle, should do ApplyB between moves + //level = Ascend(level, output); + //level = Ascend(level, output); + //level = Descend(level, output); + //level = Descend(level, output); + + //on the last recursion level third step was accomplished + //finish last step in unwinding recursion - while (level > 0) level = Ascend(level, output, output); + while (level > 0) level = Ascend(level, output, output); - //System was solved with reordered matrix - // PAQ x = y - // AQ x = Pt y - // Q x = A^{-1} Pt y <- we have now - // x = Qt A^{-1} Pt y - //reorder output by Q - for (k = mobeg; k < moend; ++k) temp[ddQ[k]] = output[k]; - for (k = mobeg; k < moend; ++k) output[k] = temp[k]; + //System was solved with reordered matrix + // PAQ x = y + // AQ x = Pt y + // Q x = A^{-1} Pt y <- we have now + // x = Qt A^{-1} Pt y + //reorder output by Q + for (k = mobeg; k < moend; ++k) temp[ddQ[k]] = output[k]; + for (k = mobeg; k < moend; ++k) output[k] = temp[k]; + } //May assemble partition of unity instead of restriction before accumulation //assembly should be done instead of initialization info->Accumulate(output); diff --git a/solver_ilu2.hpp b/solver_ilu2.hpp index c987f8acb4013bab115ae70e1952e51cc786e0e8..e6f36fc0f02c69e376813d7b5e062352df1522d6 100644 --- a/solver_ilu2.hpp +++ b/solver_ilu2.hpp @@ -490,23 +490,28 @@ public: bool Solve(Solver::Vector & input, Solver::Vector & output) { assert(isInitialized()); - INMOST_DATA_ENUM_TYPE mobeg, moend, r, k, vbeg,vend; //, end; - info->GetOverlapRegion(info->GetRank(),mobeg,moend); - info->GetVectorRegion(vbeg,vend); - for(k = vbeg; k < mobeg; k++) output[k] = 0; //Restrict additive schwartz (maybe do it outside?) - for(k = mobeg; k < moend; k++) output[k] = input[k]; - for(k = moend; k < vend; k++) output[k] = 0; //Restrict additive schwartz (maybe do it outside?) - for(k = mobeg; k < moend; k++) //iterate over L part - { - for(r = iu[k]-1; r > ilu[k]; r--) - output[k] -= luv[r-1]*output[lui[r-1]]; - output[k] *= luv[iu[k]-1]; - } - for(k = moend; k > mobeg; k--) //iterate over U part +#if defined(USE_OMP) +#pragma omp single +#endif { - for(r = iu[k-1]+1; r < ilu[k]; r++) - output[k-1] -= luv[r]*output[lui[r]]; - output[k-1] *= luv[iu[k-1]]; + INMOST_DATA_ENUM_TYPE mobeg, moend, r, k, vbeg,vend; //, end; + info->GetOverlapRegion(info->GetRank(),mobeg,moend); + info->GetVectorRegion(vbeg,vend); + for(k = vbeg; k < mobeg; k++) output[k] = 0; //Restrict additive schwartz (maybe do it outside?) + for(k = mobeg; k < moend; k++) output[k] = input[k]; + for(k = moend; k < vend; k++) output[k] = 0; //Restrict additive schwartz (maybe do it outside?) + for(k = mobeg; k < moend; k++) //iterate over L part + { + for(r = iu[k]-1; r > ilu[k]; r--) + output[k] -= luv[r-1]*output[lui[r-1]]; + output[k] *= luv[iu[k]-1]; + } + for(k = moend; k > mobeg; k--) //iterate over U part + { + for(r = iu[k-1]+1; r < ilu[k]; r++) + output[k-1] -= luv[r]*output[lui[r]]; + output[k-1] *= luv[iu[k-1]]; + } } //May assemble partition of unity instead of restriction before accumulation //assembly should be done instead of initialization diff --git a/tests/solver_test000/CMakeLists.txt b/tests/solver_test000/CMakeLists.txt index 4e4c6ccbdb67dc24fca83c5491fe0abc7ce7b257..c5af2675133fe9d599011fddd8d61a74afc69de1 100644 --- a/tests/solver_test000/CMakeLists.txt +++ b/tests/solver_test000/CMakeLists.txt @@ -41,6 +41,11 @@ if( HAVE_SOLVER_MPTILUC2 ) add_test(NAME solver_test000_serial_inner_mptiluc COMMAND $ 0 8) endif() +if( HAVE_SOLVER_MPTILU2 ) +add_test(NAME solver_test000_serial_inner_mptilu2 COMMAND $ 0 9) +endif() + + if(USE_SOLVER_PETSC) add_test(NAME solver_test000_serial_petsc COMMAND $ 0 2) endif() @@ -72,6 +77,12 @@ add_test(NAME solver_test000_parallel_permute1_inner_mptiluc COMMAND ${MPIEXE add_test(NAME solver_test000_parallel_permute2_inner_mptiluc COMMAND ${MPIEXEC} -np 4 $ 2 1) endif() +if( HAVE_SOLVER_MPTILU2 ) +add_test(NAME solver_test000_parallel_normal_inner_mptilu2 COMMAND ${MPIEXEC} -np 4 $ 0 9) +add_test(NAME solver_test000_parallel_permute1_inner_mptilu2 COMMAND ${MPIEXEC} -np 4 $ 1 9) +add_test(NAME solver_test000_parallel_permute2_inner_mptilu2 COMMAND ${MPIEXEC} -np 4 $ 2 9) +endif() + if(USE_SOLVER_PETSC) add_test(NAME solver_test000_parallel_normal_petsc COMMAND ${MPIEXEC} -np 4 $ 0 2) #add_test(NAME solver_test000_parallel_permute1_petsc COMMAND ${MPIEXEC} -np 4 $ 1 2) diff --git a/tests/solver_test000/main.cpp b/tests/solver_test000/main.cpp index 78fea9e33ff8fa80fa506eac059f46d664bcb076..6c385c01e0e74ca50a50658f7f657a6231d84d88 100644 --- a/tests/solver_test000/main.cpp +++ b/tests/solver_test000/main.cpp @@ -41,6 +41,7 @@ int main(int argc,char ** argv) case 6: type = Solver::Trilinos_Belos; break; case 7: type = Solver::ANI; break; case 8: type = Solver::INNER_MPTILUC; break; + case 9: type = Solver::INNER_MPTILU2; break; } { diff --git a/tests/solver_test001/main.cpp b/tests/solver_test001/main.cpp index 003a4e7c62392a5cf845503622331384e353c4b7..a0d92c8f08bb888cb59d7bd9e1bfff81cac921d6 100644 --- a/tests/solver_test001/main.cpp +++ b/tests/solver_test001/main.cpp @@ -66,8 +66,8 @@ int main(int argc, char ** argv) s.SetParameterReal("relative_tolerance",1.0e-9); s.SetParameterReal("absolute_tolerance",1.0e-16); s.SetParameterEnum("rescale_iterations",8); - s.SetParameterReal("drop_tolerance",1.0e-4); - s.SetParameterReal("reuse_tolerance",1.0e-8); + s.SetParameterReal("drop_tolerance",1.0e-2); + s.SetParameterReal("reuse_tolerance",1.0e-4); /* diff --git a/tests/solver_test002/CMakeLists.txt b/tests/solver_test002/CMakeLists.txt index aa99f4829691d7f24b51a2ea5f4f5b800c9416a9..e519cf90fb673a9a5903e8a3c18ef09ec539e269 100644 --- a/tests/solver_test002/CMakeLists.txt +++ b/tests/solver_test002/CMakeLists.txt @@ -38,8 +38,12 @@ endif() add_test(NAME solver_test002_serial_inner_ilu2 COMMAND $ 0 20) add_test(NAME solver_test002_serial_inner_ddpqiluc COMMAND $ 1 20) if(HAVE_SOLVER_MPTILUC2) -add_test(NAME solver_test002_serial_inner_mptiluc COMMAND $ 1 20) +add_test(NAME solver_test002_serial_inner_mptiluc COMMAND $ 8 20) endif() +if(HAVE_SOLVER_MPTILU2) +add_test(NAME solver_test002_serial_inner_mptilu2 COMMAND $ 9 20) +endif() + if(USE_SOLVER_PETSC) add_test(NAME solver_test002_serial_petsc COMMAND $ 2 20) endif() @@ -60,8 +64,12 @@ if( EXISTS ${MPIEXEC} ) add_test(NAME solver_test002_parallel_inner_ilu2 COMMAND ${MPIEXEC} -np 4 $ 0 20) add_test(NAME solver_test002_parallel_inner_ddpqiluc COMMAND ${MPIEXEC} -np 4 $ 1 20) if(HAVE_SOLVER_MPTILUC2) -add_test(NAME solver_test002_parallel_inner_mptiluc COMMAND ${MPIEXEC} -np 4 $ 1 20) +add_test(NAME solver_test002_parallel_inner_mptiluc COMMAND ${MPIEXEC} -np 4 $ 8 20) endif() +if(HAVE_SOLVER_MPTILU2) +add_test(NAME solver_test002_parallel_inner_mptilu2 COMMAND ${MPIEXEC} -np 4 $ 9 20) +endif() + if(USE_SOLVER_PETSC) add_test(NAME solver_test002_parallel_petsc COMMAND ${MPIEXEC} -np 4 $ 2 20) endif() diff --git a/tests/solver_test002/main.cpp b/tests/solver_test002/main.cpp index f3bd2259fe12040fd97e864ca8e01744282f5d7e..994ad66580893a50567938b2f86f9133baee440d 100644 --- a/tests/solver_test002/main.cpp +++ b/tests/solver_test002/main.cpp @@ -102,6 +102,7 @@ int main(int argc, char ** argv) case 6: type = Solver::Trilinos_ML; break; case 7: type = Solver::ANI; break; case 8: type = Solver::INNER_MPTILUC; break; + case 9: type = Solver::INNER_MPTILU2; break; } int n = atoi(argv[2]); Solver::Initialize(&argc,&argv,argc > 3 ? argv[3] : NULL); // Initialize the linear solver in accordance with args