//------------------------------------------------------------------------------------------------ // File: k3d.cpp //------------------------------------------------------------------------------------------------ #include "k3d.h" #include #include #include #include #include #include #include #include #include namespace k3d { /// @brief Perform binary search //======================================================================================== int BinarySearch (long long _isup, int _nblks, long long *_blks, int _iblkprev) { int iblkprev = _iblkprev; if (iblkprev < 0) iblkprev = 0; if (iblkprev >= _nblks) { iblkprev = _nblks-1; } if (_isup >= _blks[_iblkprev] && _isup < _blks[_iblkprev+1]) { return iblkprev; } int ibegblk = 0; int iendblk = _nblks; int iblk; while (true) { if (ibegblk == iendblk-1) { iblkprev = ibegblk; break; } else { iblk = (ibegblk+iendblk-1) / 2; if (iblk <= ibegblk) iblk = ibegblk+1; if (_isup >= _blks[iblk] && _isup < _blks[iblk+1]) { iblkprev = iblk; break; } else if (_isup < _blks[iblk]) { iendblk = iblk; } else { ibegblk = iblk+1; } } } return iblkprev; }; /// @brief Print array //======================================================================================== void PrintArray (ostream &_stream, const char *_name, int _isize, int *_iarr) { _stream << _name << " (Size = " << _isize << ")" << endl; for (int i=0;i<_isize;i++) { if ((i%5 == 0) && (i > 0)) _stream << endl; _stream << setw(12) << _iarr[i]; } _stream << endl; }; /// @brief Print array //======================================================================================== void PrintArray (ostream &_stream, const char *_name, int _isize, long long *_iarr) { _stream << _name << " (Size = " << _isize << ")" << endl; for (int i=0;i<_isize;i++) { if ((i%5 == 0) && (i > 0)) _stream << endl; _stream << setw(18) << _iarr[i]; } _stream << endl; }; /// @brief Print array //======================================================================================== void PrintArray (ostream &_stream, const char *_name, int _isize, float *_farr) { _stream << _name << " (Size = " << _isize << ")" << endl; for (int i=0;i<_isize;i++) { if ((i%5 == 0) && (i > 0)) _stream << endl; _stream << setw(14) << setprecision(8) << _farr[i] << " "; } _stream << endl; }; /// @brief Print array //======================================================================================== void PrintArray (ostream &_stream, const char *_name, int _isize, double *_darr) { _stream << _name << " (Size = " << _isize << ")" << endl; for (int i=0;i<_isize;i++) { if ((i%3 == 0) && (i > 0)) _stream << endl; _stream << setw(23) << setprecision(16) << _darr[i] << " "; } _stream << endl; }; /// @brief Set manip //======================================================================================== ostream &SetPw (ostream &stream) { stream.unsetf(ios::scientific); stream << ' ' << setprecision(5) << setw (9); return stream; }; /// @brief Round two values //======================================================================================== void Round (double &_dx, double &_dy) { long long i, i0; double dx1 = 1.0e1 * _dx; i0 = (long long) dx1; i = (long long) ((dx1-(double)i0) * 10000); dx1 = (double) i; dx1 = dx1 * 1.0e-4+(double)i0; _dx = dx1 * 1.e-1; double dy1 = 1.0e1 * _dy; i0 = (long long) dy1; i = (long long) ((dy1-(double)i0) * 10000); dy1 = (double) i; dy1 = dy1 * 1.0e-4+(double)i0; _dy = dy1 * 1.e-1; }; // Compute optimal ordering for the matrix //======================================================================================== template void CIlu2_impl<_Int,_Flt>::ComputeOptimalOrder (int _ordtype, const int _n, const vector<_Int> &_ia_alu, const vector<_Int> &_ja_alu, vector &_order) { // Compute symmetrized matrix vector<_Int> ia_symm; vector<_Int> ja_symm; CIlu2_impl<_Int,_Flt>::SymmetrizeSparsity (_n, _ia_alu, _ja_alu, ia_symm, ja_symm); // Call METIS ordering routine _order.resize (_n+1); int nzjas = (int)ia_symm[_n]; if (_ordtype == -1) { /* vector ialoc (_n+1); vector jaloc(nzjas+1); idx_t *pialoc = &ialoc[0]; idx_t *pjaloc = &jaloc[0]; idx_t i, j, jj; ialoc[0] = 0; idx_t nz = 0; for (i=0;i<_n;i++) { for (j=ia_symm[i];j order_int (_n*2+1); idx_t *porder_int = &order_int[0]; idx_t options_ND[METIS_NOPTIONS]; METIS_SetDefaultOptions (options_ND); options_ND[METIS_OPTION_NUMBERING] = 0; options_ND[METIS_OPTION_SEED] = 0; idx_t nloc = _n; METIS_NodeND ((idx_t *)&nloc, (idx_t *)pialoc, (idx_t *)pjaloc, NULL, (idx_t *)options_ND, (idx_t *)porder_int+nloc, (idx_t *)porder_int); for (i=0;i<_n;i++) { _order[i] = (int)porder_int[i]; } */ } else if (_ordtype == 0) { // Identity ordering int i; for (i=0;i<_n;i++) { _order[i] = i; } } else if (_ordtype == 1) { // Reversed Cuthill-McCee ordering vector ialoc(_n+1); vector jaloc(nzjas+1); int *pialoc = &ialoc[0]; int *pjaloc = &jaloc[0]; int i; for (i=0;i<=_n;i++) pialoc[i] = (int)(ia_symm[i] + 1); for (i=0;i xls(_n+1); vector mask(_n+1); int *pxls = &xls[0]; int *pmask = &mask[0]; vector order_int(_n+1); int *porder_int = &order_int[0]; void genrcm(int n, int * xadj, int * iadj, int * perm, int * xls, int * mask); genrcm (_n, pialoc, pjaloc, porder_int, pxls, pmask); for (i=0;i<_n;i++) { porder_int[i] -= 1; } for (i=0;i<_n;i++) { pmask[porder_int[i]] = i; } for (i=0;i<_n;i++) { _order[i] = pmask[i]; } } }; // // Compute optimal ordering for the matrix via splitting into 3 blocks: 1-st order Schur complement for third block, which is the separator between first block and remaining data //======================================================================================== template void CIlu2_impl<_Int,_Flt>::ComputeOptimalOrderSchur (int _ordtype, const int _n, int _n1, const vector<_Int> &_ia_alu, const vector<_Int> &_ja_alu, int &_n2, vector &_order) { _order.resize (_n+1); int *porder = &_order[0]; // Compute symmetrized matrix vector<_Int> ia_symm; vector<_Int> ja_symm; CIlu2_impl<_Int,_Flt>::SymmetrizeSparsity (_n, _ia_alu, _ja_alu, ia_symm, ja_symm); _Int *pia_symm = &ia_symm[0]; _Int *pja_symm = &ja_symm[0]; // Get submatrix corresponding to the first block and compute optimal ordering for it int nzja_1 = 0; int i, j, jj; for (i=0;i<_n1;i++) { for (j=(int)pia_symm[i];j ia1 (_n1+1); vector<_Int> ja1 (nzja_1+1); _Int *pia1 = &ia1[0]; _Int *pja1 = &ja1[0]; nzja_1 = 0; pia1[0] = 0; for (i=0;i<_n1;i++) { for (j=(int)pia_symm[i];j order1 (1); CIlu2_impl<_Int,_Flt>::ComputeOptimalOrder (_ordtype, _n1, ia1, ja1, order1); int *porder1 = &order1[0]; for (i=0;i<_n1;i++) porder[i] = porder1[i]; { vector<_Int> ia1_temp(1); vector<_Int> ja1_temp(1); vector order1_temp(1); ia1.swap(ia1_temp); ja1.swap(ja1_temp); order1.swap(order1); } // Register columns in the second part which is to be moved to the bordering vector imask (_n+1); vector list (_n+1); vector indarr (_n+1); int *pimask = &imask[0]; int *plist = &list[0]; int *pindarr = &indarr[0]; for (i=_n1;i<_n;i++) pimask[i] = -1; int nlist_bord = 0; for (i=0;i<_n1;i++) { for (j=(int)pia_symm[i];j= _n1 && pimask[jj] == -1) { plist[nlist_bord] = jj; nlist_bord++; pimask[jj] = 1; } } } sort (plist,plist+nlist_bord); _n2 = _n-nlist_bord; int ip2 = _n1; int ip3 = _n2; for (i=_n1;i<_n;i++) { if (pimask[i] == -1) { porder[i] = ip2; ip2++; } else { porder[i] = ip3; ip3++; } } vector iorder (_n+1); int *piorder = &iorder[0]; for (i=0;i<_n;i++) piorder[porder[i]] = i; // Reorder the matrix vector<_Int> ia_ord(1); vector<_Int> ja_ord(1); CIlu2_impl<_Int,_Flt>::ReorderMatrixSp (_n, _order, ia_symm, ja_symm, ia_ord, ja_ord); _Int *pia_ord = &ia_ord[0]; _Int *pja_ord = &ja_ord[0]; // Get submatrix 2 and compute its ordering int nzja_2 = 0; for (i=_n1;i<_n2;i++) { for (j=(int)pia_ord[i];j= _n1 && jj < _n2) nzja_2++; } } int n12 = _n2-_n1; vector<_Int> ia2 (n12+1); vector<_Int> ja2 (nzja_2+1); _Int *pia2 = &ia2[0]; _Int *pja2 = &ja2[0]; nzja_2 = 0; pia2[0] = 0; for (i=_n1;i<_n2;i++) { for (j=(int)pia_ord[i];j= _n1 && jj < _n2) { pja2[nzja_2] = jj-_n1; nzja_2++; } } pia2[i-_n1+1] = nzja_2; } vector order2 (1); CIlu2_impl<_Int,_Flt>::ComputeOptimalOrder (_ordtype, n12, ia2, ja2, order2); int *porder2 = &order2[0]; int iold; for (i=0;i ia2_temp (1); vector<_Int> ja2_temp (1); vector order2_temp (1); ia2.swap(ia2_temp); ja2.swap(ja2_temp); order2.swap(order2); } // Compute ends of second block in all rows vector ia_end2 (_n+1); int *pia_end2 = &ia_end2[0]; for (i=0;i<_n;i++) { pia_end2[i] = (int)pia_ord[i]; for (j=(int)pia_ord[i];j ia_bord (n23+1); int *pia_bord = &ia_bord[0]; pia_bord[0] = 0; int nzja_bord = 0; vector ja_bord (1); int nlistloc, k, kk; for (i=_n2;i<_n;i++) { icycle++; nlistloc = 0; for (j=(int)pia_ord[i];j<=pia_end2[i];j++) { jj = (int)pja_ord[j]; for (k=(int)pia_ord[jj];k<=pia_end2[jj];k++) { kk = (int)pja_ord[k]; if (pimask[kk] != icycle) { plist[nlistloc] = kk; nlistloc++; pimask[kk] = icycle; } } } sort (plist,plist+nlistloc); ja_bord.resize (nzja_bord+nlistloc+1); for (j=0;j ia_bord_t (1); vector ja_bord_t (1); CIlu2_impl::TransposeSp (nlistloc, ia_bord, ja_bord, ia_bord_t, ja_bord_t); int *pia_bord_t = &ia_bord_t[0]; int *pja_bord_t = &ja_bord_t[0]; // Compute Schur complement via extended block data vector<_Int> ia3_Schur (n23+1); vector<_Int> ja3_Schur (1); _Int *pia3_Schur = &ia3_Schur[0]; int nzja_Schur = 0; for (i=0;i ia3 (n23+1); vector<_Int> ja3 (1); int nzja_3 = 0; _Int *pia3 = &ia3[0]; for (i=0;i order3 (1); CIlu2_impl<_Int,_Flt>::ComputeOptimalOrder (_ordtype, n23, ia3, ja3, order3); int *porder3 = &order3[0]; for (i=0;i void CIlu2_impl<_Int,_Flt>::ReorderMatrixSp (int _n, const vector &_order, vector<_Int> &_ia_alu, vector<_Int> &_ja_alu, vector<_Int> &_ia_alu_ord, vector<_Int> &_ja_alu_ord) { // Compute inverse order vector iord (_n+1); int *piord = &iord[0]; int i; for (i=0;i<_n;i++) piord[_order[i]] = i; // Reorder the matrix int nzja = (int)_ia_alu[_n]; _ia_alu_ord.resize (_n+1); _ja_alu_ord.resize (nzja+1); _Int *piaord = &_ia_alu_ord[0]; _Int *pjaord = &_ja_alu_ord[0]; for (i=0;i<=_n;i++) piaord[i] = 0; int nimax = 0; int ni; for (i=0;i<_n;i++) { ni = (int)(_ia_alu[i+1]-_ia_alu[i]); if (ni>nimax) nimax = ni; } vector iiarr (nimax+1); CSortInt *piiarr = &iiarr[0]; int j; for (i=0;i<_n;i++) { j = _order[i]; piaord[j+1] = _ia_alu[i+1]-_ia_alu[i]; } for (i=0;i<_n;i++) piaord[i+1] += piaord[i]; int nz = 0; int inew, nzloc, ibs, jold, jnew; for (inew=0;inew<_n;inew++) { i = piord[inew]; nzloc = (int)(_ia_alu[i+1]-_ia_alu[i]); ibs = nz; for (j=(int)_ia_alu[i];j<_ia_alu[i+1];j++) { jold = (int)_ja_alu[j]; jnew = _order[jold]; pjaord[nz] = (_Int)jnew; nz++; } // Sort elements for (j=0;j void CIlu2_impl<_Int,_Flt>::ReorderMatrix (int _n, const vector &_order, vector<_Int> &_ia_alu, vector<_Int> &_ja_alu, vector<_Flt> &_a_alu, vector<_Int> &_ia_alu_ord, vector<_Int> &_ja_alu_ord, vector<_Flt> &_a_alu_ord) { // Compute inverse order vector iord (_n+1); int *piord = &iord[0]; int i; for (i=0;i<_n;i++) piord[_order[i]] = i; // Reorder the matrix int nzja = (int)_ia_alu[_n]; _ia_alu_ord.resize (_n+1); _ja_alu_ord.resize (nzja+1); _a_alu_ord.resize (nzja+1); _Int *piaord = &_ia_alu_ord[0]; _Int *pjaord = &_ja_alu_ord[0]; _Flt *paord = &_a_alu_ord[0]; for (i=0;i<=_n;i++) piaord[i] = 0; int nimax = 0; int ni; for (i=0;i<_n;i++) { ni = (int)(_ia_alu[i+1]-_ia_alu[i]); if (ni>nimax) nimax = ni; } vector iiarr (nimax+1); vector<_Flt> elems (nimax+1); CSortInt *piiarr = &iiarr[0]; _Flt *pelems = &elems[0]; int j; for (i=0;i<_n;i++) { j = _order[i]; piaord[j+1] = _ia_alu[i+1]-_ia_alu[i]; } for (i=0;i<_n;i++) piaord[i+1] += piaord[i]; int nz = 0; int inew, nzloc, ibs, jold, jnew; for (inew=0;inew<_n;inew++) { i = piord[inew]; nzloc = (int)(_ia_alu[i+1]-_ia_alu[i]); ibs = nz; for (j=(int)_ia_alu[i];j<_ia_alu[i+1];j++) { jold = (int)_ja_alu[j]; jnew = _order[jold]; pjaord[nz] = (_Int)jnew; paord[nz] = _a_alu[j]; nz++; } // Sort elements for (j=0;j void CIlu2_impl<_Int,_Flt>::Ilu2BlockTransform (int _sctype, int _nitersc, int _fcttype, double _pivmin, double _tau1, double _tau2, double _theta, int _n, vector<_Int> &_ia_alu, vector<_Int> &_ja_alu, vector<_Flt> &_a_alu, vector<_Int> &_ia_lu, vector<_Int> &_ida_lu, vector<_Int> &_ja_lu, vector<_Flt> &_a_lu, double &_eigmin_att, double &_eigmax_att) { // Perform fct vector<_Int> ia_lt; vector<_Int> ja_lt; vector<_Flt> a_lt; vector<_Int> ia_u; vector<_Int> ja_u; vector<_Flt> a_u; CIlu2_impl<_Int,_Flt>::Ilu2Block (_sctype, _nitersc, _fcttype, _pivmin, _tau1, _tau2, _theta, _n, _ia_alu, _ja_alu, _a_alu, ia_lt, ja_lt, a_lt, ia_u, ja_u, a_u, _eigmin_att, _eigmax_att); // Transpose L vector<_Int> ia_l; vector<_Int> ja_l; vector<_Flt> a_l; CIlu2_impl<_Int,_Flt>::Transpose (_n, ia_lt, ja_lt, a_lt, ia_l, ja_l, a_l); { vector<_Int> ia_dummy; vector<_Int> ja_dummy; vector<_Flt> a_dummy; ia_lt.swap (ia_dummy); ja_lt.swap (ja_dummy); a_lt.swap (a_dummy); } // Combine rows LU CIlu2_impl<_Int,_Flt>::CombineRowsLU (_n, ia_l, ja_l, a_l, ia_u, ja_u, a_u, _ia_lu, _ida_lu, _ja_lu, _a_lu); }; // // Perform ILU2 point factorization of the block with future diagonal modification //======================================================================================== template void CIlu2_impl<_Int,_Flt>::Ilu2Block (int _sctype, int _nitersc, int _fcttype, double _pivmin, double _tau1, double _tau2, double _theta, int _n, vector<_Int> &_ia_alu, vector<_Int> &_ja_alu, vector<_Flt> &_a_alu, vector<_Int> &_ia_l, vector<_Int> &_ja_l, vector<_Flt> &_a_l, vector<_Int> &_ia_u, vector<_Int> &_ja_u, vector<_Flt> &_a_u, double &_eigmin_att, double &_eigmax_att) { // Compute explicit scaling vector<_Flt> scl_L; vector<_Flt> scl_U; CIlu2_impl<_Int,_Flt>::ComputeScaling (_sctype, _nitersc, _n, _ia_alu, _ja_alu, _a_alu, scl_L, scl_U); // Perform explicit scaling int nza_alu = (int)_ia_alu[_n]; vector<_Flt> a_scl_expl (nza_alu+1); _Flt *p_a_alu = &_a_alu[0]; _Flt *pa_scl_expl = &a_scl_expl[0]; memcpy (pa_scl_expl, p_a_alu, (size_t)(nza_alu*sizeof(_Flt))); CIlu2_impl<_Int,_Flt>::MatrixScale (_n, scl_L, scl_U, _ia_alu, _ja_alu, a_scl_expl); // Split vector<_Int> ia_l; vector<_Int> ja_l; vector<_Flt> a_l; vector<_Int> ia_u; vector<_Int> ja_u; vector<_Flt> a_u; CIlu2_impl<_Int,_Flt>::SplitLU (_n, _ia_alu, _ja_alu, a_scl_expl, ia_l, ja_l, a_l, ia_u, ja_u, a_u); { vector<_Flt> a_dummy; a_scl_expl.swap (a_dummy); } // Transpose L vector<_Int> ia_lt; vector<_Int> ja_lt; vector<_Flt> a_lt; CIlu2_impl<_Int,_Flt>::Transpose (_n, ia_l, ja_l, a_l, ia_lt, ja_lt, a_lt); { vector<_Int> ia_dummy; vector<_Int> ja_dummy; vector<_Flt> a_dummy; ia_l.swap (ia_dummy); ja_l.swap (ja_dummy); a_l.swap (a_dummy); } // Combine into extended pairs vector<_Int> ia_alu_cnd; vector<_Int> ja_alu_cnd; vector<_Flt> a_alu_cnd; CIlu2_impl<_Int,_Flt>::CombinePairs (_n, ia_lt, ja_lt, a_lt, ia_u, ja_u, a_u, ia_alu_cnd, ja_alu_cnd, a_alu_cnd); { vector<_Int> ia_dummy; vector<_Int> ja_dummy; vector<_Flt> a_dummy; ia_lt.swap (ia_dummy); ja_lt.swap (ja_dummy); a_lt.swap (a_dummy); vector<_Int> ia_dummy1; vector<_Int> ja_dummy1; vector<_Flt> a_dummy1; ia_u.swap (ia_dummy1); ja_u.swap (ja_dummy1); a_u.swap (a_dummy1); } // Perform fct vector ja_char_alu_cnd; if (_fcttype >= 0) { int nzja_lu_cnd = (int)ia_alu_cnd[_n]; ja_char_alu_cnd.resize (nzja_lu_cnd+1); int i; for (i=0;i ia_lu_cnd; vector<_Int> ja_lu_cnd; vector ja_char_lu_cnd; vector<_Flt> a_lu_cnd; CIlu2_impl<_Int,_Flt>::Ilu2BlockIlu2 (_fcttype, _pivmin, _tau1, _tau2, _theta, _n, _n, ia_alu_cnd, ja_alu_cnd, ja_char_alu_cnd, a_alu_cnd, ia_lu_cnd, ja_lu_cnd, ja_char_lu_cnd, a_lu_cnd, _eigmin_att, _eigmax_att); { vector<_Int> ia_dummy; vector<_Int> ja_dummy; vector ja_char_dummy; vector<_Flt> a_dummy; vector ja_char1_dummy; ia_alu_cnd.swap (ia_dummy); ja_alu_cnd.swap (ja_dummy); ja_char_alu_cnd.swap (ja_char_dummy); a_alu_cnd.swap (a_dummy); ja_char_lu_cnd.swap (ja_char1_dummy); } // Split pairs with filtering CIlu2_impl<_Int,_Flt>::SplitPairsFilter (_tau1, _n, ia_lu_cnd, ja_lu_cnd, a_lu_cnd, _ia_l, _ja_l, _a_l, _ia_u, _ja_u, _a_u); { vector<_Int> ia_dummy; vector<_Int> ja_dummy; vector<_Flt> a_dummy; ia_lu_cnd.swap (ia_dummy); ja_lu_cnd.swap (ja_dummy); a_lu_cnd.swap (a_dummy); } // Perform explicit rescaling of compute triangular factors vector<_Flt> inv_scl_L (_n+1); vector<_Flt> inv_scl_U (_n+1); CIlu2_impl<_Int,_Flt>::InverseDiag (_n, scl_L, inv_scl_L); CIlu2_impl<_Int,_Flt>::InverseDiag (_n, scl_U, inv_scl_U); CIlu2_impl<_Int,_Flt>::RescaleU (_n, scl_L, inv_scl_L, _ia_l, _ja_l, _a_l); CIlu2_impl<_Int,_Flt>::RescaleU (_n, scl_U, inv_scl_U, _ia_u, _ja_u, _a_u); { vector<_Flt> dl_dummy; vector<_Flt> du_dummy; vector<_Flt> idl_dummy; vector<_Flt> idu_dummy; scl_L.swap (dl_dummy); scl_U.swap (du_dummy); inv_scl_L.swap (idl_dummy); inv_scl_U.swap (idu_dummy); } }; // // Compute scaling //======================================================================================== template void CIlu2_impl<_Int,_Flt>::ComputeScaling (int _sctype, int _nitersc, int _n, vector<_Int> &_ia_alu, vector<_Int> &_ja_alu, vector<_Flt> &_a_alu, vector<_Flt> &_sclL, vector<_Flt> &_sclU) { _sclL.resize (_n+1); _sclU.resize (_n+1); // Simple diagonal based scaling int i, j, jj; _Flt diag; if (_sctype == -1) { diag = (_Flt)1.; for (i=0;i<_n;i++) { _sclL[i] = diag; _sclU[i] = diag; } } else if (_sctype == 0) { for (i=0;i<_n;i++) { diag = 1.; for (j=(int)_ia_alu[i];j<_ia_alu[i+1];j++) { jj = (int)_ja_alu[j]; if (i == jj) { diag = _a_alu[j]; } } if (diag > 0.) { diag = (_Flt)(1./sqrt(diag)); _sclL[i] = diag; _sclU[i] = diag; } else { diag = -diag; diag = (_Flt)(1./sqrt(diag)); _sclL[i] = -diag; _sclU[i] = diag; } } } else { // Iterative rows/columns balancing scaling _Flt aux, aux1; int iter; for (i=0;i<_n;i++) { aux = 0.; for (j=(int)_ia_alu[i];j<_ia_alu[i+1];j++) { jj = (int)_ja_alu[j]; aux1 = _a_alu[j]; aux += aux1*aux1; } aux = (_Flt)(1.0 / sqrt(aux)); _sclL[i] = aux; } for (iter=0;iter<_nitersc;iter++) { for (i=0;i<_n;i++) _sclU[i] = 0.; for (i=0;i<_n;i++) { for (j=(int)_ia_alu[i];j<_ia_alu[i+1];j++) { jj = (int)_ja_alu[j]; aux1 = _a_alu[j]; _sclU[jj] += _sclL[i]*aux1*aux1; } } for (i=0;i<_n;i++) { _sclU[i] = (_Flt)(1./_sclU[i]); } for (i=0;i<_n;i++) { aux = 0.; for (j=(int)_ia_alu[i];j<_ia_alu[i+1];j++) { jj = (int)_ja_alu[j]; aux1 = _a_alu[j]; aux += _sclU[jj]*aux1*aux1; } aux = (_Flt)(1.0 / aux); _sclL[i] = aux; } } for (i=0;i<_n;i++) _sclL[i] = (_Flt)(sqrt(_sclL[i])); for (i=0;i<_n;i++) _sclU[i] = (_Flt)(sqrt(_sclU[i])); } }; // // Perform explicit scaling //======================================================================================== template void CIlu2_impl<_Int,_Flt>::MatrixScale (int _n, vector<_Flt> &_sclL, vector<_Flt> &_sclU, vector<_Int> &_ia_alu, vector<_Int> &_ja_alu, vector<_Flt> &_a_alu) { int i, j, jj; for (i=0;i<_n;i++) { for (j=(int)_ia_alu[i];j<_ia_alu[i+1];j++) { jj = (int)_ja_alu[j]; _a_alu[j] = _sclL[i]*_a_alu[j]*_sclU[jj]; } } } // // Symmetrize sparsity //======================================================================================== template void CIlu2_impl<_Int,_Flt>::SymmetrizeSparsity (int _n, const vector<_Int> &_ia_alu, const vector<_Int> &_ja_alu, vector<_Int> &_ia_symm, vector<_Int> &_ja_symm) { // Split vector<_Int> ia_l; vector<_Int> ja_l; vector<_Int> ia_u; vector<_Int> ja_u; CIlu2_impl<_Int,_Flt>::SplitLUSp (_n, _ia_alu, _ja_alu, ia_l, ja_l, ia_u, ja_u); // Transpose L vector<_Int> ia_lt; vector<_Int> ja_lt; CIlu2_impl<_Int,_Flt>::TransposeSp (_n, ia_l, ja_l, ia_lt, ja_lt); { vector<_Int> ia_dummy; vector<_Int> ja_dummy; ia_l.swap (ia_dummy); ja_l.swap (ja_dummy); } // Combine into extended pairs vector<_Int> ia_alu_cnd; vector<_Int> ja_alu_cnd; CIlu2_impl<_Int,_Flt>::CombineLUSp (_n, ia_lt, ja_lt, ia_u, ja_u, ia_alu_cnd, ja_alu_cnd); { vector<_Int> ia_dummy; vector<_Int> ja_dummy; ia_lt.swap (ia_dummy); ja_lt.swap (ja_dummy); vector<_Int> ia_dummy1; vector<_Int> ja_dummy1; ia_u.swap (ia_dummy1); ja_u.swap (ja_dummy1); } // Transpose combined sparsity vector<_Int> ia_alu_cnd_t; vector<_Int> ja_alu_cnd_t; CIlu2_impl<_Int,_Flt>::TransposeSp (_n, ia_alu_cnd, ja_alu_cnd, ia_alu_cnd_t, ja_alu_cnd_t); // Compute symmetrized matrix CIlu2_impl<_Int,_Flt>::CombineLUSp (_n, ia_alu_cnd_t, ja_alu_cnd_t, ia_alu_cnd, ja_alu_cnd, _ia_symm, _ja_symm); }; // // Split matrix data into L and U parts for sparsity only //======================================================================================== template void CIlu2_impl<_Int,_Flt>::SplitLUSp (int _n, const vector<_Int> &_ia_alu, const vector<_Int> &_ja_alu, vector<_Int> &_ia_l, vector<_Int> &_ja_l, vector<_Int> &_ia_u, vector<_Int> &_ja_u) { int nzja_alu = (int)_ia_alu[_n]; // Count number of elems int nzja_l = 0; int nzja_u = 0; int i, j, jj; for (i=0;i<_n;i++) { for (j=(int)_ia_alu[i];j<_ia_alu[i+1];j++) { jj = (int)_ja_alu[j]; if (jj <= i) nzja_l++; if (jj >= i) nzja_u++; } } // Allocate and fill _ia_l.resize (_n+1); _ja_l.resize (nzja_l+1); _ia_u.resize (_n+1); _ja_u.resize (nzja_u+1); nzja_l = 0; nzja_u = 0; _ia_l[0] = 0; _ia_u[0] = 0; for (i=0;i<_n;i++) { for (j=(int)_ia_alu[i];j<_ia_alu[i+1];j++) { jj = (int)_ja_alu[j]; if (jj <= i) { _ja_l[nzja_l] = jj; nzja_l++; } if (jj >= i) { _ja_u[nzja_u] = jj; nzja_u++; } } _ia_l[i+1] = nzja_l; _ia_u[i+1] = nzja_u; } }; // // Split matrix data into L and U parts //======================================================================================== template void CIlu2_impl<_Int,_Flt>::SplitLU (int _n, vector<_Int> &_ia_alu, vector<_Int> &_ja_alu, vector<_Flt> &_a_alu, vector<_Int> &_ia_l, vector<_Int> &_ja_l, vector<_Flt> &_a_l, vector<_Int> &_ia_u, vector<_Int> &_ja_u, vector<_Flt> &_a_u) { int nzja_alu = (int)_ia_alu[_n]; // Count number of elems int nzja_l = 0; int nzja_u = 0; int i, j, jj; for (i=0;i<_n;i++) { for (j=(int)_ia_alu[i];j<_ia_alu[i+1];j++) { jj = (int)_ja_alu[j]; if (jj <= i) nzja_l++; if (jj >= i) nzja_u++; } } // Allocate and fill _ia_l.resize (_n+1); _ja_l.resize (nzja_l+1); _a_l.resize (nzja_l+1); _ia_u.resize (_n+1); _ja_u.resize (nzja_u+1); _a_u.resize (nzja_u+1); nzja_l = 0; nzja_u = 0; _ia_l[0] = 0; _ia_u[0] = 0; for (i=0;i<_n;i++) { for (j=(int)_ia_alu[i];j<_ia_alu[i+1];j++) { jj = (int)_ja_alu[j]; if (jj <= i) { _ja_l[nzja_l] = jj; _a_l[nzja_l] = _a_alu[j]; nzja_l++; } if (jj >= i) { _ja_u[nzja_u] = jj; _a_u[nzja_u] = _a_alu[j]; nzja_u++; } } _ia_l[i+1] = nzja_l; _ia_u[i+1] = nzja_u; } }; // // Transpose square matrix for sparsity only //======================================================================================== template void CIlu2_impl<_Int,_Flt>::TransposeSp (int _n, vector<_Int> &_ia_a, vector<_Int> &_ja_a, vector<_Int> &_ia_at, vector<_Int> &_ja_at) { int nzja_a = (int)_ia_a[_n]; // Allocate work array vector<_Int> iptr (_n+1); // Allocate tranposed data _ia_at.resize (_n+1); _ja_at.resize (nzja_a+1); int i, j, jj, k; for (i=0;i<=_n;i++) _ia_at[i] = 0; for (i=0;i void CIlu2_impl<_Int,_Flt>::Transpose (int _n, vector<_Int> &_ia_a, vector<_Int> &_ja_a, vector<_Flt> &_a_a, vector<_Int> &_ia_at, vector<_Int> &_ja_at, vector<_Flt> &_a_at) { int nzja_a = (int)_ia_a[_n]; // Allocate work array vector<_Int> iptr (_n+1); // Allocate tranposed data _ia_at.resize (_n+1); _ja_at.resize (nzja_a+1); _a_at.resize (nzja_a+1); int i, j, jj, k; for (i=0;i<=_n;i++) _ia_at[i] = 0; for (i=0;i void CIlu2_impl<_Int,_Flt>::CombineLUSp (int _n, vector<_Int> &_ia_l, vector<_Int> &_ja_l, vector<_Int> &_ia_u, vector<_Int> &_ja_u, vector<_Int> &_ia_alu, vector<_Int> &_ja_alu) { // Compute number of extended elems int nzja_ext = 0; int i, ipl, ipu, iendl, iendu, jj_l, jj_u; for (i=0;i<_n;i++) { ipl = (int)_ia_l[i]; ipu = (int)_ia_u[i]; iendl = (int)_ia_l[i+1]-1; iendu = (int)_ia_u[i+1]-1; while (ipl <= iendl || ipu <= iendu) { if (ipl <= iendl && ipu <= iendu) { jj_l = (int)_ja_l[ipl]; jj_u = (int)_ja_u[ipu]; if (jj_l == jj_u) { nzja_ext++; ipl++; ipu++; } else if (jj_l < jj_u) { nzja_ext++; ipl++; } else { nzja_ext++; ipu++; } } else if (ipl <= iendl) { nzja_ext++; ipl++; } else { nzja_ext++; ipu++; } } } // Count number of elems _ia_alu.resize (_n+1); _ja_alu.resize (nzja_ext+1); _ia_alu[0] = 0; nzja_ext = 0; for (i=0;i<_n;i++) { ipl = (int)_ia_l[i]; ipu = (int)_ia_u[i]; iendl = (int)_ia_l[i+1]-1; iendu = (int)_ia_u[i+1]-1; while (ipl <= iendl || ipu <= iendu) { if (ipl <= iendl && ipu <= iendu) { jj_l = (int)_ja_l[ipl]; jj_u = (int)_ja_u[ipu]; if (jj_l == jj_u) { _ja_alu[nzja_ext] = jj_l; nzja_ext++; ipl++; ipu++; } else if (jj_l < jj_u) { _ja_alu[nzja_ext] = jj_l; nzja_ext++; ipl++; } else { _ja_alu[nzja_ext] = jj_u; nzja_ext++; ipu++; } } else if (ipl <= iendl) { _ja_alu[nzja_ext] = _ja_l[ipl]; nzja_ext++; ipl++; } else { _ja_alu[nzja_ext] = _ja_u[ipu]; nzja_ext++; ipu++; } } _ia_alu[i+1] = nzja_ext; } }; // // Combine L and U data into extended pairs //======================================================================================== template void CIlu2_impl<_Int,_Flt>::CombinePairs (int _n, vector<_Int> &_ia_l, vector<_Int> &_ja_l, vector<_Flt> &_a_l, vector<_Int> &_ia_u, vector<_Int> &_ja_u, vector<_Flt> &_a_u, vector<_Int> &_ia_alu, vector<_Int> &_ja_alu, vector<_Flt> &_a_alu) { // Compute number of extended elems int nzja_ext = 0; int i, ipl, ipu, iendl, iendu, jj_l, jj_u; for (i=0;i<_n;i++) { ipl = (int)_ia_l[i]; ipu = (int)_ia_u[i]; iendl = (int)_ia_l[i+1]-1; iendu = (int)_ia_u[i+1]-1; while (ipl <= iendl || ipu <= iendu) { if (ipl <= iendl && ipu <= iendu) { jj_l = (int)_ja_l[ipl]; jj_u = (int)_ja_u[ipu]; if (jj_l == jj_u) { nzja_ext++; ipl++; ipu++; } else if (jj_l < jj_u) { nzja_ext++; ipl++; } else { nzja_ext++; ipu++; } } else if (ipl <= iendl) { nzja_ext++; ipl++; } else { nzja_ext++; ipu++; } } } // Count number of elems _ia_alu.resize (_n+1); _ja_alu.resize (nzja_ext+1); _a_alu.resize (nzja_ext*2+1); _ia_alu[0] = 0; nzja_ext = 0; for (i=0;i<_n;i++) { ipl = (int)_ia_l[i]; ipu = (int)_ia_u[i]; iendl = (int)_ia_l[i+1]-1; iendu = (int)_ia_u[i+1]-1; while (ipl <= iendl || ipu <= iendu) { if (ipl <= iendl && ipu <= iendu) { jj_l = (int)_ja_l[ipl]; jj_u = (int)_ja_u[ipu]; if (jj_l == jj_u) { _ja_alu[nzja_ext] = jj_l; _a_alu[nzja_ext*2] = _a_l[ipl]; _a_alu[nzja_ext*2+1] = _a_u[ipu]; nzja_ext++; ipl++; ipu++; } else if (jj_l < jj_u) { _ja_alu[nzja_ext] = jj_l; _a_alu[nzja_ext*2] = _a_l[ipl]; _a_alu[nzja_ext*2+1] = 0.; nzja_ext++; ipl++; } else { _ja_alu[nzja_ext] = jj_u; _a_alu[nzja_ext*2] = 0.; _a_alu[nzja_ext*2+1] = _a_u[ipu]; nzja_ext++; ipu++; } } else if (ipl <= iendl) { _ja_alu[nzja_ext] = _ja_l[ipl]; _a_alu[nzja_ext*2] = _a_l[ipl]; _a_alu[nzja_ext*2+1] = 0.; nzja_ext++; ipl++; } else { _ja_alu[nzja_ext] = _ja_u[ipu]; _a_alu[nzja_ext*2] = 0.; _a_alu[nzja_ext*2+1] = _a_u[ipu]; nzja_ext++; ipu++; } } _ia_alu[i+1] = nzja_ext; } }; // // Split pairs fct data into L and U parts with post filtering //======================================================================================== template void CIlu2_impl<_Int,_Flt>::SplitPairsFilter (double _tau1, int _n, vector<_Int> &_ia_alu, vector<_Int> &_ja_alu, vector<_Flt> &_a_alu, vector<_Int> &_ia_l, vector<_Int> &_ja_l, vector<_Flt> &_a_l, vector<_Int> &_ia_u, vector<_Int> &_ja_u, vector<_Flt> &_a_u) { int nzja_alu = (int)_ia_alu[_n]; // Allocate and fill _ia_l.resize (_n+1); _ja_l.resize (nzja_alu+1); _a_l.resize (nzja_alu+1); _ia_u.resize (_n+1); _ja_u.resize (nzja_alu+1); _a_u.resize (nzja_alu+1); int nzja_l = 0; int nzja_u = 0; int i, j, jj; double auxL, auxU; _ia_l[0] = 0; _ia_u[0] = 0; for (i=0;i<_n;i++) { for (j=(int)_ia_alu[i];j<_ia_alu[i+1];j++) { jj = (int)_ja_alu[j]; auxL = _a_alu[j*2]; auxU = _a_alu[j*2+1]; if (auxL < 0.) auxL = -auxL; if (auxU < 0.) auxU = -auxU; if (jj == i || auxL >= _tau1) { _ja_l[nzja_l] = jj; _a_l[nzja_l] = _a_alu[j*2]; nzja_l++; } if (jj == i || auxU >= _tau1) { _ja_u[nzja_u] = jj; _a_u[nzja_u] = _a_alu[j*2+1]; nzja_u++; } } _ia_l[i+1] = nzja_l; _ia_u[i+1] = nzja_u; } }; // // Combine L and U data into extended pairs //======================================================================================== template void CIlu2_impl<_Int,_Flt>::CombineRowsLU (int _n, vector<_Int> &_ia_l, vector<_Int> &_ja_l, vector<_Flt> &_a_l, vector<_Int> &_ia_u, vector<_Int> &_ja_u, vector<_Flt> &_a_u, vector<_Int> &_ia_alu, vector<_Int> &_ida_alu, vector<_Int> &_ja_alu, vector<_Flt> &_a_alu) { // Compute number of extended elems int nzja_ext = (int)(_ia_l[_n]+_ia_u[_n]); // Count number of elems _ia_alu.resize (_n+1); _ida_alu.resize (_n+1); _ja_alu.resize (nzja_ext+1); _a_alu.resize (nzja_ext+1); _ia_alu[0] = 0; int i, j; nzja_ext = 0; for (i=0;i<_n;i++) { for (j=(int)_ia_l[i];j<_ia_l[i+1];j++) { _ja_alu[nzja_ext] = _ja_l[j]; _a_alu[nzja_ext] = _a_l[j]; nzja_ext++; } _ida_alu[i] = nzja_ext; for (j=(int)_ia_u[i];j<_ia_u[i+1];j++) { _ja_alu[nzja_ext] = _ja_u[j]; _a_alu[nzja_ext] = _a_u[j]; nzja_ext++; } _ia_alu[i+1] = nzja_ext; } }; // // Compute inverse scaling //======================================================================================== template void CIlu2_impl<_Int,_Flt>::InverseDiag (int _n, vector<_Flt> &_sclU, vector<_Flt> &_invsclU) { int i; for (i=0;i<_n;i++) { _invsclU[i] = (_Flt)(1. / _sclU[i]); } } // // Rescale factor back //======================================================================================== template void CIlu2_impl<_Int,_Flt>::RescaleU (int _n, vector<_Flt> &_sclU, vector<_Flt> &_invsclU, vector<_Int> &_ia_u, vector<_Int> &_ja_u, vector<_Flt> &_a_u) { int i, j, jj; for (i=0;i<_n;i++) { for (j=(int)_ia_u[i];j<_ia_u[i+1];j++) { jj = (int)_ja_u[j]; if (jj != i) { _a_u[j] *= _invsclU[jj]; } else { _a_u[j] *= _sclU[i]; } } } }; // // Perform ILU2 point factorization of the block with future diagonal modification (no structural control) //======================================================================================== template void CIlu2_impl<_Int,_Flt>::Ilu2BlockIlu2 (double _pivmin, double _tau1, double _tau2, double _theta, int _n, int _n_ini, vector<_Int> &_ia_alu, vector<_Int> &_ja_alu, vector<_Flt> &_a_alu, vector<_Int> &_ia_lu, vector<_Int> &_ja_lu, vector<_Flt> &_a_lu, double &_eigmin_att, double &_eigmax_att) { // Open structures int nzja_alu = (int)_ia_alu[_n]; // Prepare mask arrays vector<_Int> ibegm (_n+1); vector<_Int> madj (_n+1); vector<_Int> iv (_n+1); vector<_Int> imaskc (_n+1); vector<_Int> listc (_n+1); vector<_Flt> fmaskc (2*_n+1); vector<_Flt> a_dia (_n+1); _Int *plistc = &listc[0]; int i; for (i=0;i<=_n;i++) ibegm[i] = -1; for (i=0;i<=_n;i++) madj[i] = -1; for (i=0;i<=_n;i++) imaskc[i] = -1; for (i=0;i<_n;i++) a_dia[i] = 0.; double eigmin_att = FLT_MAX; double eigmax_att = -FLT_MAX; _Flt fzero = 0.; _Flt fone = 1.; int j, jj, k, kcolmn, irwprv, nlistcnew, ind, ind1, irow, irwpr1, jcolmn; double auxL, auxU; double aux1, aux2, dfnorm; int icycle_int = -1; _ia_lu.resize(_n+1); _ja_lu.reserve(_n+1); _a_lu.reserve(_n*2+1); _ia_lu[0] = 0; int nzja_lu = 0; for (i=0;i<_n;i++) { irow = i; // Init current row int nlistcloc = 0; icycle_int++; if (i < _n_ini) { for (k=(int)_ia_alu[i];k<_ia_alu[i+1];k++) { kcolmn = (int)_ja_alu[k]; listc[nlistcloc] = kcolmn; nlistcloc++; fmaskc[kcolmn*2] = _a_alu[k*2]; fmaskc[kcolmn*2+1] = _a_alu[k*2+1]; imaskc[kcolmn] = icycle_int; } } else { listc[nlistcloc] = i; nlistcloc++; fmaskc[i*2] = fzero; fmaskc[i*2+1] = fzero; imaskc[i] = icycle_int; } // Update current row irwprv = (int)ibegm[irow]; while (irwprv != -1) { irwpr1 = (int)madj[irwprv]; if (iv[irwprv] < _ia_lu[irwprv+1]) { j = (int)iv[irwprv]; jj = (int)_ja_lu[j]; auxL = _a_lu[j*2]; auxU = _a_lu[j*2+1]; jcolmn = jj; if (jcolmn < 0) jcolmn = -jcolmn-1; if (jj >= 0) { for (k=(int)iv[irwprv];k<_ia_lu[irwprv+1];k++) { kcolmn = (int)_ja_lu[k]; if (kcolmn < 0) kcolmn = -kcolmn-1; if (imaskc[kcolmn] != icycle_int) { listc[nlistcloc] = kcolmn; nlistcloc++; fmaskc[kcolmn*2] = fzero; fmaskc[kcolmn*2+1] = fzero; imaskc[kcolmn] = icycle_int; } fmaskc[kcolmn*2] -= (_Flt)(auxU * _a_lu[k*2]); fmaskc[kcolmn*2+1] -= (_Flt)(auxL * _a_lu[k*2+1]); } } else { for (k=(int)iv[irwprv];k<_ia_lu[irwprv+1];k++) { kcolmn = (int)_ja_lu[k]; if (kcolmn >= 0) { if (imaskc[kcolmn] != icycle_int) { listc[nlistcloc] = kcolmn; nlistcloc++; fmaskc[kcolmn*2] = fzero; fmaskc[kcolmn*2+1] = fzero; imaskc[kcolmn] = icycle_int; } fmaskc[kcolmn*2] -= (_Flt)(auxU * _a_lu[k*2]); fmaskc[kcolmn*2+1] -= (_Flt)(auxL * _a_lu[k*2+1]); } } } } irwprv = irwpr1; } // Update transposed structure irwprv = (int)ibegm[irow]; while (irwprv != -1) { irwpr1 = (int)madj[irwprv]; if (iv[irwprv] >= _ia_lu[irwprv+1]-1) { madj[irwprv] = -1; } else { j = (int)iv[irwprv]+1; jcolmn = (int)_ja_lu[j]; if (jcolmn < 0) jcolmn = -jcolmn-1; madj[irwprv] = ibegm[jcolmn]; ibegm[jcolmn] = irwprv; } iv[irwprv]++; irwprv = irwpr1; } // Perform filtering of the data and modify diagonal if (i<_n_ini) { nlistcnew = 0; for (j=0;j aux2) ? aux1 : aux2; if (dfnorm < _tau2) { a_dia[irow] += (_Flt)(dfnorm); a_dia[ind] += (_Flt)(dfnorm); } else { listc[nlistcnew] = ind; nlistcnew++; } } else { listc[nlistcnew] = irow; nlistcnew++; } } nlistcloc = nlistcnew; } if (fmaskc[irow*2] > fzero) { fmaskc[irow*2] += (_Flt)(a_dia[irow]*_theta); } else { fmaskc[irow*2] -= (_Flt)(a_dia[irow]*_theta); } // Factorize current row and split into first and second order sort (plistc, plistc+nlistcloc); if (i<_n_ini) { aux1 = fmaskc[irow*2]; if (aux1 > 0.) { if (aux1 < eigmin_att) eigmin_att = aux1; if (aux1 > eigmax_att) eigmax_att = aux1; if (aux1 < _pivmin) aux1 = _pivmin; aux1 = sqrt(aux1); aux1 = fone / aux1; auxL = aux1; auxU = aux1; } else { aux1 = -aux1; if (aux1 < eigmin_att) eigmin_att = aux1; if (aux1 > eigmax_att) eigmax_att = aux1; if (aux1 < _pivmin) aux1 = _pivmin; aux1 = sqrt(aux1); aux1 = fone / aux1; auxL = -aux1; auxU = aux1; } fmaskc[irow*2] = (_Flt)(auxL); fmaskc[irow*2+1] = (_Flt)(auxU); } if (i<_n_ini) { for (j=0;j aux2) ? aux1 : aux2; if (dfnorm < _tau1) { listc[j] = (_Int) (-ind-1); } } } } // Store computed row elems for (j=0;j 1) { if (i < _n_ini) { iv[irow] = _ia_lu[i]+1; ind = (int)listc[1]; if (ind < 0) ind = -ind-1; madj[irow] = ibegm[ind]; ibegm[ind] = (_Int) irow; } else { iv[irow] = _ia_lu[i+1]+1; } } } _eigmin_att = eigmin_att; _eigmax_att = eigmax_att; // Condense the result (filter second order elems) int nzja_new = 0; for (i=0;i= 0) nzja_new++; } vector<_Int> jlu_cnd (nzja_new+1); vector<_Flt> lu_cnd (2*nzja_new+1); nzja_new = 0; listc[0] = 0; for (i=0;i<_n;i++) { for (j=(int)_ia_lu[i];j<_ia_lu[i+1];j++) { if (_ja_lu[j] >= 0) { jlu_cnd[nzja_new] = _ja_lu[j]; lu_cnd[nzja_new*2] = _a_lu[j*2]; lu_cnd[nzja_new*2+1] = _a_lu[j*2+1]; nzja_new++; } } listc[i+1] = nzja_new; } for (i=0;i<=_n;i++) _ia_lu[i] = listc[i]; _ja_lu.swap (jlu_cnd); _a_lu.swap (lu_cnd); }; // // Perform ILU2 point factorization of the block with future diagonal modification (with structural control) //======================================================================================== template void CIlu2_impl<_Int,_Flt>::Ilu2BlockIlu2 (int _fcttype, double _pivmin, double _tau1, double _tau2, double _theta, int _n, int _n_ini, vector<_Int> &_ia_alu, vector<_Int> &_ja_alu, vector &_ja_char_alu, vector<_Flt> &_a_alu, vector<_Int> &_ia_lu, vector<_Int> &_ja_lu, vector &_ja_char_lu, vector<_Flt> &_a_lu, double &_eigmin_att, double &_eigmax_att) { // Fast return if (_fcttype == -1) { CIlu2_impl<_Int,_Flt>::Ilu2BlockIlu2 (_pivmin, _tau1, _tau2, _theta, _n, _n_ini, _ia_alu, _ja_alu, _a_alu, _ia_lu, _ja_lu, _a_lu, _eigmin_att, _eigmax_att); return; } // Open structures int nzja_alu = (int)_ia_alu[_n]; // Prepare mask arrays vector<_Int> ibegm (_n+1); vector<_Int> madj (_n+1); vector<_Int> iv (_n+1); vector<_Int> imaskc (_n+1); vector imaskchar (_n+1); vector<_Int> listc (_n+1); vector<_Flt> fmaskc (2*_n+1); vector<_Flt> a_dia (_n+1); _Int *plistc = &listc[0]; int i; for (i=0;i<=_n;i++) ibegm[i] = -1; for (i=0;i<=_n;i++) madj[i] = -1; for (i=0;i<=_n;i++) imaskc[i] = -1; for (i=0;i<_n;i++) a_dia[i] = 0.; double eigmin_att = FLT_MAX; double eigmax_att = -FLT_MAX; int j, jj, k, kcolmn, irwprv, nlistcnew, ind, ind1, irow, irwpr1, jcolmn; double auxL, auxU; double aux1, aux2, dfnorm; _Flt fzero = 0.; _Flt fone = 1.; int icycle_int = -1; _ia_lu.resize(_n+1); _ja_lu.reserve(_n+1); _ja_char_lu.reserve(_n+1); _a_lu.reserve(_n*2+1); _ia_lu[0] = 0; int nzja_lu = 0; char jjchar1, jjchar2, jjchar; for (i=0;i<_n;i++) { irow = i; // Init current row int nlistcloc = 0; icycle_int++; if (i<_n_ini) { for (k=(int)_ia_alu[i];k<_ia_alu[i+1];k++) { kcolmn = (int)_ja_alu[k]; listc[nlistcloc] = kcolmn; nlistcloc++; fmaskc[kcolmn*2] = _a_alu[k*2]; fmaskc[kcolmn*2+1] = _a_alu[k*2+1]; imaskc[kcolmn] = icycle_int; imaskchar[kcolmn] = _ja_char_alu[k]; } } else { listc[nlistcloc] = i; nlistcloc++; fmaskc[i*2] = fzero; fmaskc[i*2+1] = fzero; imaskc[i] = icycle_int; imaskchar[i] = 0; } // Update current row irwprv = (int)ibegm[irow]; while (irwprv != -1) { irwpr1 = (int)madj[irwprv]; if (iv[irwprv] < _ia_lu[irwprv+1]) { j = (int)iv[irwprv]; jj = (int)_ja_lu[j]; jjchar1 = _ja_char_lu[j]+1; auxL = _a_lu[j*2]; auxU = _a_lu[j*2+1]; jcolmn = jj; if (jcolmn < 0) jcolmn = -jcolmn-1; if (jj >= 0) { for (k=(int)iv[irwprv];k<_ia_lu[irwprv+1];k++) { kcolmn = (int)_ja_lu[k]; if (kcolmn < 0) kcolmn = -kcolmn-1; jjchar2 = _ja_char_lu[k]+1; jjchar = (jjchar1 < jjchar2) ? jjchar2 : jjchar1; if (imaskc[kcolmn] != icycle_int) { listc[nlistcloc] = kcolmn; nlistcloc++; fmaskc[kcolmn*2] = fzero; fmaskc[kcolmn*2+1] = fzero; imaskc[kcolmn] = icycle_int; imaskchar[kcolmn] = jjchar; } else { if (imaskchar[kcolmn] > jjchar) imaskchar[kcolmn] = jjchar; } fmaskc[kcolmn*2] -= (_Flt)(auxU * _a_lu[k*2]); fmaskc[kcolmn*2+1] -= (_Flt)(auxL * _a_lu[k*2+1]); } } else { for (k=(int)iv[irwprv];k<_ia_lu[irwprv+1];k++) { kcolmn = (int)_ja_lu[k]; if (kcolmn >= 0) { jjchar2 = _ja_char_lu[k]+1; jjchar = (jjchar1 < jjchar2) ? jjchar2 : jjchar1; if (imaskc[kcolmn] != icycle_int) { listc[nlistcloc] = kcolmn; nlistcloc++; fmaskc[kcolmn*2] = fzero; fmaskc[kcolmn*2+1] = fzero; imaskc[kcolmn] = icycle_int; imaskchar[kcolmn] = jjchar; } else { if (imaskchar[kcolmn] > jjchar) imaskchar[kcolmn] = jjchar; } fmaskc[kcolmn*2] -= (_Flt)(auxU * _a_lu[k*2]); fmaskc[kcolmn*2+1] -= (_Flt)(auxL * _a_lu[k*2+1]); } } } } irwprv = irwpr1; } // Update transposed structure irwprv = (int)ibegm[irow]; while (irwprv != -1) { irwpr1 = (int)madj[irwprv]; if (iv[irwprv] >= _ia_lu[irwprv+1]-1) { madj[irwprv] = -1; } else { j = (int)iv[irwprv]+1; jcolmn = (int)_ja_lu[j]; if (jcolmn < 0) jcolmn = -jcolmn-1; madj[irwprv] = ibegm[jcolmn]; ibegm[jcolmn] = irwprv; } iv[irwprv]++; irwprv = irwpr1; } // Perform filtering of the data and modify diagonal if (i<_n_ini) { nlistcnew = 0; for (j=0;j aux2) ? aux1 : aux2; if (jjchar > 0 && (dfnorm < _tau2 || jjchar > _fcttype)) { a_dia[irow] += (_Flt)dfnorm; a_dia[ind] += (_Flt)dfnorm; } else { listc[nlistcnew] = ind; nlistcnew++; } } else { listc[nlistcnew] = irow; nlistcnew++; } } nlistcloc = nlistcnew; } if (fmaskc[irow*2] > fzero) { fmaskc[irow*2] += (_Flt) (a_dia[irow]*_theta); } else { fmaskc[irow*2] -= (_Flt) (a_dia[irow]*_theta); } // Factorize current row and split into first and second order sort (plistc, plistc+nlistcloc); if (i<_n_ini) { aux1 = fmaskc[irow*2]; if (aux1 > fzero) { if (aux1 < eigmin_att) eigmin_att = aux1; if (aux1 > eigmax_att) eigmax_att = aux1; if (aux1 < _pivmin) aux1 = _pivmin; aux1 = sqrt(aux1); aux1 = fone / aux1; auxL = aux1; auxU = aux1; } else { aux1 = -aux1; if (aux1 < eigmin_att) eigmin_att = aux1; if (aux1 > eigmax_att) eigmax_att = aux1; if (aux1 < _pivmin) aux1 = _pivmin; aux1 = sqrt(aux1); aux1 = fone / aux1; auxL = -aux1; auxU = aux1; } fmaskc[irow*2] = (_Flt)(auxL); fmaskc[irow*2+1] = (_Flt)(auxU); } if (i<_n_ini) { for (j=0;j aux2) ? aux1 : aux2; jjchar = imaskchar[ind]; if (jjchar > 0 && dfnorm < _tau1) { listc[j] = (_Int) (-ind-1); } } } } // Store computed row elems for (j=0;j 1) { if (i < _n_ini) { iv[irow] = _ia_lu[i]+1; ind = (int)listc[1]; if (ind < 0) ind = -ind-1; madj[irow] = ibegm[ind]; ibegm[ind] = (_Int) irow; } else { iv[irow] = _ia_lu[i+1]+1; } } } _eigmin_att = eigmin_att; _eigmax_att = eigmax_att; // Condense the result (filter second order elems) int nzja_new = 0; for (i=0;i= 0) nzja_new++; } vector<_Int> jlu_cnd (nzja_new+1); vector jlu_char_cnd (nzja_new+1); vector<_Flt> lu_cnd (2*nzja_new+1); nzja_new = 0; listc[0] = 0; for (i=0;i<_n;i++) { for (j=(int)_ia_lu[i];j<_ia_lu[i+1];j++) { if (_ja_lu[j] >= 0) { jlu_cnd[nzja_new] = _ja_lu[j]; jlu_char_cnd[nzja_new] = _ja_char_lu[j]; lu_cnd[nzja_new*2] = _a_lu[j*2]; lu_cnd[nzja_new*2+1] = _a_lu[j*2+1]; nzja_new++; } } listc[i+1] = nzja_new; } for (i=0;i<=_n;i++) _ia_lu[i] = listc[i]; _ja_lu.swap (jlu_cnd); _ja_char_lu.swap (jlu_char_cnd); _a_lu.swap (lu_cnd); }; // // Balance diagonal //======================================================================================== template void CIlu2_impl<_Int,_Flt>::BalanceDiag (int _n, vector<_Int> &_ia_a, vector<_Int> &_ja_a, vector<_Flt> &_a_a, vector<_Int> &_ia_l, vector<_Int> &_ja_l, vector<_Flt> &_a_l, vector<_Int> &_ia_u, vector<_Int> &_ja_u, vector<_Flt> &_a_u, double &_diacorr_min, double &_diacorr_max) { // Get arrays _Int *pia_a = &_ia_a[0]; _Int *pja_a = &_ja_a[0]; _Flt *pa_a = &_a_a[0]; _Int *pia_l = &_ia_l[0]; _Int *pja_l = &_ja_l[0]; _Flt *pa_l = &_a_l[0]; _Int *pia_u = &_ia_u[0]; _Int *pja_u = &_ja_u[0]; _Flt *pa_u = &_a_u[0]; // Compute sum array vector<_Flt> sum_arr (_n+1); _Flt *psum_arr = &sum_arr[0]; _Flt fzero = (_Flt) 0.0e0; _Flt fone = (_Flt) 1.0e0; int i; for (i=0;i<_n;i++) psum_arr[i] = fzero; int ipL, ipU, jjL, jjU, iendL, iendU; for (i=0;i<_n;i++) { iendL = (int)pia_l[i+1]-1; iendU = (int)pia_u[i+1]-1; ipL = (int)pia_l[i]+1; ipU = (int)pia_u[i]+1; while (ipL <= iendL && ipU <= iendU) { jjL = (int)pja_l[ipL]; jjU = (int)pja_u[ipU]; if (jjL == jjU) { psum_arr[jjL] += (pa_l[ipL]*pa_u[ipU]); ipL++; ipU++; } else if (jjL < jjU) { ipL++; } else { ipU++; } } } // Correct diagonal values and get stat data _diacorr_min = 1.0e20; _diacorr_max = -1.0e20; int j, jj; _Flt diagA, invDiagL, invDiagU, sum, aux, lu_inv_prod; double daux; bool is_found; for (i=0;i<_n;i++) { is_found = false; for (j=(int)pia_a[i];j::BalanceDiag: error: diagonal value is not found! "; } ipL = (int)pia_l[i]; invDiagL = pa_l[ipL]; ipU = (int)pia_u[i]; invDiagU = pa_u[ipU]; sum = diagA-psum_arr[i]; lu_inv_prod = invDiagL * invDiagU; aux = sum * lu_inv_prod; if (aux < _diacorr_min) _diacorr_min = aux; if (aux > _diacorr_max) _diacorr_max = aux; if (aux < 0.0e0) aux = 1.0e0; daux = (double)aux; daux = sqrt(daux); aux = (_Flt)daux; aux = fone / aux; pa_l[ipL] *= aux; pa_u[ipU] *= aux; } }; // // Multiply by super sparse matrix by rows and add result into prescribed positions //======================================================================================== template void CMvmSlv_impl<_Int,_Flt,_FltVect>::MvmA (int _nlist, const vector<_Int> &_list_alu, const vector<_Int> &_ia_alu, const vector<_Int> &_ja_alu, const vector<_Flt> &_a_alu, const _FltVect *_x, _FltVect *_ax) { const _Int *p_list_a = &_list_alu[0]; const _Int *p_ia_a = &_ia_alu[0]; const _Int *p_ja_a = &_ja_alu[0]; const _Flt *p_a_a = &_a_alu[0]; int i, irow, j, jj; for (i=0;i<_nlist;i++) { irow = (int)p_list_a[i]; for (j=(int)p_ia_a[i];j void CMvmSlv_impl<_Int,_Flt,_FltVect>::MvmAT (int _nlist, const vector<_Int> &_list_alu, const vector<_Int> &_ia_alu, const vector<_Int> &_ja_alu, const vector<_Flt> &_a_alu, const _FltVect *_x, _FltVect *_ax) { const _Int *p_list_a = &_list_alu[0]; const _Int *p_ia_a = &_ia_alu[0]; const _Int *p_ja_a = &_ja_alu[0]; const _Flt *p_a_a = &_a_alu[0]; int i, irow, j, jj; for (i=0;i<_nlist;i++) { irow = (int)p_list_a[i]; for (j=(int)p_ia_a[i];j void CMvmSlv_impl<_Int,_Flt,_FltVect>::SolveL (int _n, const vector<_Int> &_ia_l, const vector<_Int> &_ja_l, const vector<_Flt> &_a_l, const _FltVect *_x, _FltVect *_lx) { int i; for (i=0;i<_n;i++) _lx[i] = _x[i]; const _Int *p_ia_l = &_ia_l[0]; const _Int *p_ja_l = &_ja_l[0]; const _Flt *p_a_l = &_a_l[0]; int j, jj, ibeg, iend; for (i=0;i<_n;i++) { ibeg = (int)p_ia_l[i]; iend = (int)p_ia_l[i+1]-1; _lx[i] *= p_a_l[ibeg]; for (j=ibeg+1;j<=iend;j++) { jj = (int)p_ja_l[j]; _lx[jj] -= p_a_l[j]*_lx[i]; } } }; // // Solve with U, U is stored by rows (diag is inverted) //======================================================================================== template void CMvmSlv_impl<_Int,_Flt,_FltVect>::SolveU (int _n, const vector<_Int> &_ia_u, const vector<_Int> &_ja_u, const vector<_Flt> &_a_u, const _FltVect *_x, _FltVect *_ux) { int i; const _Int *p_ia_u = &_ia_u[0]; const _Int *p_ja_u = &_ja_u[0]; const _Flt *p_a_u = &_a_u[0]; for (i=0;i<_n;i++) _ux[i] = _x[i]; int j, jj, ibeg, iend; for (i=_n-1;i>=0;i--) { ibeg = (int)p_ia_u[i]; iend = (int)p_ia_u[i+1]-1; for (j=ibeg+1;j<=iend;j++) { jj = (int)p_ja_u[j]; _ux[i] -= p_a_u[j]*_ux[jj]; } _ux[i] *= p_a_u[ibeg]; } }; // Copy constructor //======================================================================================== template CMatrix<_Int,_Flt>::CMatrix (const CMatrix<_Int,_Flt> &_aa) { int nlistloc = _aa.GetNlist (); int nlist2loc = _aa.GetNlist2 (); int nzjaloc = _aa.GetNzja (); int nzja2loc = _aa.GetNzja2 (); int nzaloc = _aa.GetNza (); const _Int *plist_aa = &(_aa.list_matr[0]); const _Int *plist2_aa = &(_aa.list2_matr[0]); const _Int *pia_aa = &(_aa.ia_matr[0]); const _Int *pja_aa = &(_aa.ja_matr[0]); const _Int *pja2_aa = &(_aa.ja2_matr[0]); const _Flt *pa_aa = &(_aa.a_matr[0]); this->n_list = nlistloc; this->n_list2 = nlist2loc; this->nz_ja = nzjaloc; this->nz_ja2 = nzja2loc; this->nz_a = nzaloc; this->list_matr.resize(nlistloc+1); this->list2_matr.resize(nlist2loc+1); this->ia_matr.resize(nlistloc+1); this->ja_matr.resize(nzjaloc+1); this->ja2_matr.resize(nzja2loc+1); this->a_matr.resize(nzaloc+1); _Int *plist = &(this->list_matr[0]); _Int *plist2 = &(this->list2_matr[0]); _Int *pia = &(this->ia_matr[0]); _Int *pja = &(this->ja_matr[0]); _Int *pja2 = &(this->ja2_matr[0]); _Flt *pa = &(this->a_matr[0]); int i; for (i=0;i CMatrix<_Int,_Flt> &CMatrix<_Int,_Flt>::operator= (const CMatrix<_Int,_Flt> &_aa) { int nlistloc = _aa.GetNlist (); int nlist2loc = _aa.GetNlist2 (); int nzjaloc = _aa.GetNzja (); int nzja2loc = _aa.GetNzja2 (); int nzaloc = _aa.GetNza (); const _Int *plist_aa = &(_aa.list_matr[0]); const _Int *plist2_aa = &(_aa.list2_matr[0]); const _Int *pia_aa = &(_aa.ia_matr[0]); const _Int *pja_aa = &(_aa.ja_matr[0]); const _Int *pja2_aa = &(_aa.ja2_matr[0]); const _Flt *pa_aa = &(_aa.a_matr[0]); this->n_list = nlistloc; this->n_list2 = nlist2loc; this->nz_ja = nzjaloc; this->nz_ja2 = nzja2loc; this->nz_a = nzaloc; this->list_matr.resize(nlistloc+1); this->list2_matr.resize(nlist2loc+1); this->ia_matr.resize(nlistloc+1); this->ja_matr.resize(nzjaloc+1); this->ja2_matr.resize(nzja2loc+1); this->a_matr.resize(nzaloc+1); _Int *plist = &(this->list_matr[0]); _Int *plist2 = &(this->list2_matr[0]); _Int *pia = &(this->ia_matr[0]); _Int *pja = &(this->ja_matr[0]); _Int *pja2 = &(this->ja2_matr[0]); _Flt *pa = &(this->a_matr[0]); int i; for (i=0;i void CMatrix<_Int,_Flt>::GetSparsity (const CMatrix<_Int,_Flt> &_a_sp) { const int nlistloc = _a_sp.GetNlist (); const int nlist2loc = _a_sp.GetNlist2 (); const int nzjaloc = _a_sp.GetNzja (); const int nzja2loc = _a_sp.GetNzja2 (); const _Int *plist_aa = &(_a_sp.list_matr[0]); const _Int *plist2_aa = &(_a_sp.list2_matr[0]); const _Int *pia_aa = &(_a_sp.ia_matr[0]); const _Int *pja_aa = &(_a_sp.ja_matr[0]); const _Int *pja2_aa = &(_a_sp.ja2_matr[0]); this->n_list = nlistloc; this->n_list2 = nlist2loc; this->nz_ja = nzjaloc; this->nz_ja2 = nzja2loc; this->nz_a = 0; this->list_matr.resize(nlistloc+1); this->list2_matr.resize(nlist2loc+1); this->ia_matr.resize(nlistloc+1); this->ja_matr.resize(nzjaloc+1); this->ja2_matr.resize(nzja2loc+1); this->a_matr.resize(1); _Int *plist = &(this->list_matr[0]); _Int *plist2 = &(this->list2_matr[0]); _Int *pia = &(this->ia_matr[0]); _Int *pja = &(this->ja_matr[0]); _Int *pja2 = &(this->ja2_matr[0]); int i; for (i=0;i void CMatrix<_Int,_Flt>::TransposedSparsityListSp (int &_icycle, int *_imask, int *_indarr, int *_iptr, int *_listloc, int *_ialoc, CMatrix<_Int,_Flt> &_at) const { const int nlist_ini = this->n_list; const int nzja_ini = this->nz_ja; const _Int *plist = &this->list_matr[0]; const _Int *pia = &this->ia_matr[0]; const _Int *pja = &this->ja_matr[0]; // Count the sparsity of at int nlistloc = 0; _icycle++; int i, j, jj; for (i=0;i at; at.ResizeAndSetAllSp (nlistloc, 0, nzja_ini, 0); _Int *plist_at = at.GetListArr (); _Int *pia_at = at.GetIaArr (); _Int *pja_at = at.GetJaArr (); for (i=0;i void CMatrix<_Int,_Flt>::TransposedSparsityList (int &_icycle, int *_imask, int *_indarr, int *_iptr, int *_listloc, int *_ialoc, CMatrix<_Int,_Flt> &_at) const { const int nlist_ini = this->n_list; const int nzja_ini = this->nz_ja; const _Int *plist = &this->list_matr[0]; const _Int *pia = &this->ia_matr[0]; const _Int *pja = &this->ja_matr[0]; const _Flt *pa = &this->a_matr[0]; // Count the sparsity of at int nlistloc = 0; _icycle++; int i, j, jj; for (i=0;i at; at.ResizeAndSetAll (nlistloc, 0, nzja_ini, 0, nzja_ini); _Int *plist_at = at.GetListArr (); _Int *pia_at = at.GetIaArr (); _Int *pja_at = at.GetJaArr (); _Flt *pa_at = at.GetAArr (); for (i=0;i void CMatrix<_Int,_Flt>::ExtendSparsity (const CMatrix<_Int,_Flt> &_a_sp, CMatrix<_Int,_Flt> &_a_ext) const { // Open sparsities const int nlist_ini = this->n_list; const int nzja_ini = this->nz_ja; const _Int *plist = &this->list_matr[0]; const _Int *pia = &this->ia_matr[0]; const _Int *pja = &this->ja_matr[0]; const _Flt *pa = &this->a_matr[0]; const int nlist_sp = _a_sp.GetNlist(); const int nzja_sp = _a_sp.GetNzja(); const _Int *plist_sp = _a_sp.GetListArr(); const _Int *pia_sp = _a_sp.GetIaArr(); const _Int *pja_sp = _a_sp.GetJaArr(); // Compute extended by zeroes data int nlist_ext_max = nlist_ini+nlist_sp; int nzja_ext_max = nzja_ini+nzja_sp; vector<_Int> list_ext (nlist_ext_max+1); vector<_Int> ia_ext (nlist_ext_max+1); vector<_Int> ja_ext (nzja_ext_max+1); vector<_Flt> a_ext (nzja_ext_max+1); _Int *plist_ext = &list_ext[0]; _Int *pia_ext = &ia_ext[0]; _Int *pja_ext = &ja_ext[0]; _Flt *pa_ext = &a_ext[0]; _Flt fzero = (_Flt) 0.0e0; int nlist_ext = 0; int nzja_ext = 0; int ip_list, ip_list_sp, irow, irow_sp, j, jj, jj_sp, jp, jp_sp; ip_list = 0; ip_list_sp = 0; pia_ext[0] = 0; while (ip_list < nlist_ini || ip_list_sp < nlist_sp) { if (ip_list < nlist_ini && ip_list_sp < nlist_sp) { irow = (int)plist[ip_list]; irow_sp = (int)plist_sp[ip_list_sp]; if (irow == irow_sp) { plist_ext[nlist_ext] = irow; nlist_ext++; jp = (int)pia[ip_list]; jp_sp = (int)pia_sp[ip_list_sp]; while (jp < pia[ip_list+1] || jp_sp < pia_sp[ip_list_sp+1]) { if (jp < pia[ip_list+1] && jp_sp < pia_sp[ip_list_sp+1]) { jj = (int)pja[jp]; jj_sp = (int)pja_sp[jp_sp]; if (jj == jj_sp) { pja_ext[nzja_ext] = pja[jp]; pa_ext[nzja_ext] = pa[jp]; nzja_ext++; jp++; jp_sp++; } else if (jj < jj_sp) { pja_ext[nzja_ext] = pja[jp]; pa_ext[nzja_ext] = pa[jp]; nzja_ext++; jp++; } else { pja_ext[nzja_ext] = pja_sp[jp_sp]; pa_ext[nzja_ext] = fzero; nzja_ext++; jp_sp++; } } else if (jp < pia[ip_list+1]) { pja_ext[nzja_ext] = pja[jp]; pa_ext[nzja_ext] = pa[jp]; nzja_ext++; jp++; } else { pja_ext[nzja_ext] = pja_sp[jp_sp]; pa_ext[nzja_ext] = fzero; nzja_ext++; jp_sp++; } } ip_list++; ip_list_sp++; } else if (irow < irow_sp) { plist_ext[nlist_ext] = irow; nlist_ext++; for (j=(int)pia[ip_list];j a_new; a_new.ResizeAndSetAll (nlist_ext, 0, nzja_ext, 0, nzja_ext); _Int *plist_new = a_new.GetListArr(); _Int *pia_new = a_new.GetIaArr(); _Int *pja_new = a_new.GetJaArr(); _Flt *pa_new = a_new.GetAArr(); int i; for (i=0;i void CMatrix<_Int,_Flt>::MatrixByMatrixMultiply (int &_icycle, int *_imask, int *_imask1, int *_indarr, int *_listloc, _Flt *_fmask, CMatrix<_Int,_Flt> &_a, CMatrix<_Int,_Flt> &_b, CMatrix<_Int,_Flt> &_a_times_b) { // Open sparsities of A and B int nlist_a = _a.GetNlist(); _Int *plist_a = _a.GetListArr(); _Int *pia_a = _a.GetIaArr(); _Int *pja_a = _a.GetJaArr(); _Flt *pa_a = _a.GetAArr(); int nlist_b = _b.GetNlist(); _Int *plist_b = _b.GetListArr(); _Int *pia_b = _b.GetIaArr(); _Int *pja_b = _b.GetJaArr(); _Flt *pa_b = _b.GetAArr(); // Perform multiplication vector<_Int> list_ab (nlist_a+1); vector<_Int> ia_ab (nlist_a+1); _Int *plist_ab = &list_ab[0]; _Int *pia_ab = &ia_ab[0]; vector<_Int> ja_ab (1); vector<_Flt> a_ab (1); int i; for (i=0;i *p_list_ab = _a_times_b.GetList(); vector<_Int> *p_ia_ab = _a_times_b.GetIa(); vector<_Int> *p_ja_ab = _a_times_b.GetJa(); vector<_Flt> *p_a_ab = _a_times_b.GetA(); p_list_ab->swap (list_ab); p_ia_ab->swap (ia_ab); p_ja_ab->swap (ja_ab); p_a_ab->swap (a_ab); _a_times_b.SetNlist (nlist_a); _a_times_b.SetNzja (nzja_ab); _a_times_b.SetNza (nzja_ab); } // Compute the packed size //======================================================================================== template int CMatrix<_Int,_Flt>::GetPackedSize () const { int nlistloc = this->n_list; int nlist2loc = this->n_list2; int nzjaloc = this->nz_ja; int nzja2loc = this->nz_ja2; int nzaloc = this->nz_a; int isize = 5*sizeof(int) + (2*nlistloc+1+nlist2loc+nzjaloc+nzja2loc)*sizeof(_Int) + nzaloc*sizeof(_Flt); return isize; } // Fill by packed data //======================================================================================== template void CMatrix<_Int,_Flt>::FillPacked (int _length, char *_obj) const { int nlistloc = this->n_list; int nlist2loc = this->n_list2; int nzjaloc = this->nz_ja; int nzja2loc = this->nz_ja2; int nzaloc = this->nz_a; char* pLoc; pLoc = _obj; int *pHead = NULL; _Int *plist_obj = NULL; _Int *plist2_obj = NULL; _Int *pia_obj = NULL; _Int *pja_obj = NULL; _Int *pja2_obj = NULL; _Flt *pa_obj = NULL; pHead = (int *) pLoc; pLoc += 5 * sizeof(int); plist_obj = (_Int *) pLoc; pLoc += nlistloc * sizeof(_Int); plist2_obj = (_Int *) pLoc; pLoc += nlist2loc * sizeof(_Int); pia_obj = (_Int *) pLoc; pLoc += (nlistloc+1) * sizeof(_Int); pja_obj = (_Int *) pLoc; pLoc += (nzjaloc) * sizeof(_Int); pja2_obj = (_Int *) pLoc; pLoc += (nzja2loc) * sizeof(_Int); pa_obj = (_Flt *) pLoc; pLoc += (nzaloc) * sizeof(_Flt); pHead[0] = nlistloc; pHead[1] = nlist2loc; pHead[2] = nzjaloc; pHead[3] = nzja2loc; pHead[4] = nzaloc; if (pLoc-_obj != _length) { throw " CMatrix<_Int,_Flt>::FillPacked: incorrect length on entry "; } // Fill arrays const _Int *plist = &(this->list_matr[0]); const _Int *plist2 = &(this->list2_matr[0]); const _Int *pia = &(this->ia_matr[0]); const _Int *pja = &(this->ja_matr[0]); const _Int *pja2 = &(this->ja2_matr[0]); const _Flt *pa = &(this->a_matr[0]); int j; for (j = 0; j < nlistloc; j++) plist_obj[j] = plist[j]; for (j = 0; j < nlist2loc; j++) plist2_obj[j] = plist2[j]; for (j = 0; j < nlistloc+1; j++) pia_obj[j] = pia[j]; for (j = 0; j < nzjaloc; j++) pja_obj[j] = pja[j]; for (j = 0; j < nzja2loc; j++) pja2_obj[j] = pja2[j]; for (j = 0; j < nzaloc; j++) pa_obj[j] = pa[j]; } // Fill by packed data //======================================================================================== template void CMatrix<_Int,_Flt>::UnPack (int _length, char *_obj) { // Get head data char *pLoc; pLoc = _obj; int *pHead; pHead = (int *) pLoc; pLoc += 5 * sizeof(int); int nlistloc = pHead[0]; int nlist2loc = pHead[1]; int nzjaloc = pHead[2]; int nzja2loc = pHead[3]; int nzaloc = pHead[4]; _Int *plist_obj = NULL; _Int *plist2_obj = NULL; _Int *pia_obj = NULL; _Int *pja_obj = NULL; _Int *pja2_obj = NULL; _Flt *pa_obj = NULL; plist_obj = (_Int *) pLoc; pLoc += nlistloc * sizeof(_Int); plist2_obj = (_Int *) pLoc; pLoc += nlist2loc * sizeof(_Int); pia_obj = (_Int *) pLoc; pLoc += (nlistloc+1) * sizeof(_Int); pja_obj = (_Int *) pLoc; pLoc += (nzjaloc) * sizeof(_Int); pja2_obj = (_Int *) pLoc; pLoc += (nzja2loc) * sizeof(_Int); pa_obj = (_Flt *) pLoc; pLoc += (nzaloc) * sizeof(_Flt); // Store data this->n_list = nlistloc; this->n_list2 = nlist2loc; this->nz_ja = nzjaloc; this->nz_ja2 = nzja2loc; this->nz_a = nzaloc; this->list_matr.resize(nlistloc+1); this->list2_matr.resize(nlist2loc+1); this->ia_matr.resize(nlistloc+1); this->ja_matr.resize(nzjaloc+1); this->ja2_matr.resize(nzja2loc+1); this->a_matr.resize(nzaloc+1); int i; _Int *piarr; _Flt *paarr; piarr = &(this->list_matr[0]); for (i=0;ilist2_matr[0]); for (i=0;iia_matr[0]); for (i=0;i<=nlistloc;i++) piarr[i] = pia_obj[i]; piarr = &(this->ja_matr[0]); for (i=0;ija2_matr[0]); for (i=0;ia_matr[0]); for (i=0;i void CVect<_FltVect>::SetByZeroes (int _n, _FltVect *_x) { int i; _FltVect fzero = (_FltVect)0.0e0; for (i=0;i<_n;i++) _x[i] = fzero; }; // // Compute scalar product //======================================================================================== template void CVect<_FltVect>::CopyVector (int _n, _FltVect *_x, _FltVect *_y) { int i; for (i=0;i<_n;i++) _y[i] = _x[i]; }; // // Compute scalar product //======================================================================================== template _FltVect CVect<_FltVect>::ScProd (int _n, _FltVect *_x, _FltVect *_y) { _FltVect fsum = (_FltVect)0.0e0; int i; for (i=0;i<_n;i++) fsum += _x[i]*_y[i]; return fsum; }; // // Add vector data //======================================================================================== template void CVect<_FltVect>::AddReplaceVector (int _n, _FltVect *_x1, _FltVect *_x1plus2) { int i; for (i=0;i<_n;i++) _x1plus2[i] += _x1[i]; }; // // Subtract vector data //======================================================================================== template void CVect<_FltVect>::SubtractReplaceVector (int _n, _FltVect *_x1, _FltVect *_x2minus1) { int i; for (i=0;i<_n;i++) _x2minus1[i] -= _x1[i]; }; // Update array for minus alpha (axpy) //======================================================================================== template void CVect<_FltVect>::UpdateVectorMinus (int _n, _FltVect *_value, _FltVect *_arr_x, _FltVect *_arr_y) { _FltVect value = *_value; int i; for (i=0;i<_n;i++) _arr_y[i] = _arr_y[i] - value * _arr_x[i]; }; // Update array reversed (aypx) //======================================================================================== template void CVect<_FltVect>::UpdateVectorReversed (int _n, _FltVect *_value, _FltVect *_arr_x, _FltVect *_arr_y) { _FltVect value = *_value; for (int i=0;i<_n;i++) _arr_y[i] = value * _arr_y[i] + _arr_x[i]; }; // Update array (axpy) //======================================================================================== template void CVect<_FltVect>::UpdateVector (int _n, _FltVect *_value, _FltVect *_arr_x, _FltVect *_arr_y) { // Update array (axpy): y=a*x+y _FltVect value = *_value; for (int i=0;i<_n;i++) _arr_y[i] += value * _arr_x[i]; }; // // Order vector data //======================================================================================== template void CVect<_FltVect>::OrderVector (int _n, const vector &_order, const _FltVect *_x, _FltVect *_x_ord) { int i; for (i=0;i<_n;i++) _x_ord[_order[i]] = _x[i]; }; // // Inverse order vector data //======================================================================================== template void CVect<_FltVect>::InvOrderVector (int _n, const vector &_order, const _FltVect *_x, _FltVect *_x_ord) { int i; for (i=0;i<_n;i++) _x_ord[i] = _x[_order[i]]; }; // // Print matrix data //======================================================================================== template void CIlu2<_Int,_Flt>::PrintMatrix (ofstream &_fout, CMatrix<_Int,_Flt> &_a_matr) { _fout << " CMatrix:" << endl; _fout << " Nlist = " << _a_matr.GetNlist() << " Nlist2 = " << _a_matr.GetNlist2 () << endl; _fout << " Nzja = " << _a_matr.GetNzja () << " Nzja2 = " << _a_matr.GetNzja2 () << " Nza = " << _a_matr.GetNza () << endl; if (_a_matr.GetNlist() > 0) PrintArray (_fout, " List = ",_a_matr.GetNlist(), _a_matr.GetListArr()); if (_a_matr.GetNlist2() > 0) PrintArray (_fout, " List2 = ",_a_matr.GetNlist2(), _a_matr.GetList2Arr()); if (_a_matr.GetNlist() > 0) PrintArray (_fout, " Ia = ",_a_matr.GetNlist()+1,_a_matr.GetIaArr()); if (_a_matr.GetNzja() > 0) PrintArray (_fout, " Ja = ",_a_matr.GetNzja(), _a_matr.GetJaArr()); if (_a_matr.GetNzja2() > 0) PrintArray (_fout, " Ja2 = ",_a_matr.GetNzja2(), _a_matr.GetJa2Arr()); if (_a_matr.GetNza() > 0) PrintArray (_fout, " A = ",_a_matr.GetNza(), _a_matr.GetAArr()); }; // // Init by data //======================================================================================== template void CMatrixConv<_Int,_Flt,_Flt2>::InitAndConv (int _n, _Int *_ia, _Int *_ja, _Flt *_a, CMatrix<_Int,_Flt2> &_amatr) { CMatrix<_Int,_Flt2> a_temp (_n, _ia, _ja); vector<_Flt2> *pa = a_temp.GetA(); int nzja = (int)_ia[_n]; pa->resize(nzja+1); _Flt2 *p_ptr_a = &((*pa)[0]); int i; for (i=0;i CHMatrix<_Int,_Flt>::CHMatrix (const CHMatrix<_Int,_Flt> &_aa) { this->nzblk = _aa.nzblk; this->hmatr_str = _aa.hmatr_str; this->asub_arr.resize (_aa.nzblk+1); const CMatrix<_Int,_Flt> *pasub_arr_aa = &(_aa.asub_arr[0]); CMatrix<_Int,_Flt> *pasub_arr = &(this->asub_arr[0]); for (int i=0;i<_aa.nzblk;i++) pasub_arr[i] = pasub_arr_aa[i]; }; // Equality operator //======================================================================================== template CHMatrix<_Int,_Flt> &CHMatrix<_Int,_Flt>::operator= (const CHMatrix<_Int,_Flt> &_aa) { this->nzblk = _aa.nzblk; this->hmatr_str = _aa.hmatr_str; this->asub_arr.resize (_aa.nzblk+1); const CMatrix<_Int,_Flt> *pasub_arr_aa = &(_aa.asub_arr[0]); CMatrix<_Int,_Flt> *pasub_arr = &(this->asub_arr[0]); for (int i=0;i<_aa.nzblk;i++) pasub_arr[i] = pasub_arr_aa[i]; return *this; }; // // Init by data //======================================================================================== template CHMatrix<_Int,_Flt>::CHMatrix (int _iblk, int _nlist, _Int *_list, _Int *_ia, _Int *_ja, _Flt *_a, int _nblks, long long *_blks, int &_icycle, int *_imaskblk) { // Check list array int i; long long jj; for (i=0;i<_nlist;i++) { jj = _list[i]; if (jj < _blks[_iblk] || jj >= _blks[_iblk+1]) { throw " CHMatrix<>::CHMatrix: error: incorrect list array "; } } // Compute ja2 array int nzja = (int)_ia[_nlist]; vector<_Int> ja2 (nzja+1); _Int *pja2 = &ja2[0]; CHMatrix<_Int,_Flt>::ComputeJa2 (_nblks, _blks, _nlist, _ia, _ja, pja2); // Create list of block numbers _icycle++; int nlistblk = 0; vector listblk; int jblk; for (i=0;i 0) plistblk = &listblk[0]; sort (plistblk,plistblk+nlistblk); vector indlistblk (_nblks); int *pindlistblk = &indlistblk[0]; for (i=0;i nzblk (_nblks); int *pnzblk = &nzblk[0]; for (i=0;i > irowarr_blk (nlistblk+1); vector > icolarr_blk (nlistblk+1); vector > valarr_blk (nlistblk+1); vector<_Int> *pirowarr_blk = &irowarr_blk[0]; vector<_Int> *picolarr_blk = &icolarr_blk[0]; vector<_Flt> *pvalarr_blk = &valarr_blk[0]; for (i=0;inzblk = nlistblk; int ia_1blk[2]; ia_1blk[0] = 0; ia_1blk[1] = nlistblk; CMatrix hmatr_str_temp (1, &_iblk, ia_1blk, plistblk); this->hmatr_str.ReplaceFree (hmatr_str_temp); // For each block compute the list of rows and ia arrays int nimax = 0; int ni; for (i=0;i nimax) nimax = ni; } vector imask (nimax+1); vector list (nimax+1); vector indlist (nimax+1); int *pimask = &imask[0]; int *plist = &list[0]; int *pindlist = &indlist[0]; int icycle = -1; for (i=0;i nlistarr_blk (nlistblk+1); vector > listarr_blk (nlistblk+1); vector > iaarr_blk (nlistblk+1); int *pnlistarr_blk = &nlistarr_blk[0]; vector<_Int> *plistarr_blk = &listarr_blk[0]; vector<_Int> *piaarr_blk = &iaarr_blk[0]; for (i=0;i nimax) { cout << " Wait !!! " << endl; } if (pimask[jj] != icycle) { plist[nlistloc] = (int)jj; nlistloc++; pimask[jj] = icycle; } } pnlistarr_blk[i] = nlistloc; sort (plist,plist+nlistloc); for (j=0;j > asub_arr_temp (nlistblk+1); CMatrix<_Int,_Flt> *pasub_arr_temp = &asub_arr_temp[0]; for (i=0;i *plist_temp = pasub_arr_temp[i].GetList(); vector<_Int> *pia_temp = pasub_arr_temp[i].GetIa(); vector<_Int> *pja_temp = pasub_arr_temp[i].GetJa(); vector<_Flt> *pa_temp = pasub_arr_temp[i].GetA(); pasub_arr_temp[i].SetNlist(pnlistarr_blk[i]); pasub_arr_temp[i].SetNzja(pnzblk[jblk]); pasub_arr_temp[i].SetNza(pnzblk[jblk]); plist_temp->swap(plistarr_blk[i]); pia_temp->swap(piaarr_blk[i]); pja_temp->swap(picolarr_blk[i]); pa_temp->swap(pvalarr_blk[i]); } this->asub_arr.resize (nlistblk+1); CMatrix<_Int,_Flt> *pasub_arr = &(this->asub_arr[0]); for (i=0;i void CHMatrix<_Int,_Flt>::SymmetrizeSubmatrices (void *_comm, int _nblks, long long *_blks, int *_blk2cpu, CHMatrix<_Int,_Flt> *_hmatr_arr, CHMatrix<_Int,_Flt> *_hmatr_symm_arr) { int myid = CExchange::GetMyid (_comm); int nproc = CExchange::GetNproc (_comm); // Create cpu reg data int icyclecpu = -1; vector imaskcpu (nproc); vector listcpu (nproc); vector indcpu (nproc); int *pimaskcpu = &imaskcpu[0]; int *plistcpu = &listcpu[0]; int *pindcpu = &indcpu[0]; int i; for (i=0;i *pHMatr_sub = _hmatr_arr[iblk].GetHMatrStr(); int nlist_hmatr = pHMatr_sub->GetNlist(); int *pia_hmatr = pHMatr_sub->GetIaArr(); int *pja_hmatr = pHMatr_sub->GetJaArr(); for (i=0;i nzblk_cpu (nlistcpu+1); int *pnzblk_cpu = &nzblk_cpu[0]; for (i=0;i *pHMatr_sub = _hmatr_arr[iblk].GetHMatrStr(); int nlist_hmatr = pHMatr_sub->GetNlist(); int *pia_hmatr = pHMatr_sub->GetIaArr(); int *pja_hmatr = pHMatr_sub->GetJaArr(); for (i=0;i > hblk_send (nlistcpu+1); CHMatrix<_Int,_Flt> *phblk_send = &hblk_send[0]; for (i=0;i astr_temp; astr_temp.ResizeAndSetAllSp (0, 0, pnzblk_cpu[i], pnzblk_cpu[i]); CMatrix *pHMatrStr = phblk_send[i].GetHMatrStr(); pHMatrStr->ReplaceFree (astr_temp); phblk_send[i].SetNzblk (pnzblk_cpu[i]); phblk_send[i].ResizeASub (pnzblk_cpu[i]+1); } int nimax = 0; int niloc; for (i=0;i<_nblks;i++) { niloc = (int)(_blks[i+1]-_blks[i]); if (niloc > nimax) nimax = niloc; } vector imaskblk (nimax+1); vector listblk (nimax+1); vector indblk (nimax+1); vector iablk (nimax+1); vector iptrblk (nimax+1); int *pimaskblk = &imaskblk[0]; int *plistblk = &listblk[0]; int *pindblk = &indblk[0]; int *piablk = &iablk[0]; int *piptrblk = &iptrblk[0]; int icycleblk = -1; for (i=0;i *pA_sub = _hmatr_arr[iblk].GetASubArr(); CMatrix *pHMatr_sub = _hmatr_arr[iblk].GetHMatrStr(); int nlist_hmatr = pHMatr_sub->GetNlist(); int *pia_hmatr = pHMatr_sub->GetIaArr(); int *pja_hmatr = pHMatr_sub->GetJaArr(); for (i=0;i *pA_cpu = phblk_send[ind].GetASubArr(); CMatrix *pHMatr_cpu = phblk_send[ind].GetHMatrStr(); int *pja_cpu = pHMatr_cpu->GetJaArr(); int *pja2_cpu = pHMatr_cpu->GetJa2Arr(); pja_cpu[k] = jj; pja2_cpu[k] = iblk; CMatrix<_Int,_Flt> a_sp; a_sp.GetSparsity (pA_sub[j]); a_sp.TransposedSparsityListSp (icycleblk, pimaskblk, pindblk, piptrblk, plistblk, piablk, pA_cpu[k]); pnzblk_cpu[ind]++; } } } } } // Pack send data vector CpuIDSend (nlistcpu); vector > ObjSend (nlistcpu); int *pCpuIDSend = NULL; vector *pObjSend = NULL; if (nlistcpu > 0) { pCpuIDSend = &CpuIDSend[0]; pObjSend = &ObjSend[0]; } long long isize; char *pobj; for (i=0;i CpuIDRecv; vector > ObjRecv; CExchange::DataExchange (_comm, CpuIDSend, ObjSend, CpuIDRecv, ObjRecv); // Free send data { vector CpuIDSend_temp; vector > ObjSend_temp; CpuIDSend.swap (CpuIDSend_temp); ObjSend.swap (ObjSend_temp); } // Unpack receive data int nrecv = (int) CpuIDRecv.size(); vector *pObjRecv = NULL; if (nrecv > 0) { pObjRecv = &ObjRecv[0]; } vector > hblk_recv (nrecv+1); CHMatrix<_Int,_Flt> *phblk_recv = &hblk_recv[0]; for (i=0;i CpuIDRecv_temp; vector > ObjRecv_temp; CpuIDRecv.swap (CpuIDRecv_temp); ObjRecv.swap (ObjRecv_temp); } // Store received block sparsity for each own block row vector iablk_recv (_nblks+1); int *piablk_recv = &iablk_recv[0]; for (i=0;i<=_nblks;i++) piablk_recv[i] = 0; for (i=0;i *pHMatr_sub = phblk_recv[i].GetHMatrStr(); int nzja_hmatr = pHMatr_sub->GetNzja(); int *pja_hmatr = pHMatr_sub->GetJaArr(); for (j=0;j iptr_recv (_nblks); vector jablk_recv (nzblk_recv+1); vector > ablk_recv (nzblk_recv+1); int *piptr_recv = &iptr_recv[0]; int *pjablk_recv = &jablk_recv[0]; CMatrix<_Int,_Flt> *pablk_recv = &ablk_recv[0]; for (i=0;i<_nblks;i++) piptr_recv[i] = piablk_recv[i]; int jblk; for (i=0;i *pA_sub = phblk_recv[i].GetASubArr(); CMatrix *pHMatr_sub = phblk_recv[i].GetHMatrStr(); int nzja_hmatr = pHMatr_sub->GetNzja(); int *pja_hmatr = pHMatr_sub->GetJaArr(); int *pja2_hmatr = pHMatr_sub->GetJa2Arr(); for (j=0;j *pA_sub = _hmatr_arr[iblk].GetASubArr(); CMatrix *pHMatr_sub = _hmatr_arr[iblk].GetHMatrStr(); int nzja_hmatr = pHMatr_sub->GetNzja(); int *pja_hmatr = pHMatr_sub->GetJaArr(); ind = -1; for (j=0;j::SymmetrizeSubmatrices: error: incorrect block sparsity "; } CMatrix<_Int,_Flt> a_sp; a_sp.GetSparsity (pA_sub[ind]); a_sp.TransposedSparsityListSp (icycleblk, pimaskblk, pindblk, piptrblk, plistblk, piablk, pablk_recv[k]); piptr_recv[iblk]++; } } // Free received hblock data for (i=0;i iiarr (_nblks); CSortInt *piiarr = &iiarr[0]; int nlist_t, ibeg, nlist_new, ip, ip_t, jj_t; for (iblk=0;iblk<_nblks;iblk++) { if (_blk2cpu[iblk] == myid) { nlist_t = piablk_recv[iblk+1]-piablk_recv[iblk]; ibeg = piablk_recv[iblk]; for (j=0;j *pA_sub = _hmatr_arr[iblk].GetASubArr(); CMatrix *pHMatr_sub = _hmatr_arr[iblk].GetHMatrStr(); int nzja_hmatr = pHMatr_sub->GetNzja(); int *pja_hmatr = pHMatr_sub->GetJaArr(); nlist_new = 0; ip = 0; ip_t = 0; while (ip < nzja_hmatr || ip_t < nlist_t) { if (ip < nzja_hmatr && ip_t < nlist_t) { jj = pja_hmatr[ip]; jj_t = piiarr[ip_t].ival; if (jj == jj_t) { nlist_new++; ip++; ip_t++; } else if (jj < jj_t) { nlist_new++; ip++; } else { nlist_new++; ip_t++; } } else if (ip < nzja_hmatr) { nlist_new++; ip++; } else { nlist_new++; ip_t++; } } _hmatr_symm_arr[iblk].ResizeASub (nlist_new+1); _hmatr_symm_arr[iblk].SetNzblk (nlist_new); CMatrix<_Int,_Flt> *pASymm_sub = _hmatr_symm_arr[iblk].GetASubArr(); CMatrix *pHMatrSymm_sub = _hmatr_symm_arr[iblk].GetHMatrStr(); pHMatrSymm_sub->ResizeAndSetAllSp (1, 0, nlist_new, 0); int *plist_hmatrsymm = pHMatrSymm_sub->GetListArr(); int *pia_hmatrsymm = pHMatrSymm_sub->GetIaArr(); int *pja_hmatrsymm = pHMatrSymm_sub->GetJaArr(); plist_hmatrsymm[0] = iblk; pia_hmatrsymm[0] = 0; pia_hmatrsymm[1] = nlist_new; nlist_new = 0; ip = 0; ip_t = 0; while (ip < nzja_hmatr || ip_t < nlist_t) { if (ip < nzja_hmatr && ip_t < nlist_t) { jj = pja_hmatr[ip]; jj_t = piiarr[ip_t].ival; if (jj == jj_t) { pja_hmatrsymm[nlist_new] = jj; ind = piiarr[ip_t].i2val; pA_sub[ip].ExtendSparsity (pablk_recv[ind], pASymm_sub[nlist_new]); nlist_new++; ip++; ip_t++; } else if (jj < jj_t) { pja_hmatrsymm[nlist_new] = jj; pASymm_sub[nlist_new] = pA_sub[ip]; nlist_new++; ip++; } else { pja_hmatrsymm[nlist_new] = jj_t; ind = piiarr[ip_t].i2val; CMatrix<_Int,_Flt> atemp; atemp.ExtendSparsity (pablk_recv[ind], pASymm_sub[nlist_new]); nlist_new++; ip_t++; } } else if (ip < nzja_hmatr) { pja_hmatrsymm[nlist_new] = pja_hmatr[ip]; pASymm_sub[nlist_new] = pA_sub[ip]; nlist_new++; ip++; } else { pja_hmatrsymm[nlist_new] = piiarr[ip_t].ival; ind = piiarr[ip_t].i2val; CMatrix<_Int,_Flt> atemp; atemp.ExtendSparsity (pablk_recv[ind], pASymm_sub[nlist_new]); nlist_new++; ip_t++; } } } } } // Compute the extended lists //======================================================================================== template void CHMatrix<_Int,_Flt>::ExtendedLists (void *_comm, int _ncycle, int _nblks, long long *_blks, int *_blk2cpu, CHMatrix<_Int,_Flt> *_hmatr_arr, int *_nlist_ext_arr, vector *_list_ext_arr) { int myid = CExchange::GetMyid (_comm); int nproc = CExchange::GetNproc (_comm); // Compute the maximal block size int nimax = 0; int i; int niloc; for (i=0;i<_nblks;i++) { niloc = (int)(_blks[i+1]-_blks[i]); if (niloc > nimax) nimax = niloc; } vector imaskblk (nimax+1); vector listblk (nimax+1); vector iablk (nimax+1); int *pimaskblk = &imaskblk[0]; int *plistblk = &listblk[0]; int *piablk = &iablk[0]; int icycleblk = -1; for (i=0;i *pA_sub = _hmatr_arr[iblk].GetASubArr(); CMatrix *pHMatr_sub = _hmatr_arr[iblk].GetHMatrStr(); int nzja_hmatr = pHMatr_sub->GetNzja(); int *pja_hmatr = pHMatr_sub->GetJaArr(); nlistnew = 0; for (j=0;j > blkrows (_nblks); CMatrix<_Int,_Flt> *pblkrows = &blkrows[0]; int nzjanew, ind, kj; for (iblk=0;iblk<_nblks;iblk++) { if (_blk2cpu[iblk] == myid) { CMatrix<_Int,_Flt> *pA_sub = _hmatr_arr[iblk].GetASubArr(); CMatrix *pHMatr_sub = _hmatr_arr[iblk].GetHMatrStr(); int nzja_hmatr = pHMatr_sub->GetNzja(); int *pja_hmatr = pHMatr_sub->GetJaArr(); nzjanew = 0; for (j=0;j anew; anew.ResizeAndSetAllSp (nlistnew,nlistnew,nzjanew,nzjanew); _Int *plist_new = anew.GetListArr(); _Int *plist2_new = anew.GetList2Arr(); _Int *pia_new = anew.GetIaArr(); _Int *pja_new = anew.GetJaArr(); _Int *pja2_new = anew.GetJa2Arr(); for (j=0;j nlist_arr (_nblks); vector > list_arr (_nblks); vector nlist_arr_prev (_nblks); vector > list_arr_prev (_nblks); int *pnlist_arr_prev = &nlist_arr_prev[0]; int *pnlist_arr = &nlist_arr[0]; vector *plist_arr = &list_arr[0]; vector *plist_arr_prev = &list_arr_prev[0]; for (iblk=0;iblk<_nblks;iblk++) pnlist_arr_prev[iblk] = 0; for (iblk=0;iblk<_nblks;iblk++) pnlist_arr[iblk] = 0; int jj2; for (iblk=0;iblk<_nblks;iblk++) { if (_blk2cpu[iblk] == myid) { nlistnew = pblkrows[iblk].GetNlist(); nzjanew = pblkrows[iblk].GetNzja(); _Int *pia_row = pblkrows[iblk].GetIaArr(); _Int *pja_row = pblkrows[iblk].GetJaArr(); _Int *pja2_row = pblkrows[iblk].GetJa2Arr(); int nzjaflt = 0; for (j=0;j ii2arr (nzjaflt+1); CSortInt2 *pii2arr = &ii2arr[0]; nzjaflt = 0; for (j=0;j imaskcpu (nproc); vector listcpu (nproc); vector indcpu (nproc); int *pimaskcpu = &imaskcpu[0]; int *plistcpu = &listcpu[0]; int *pindcpu = &indcpu[0]; for (i=0;i imasktot (_nblks); vector listtot (_nblks); vector indtot (_nblks); int *pimasktot = &imasktot[0]; int *plisttot = &listtot[0]; int *pindtot = &indtot[0]; for (i=0;i<_nblks;i++) { pimasktot[i] = icycletot; } // Main extention cycle int icycle_ext, jcpu; for (icycle_ext=0;icycle_ext<_ncycle-1;icycle_ext++) { // Create list of cpus and count sizes icyclecpu++; int nlistcpu = 0; for (iblk=0;iblk<_nblks;iblk++) { if (_blk2cpu[iblk] == myid) { int *pplist_arr_prev = &(plist_arr_prev[iblk][0]); for (j=0;j nz_send (nlistcpu+1); int *pnz_send = &nz_send[0]; for (i=0;i > hblk_send (nlistcpu+1); CHMatrix<_Int,_Flt> *phblk_send = &hblk_send[0]; for (i=0;i *pA_sub = phblk_send[i].GetASubArr(); CMatrix<_Int,_Flt> ablk_temp; ablk_temp.ResizeAndSetAllSp (0, 0, 3*pnz_send[i], 0); pA_sub->ReplaceFree (ablk_temp); } for (i=0;i *pA_sub = phblk_send[ind].GetASubArr(); _Int *pja_asub = pA_sub->GetJaArr(); k = pnz_send[ind]; pja_asub[3*k] = iblk; pja_asub[3*k+1] = jj; pja_asub[3*k+2] = jj2; pnz_send[ind]++; } } } // Pack send data vector CpuIDSend (nlistcpu); vector > ObjSend (nlistcpu); int *pCpuIDSend = NULL; vector *pObjSend = NULL; if (nlistcpu > 0) { pCpuIDSend = &CpuIDSend[0]; pObjSend = &ObjSend[0]; } long long isize; char *pobj; for (i=0;i CpuIDRecv; vector > ObjRecv; CExchange::DataExchange (_comm, CpuIDSend, ObjSend, CpuIDRecv, ObjRecv); // Free send data { vector CpuIDSend_temp; vector > ObjSend_temp; CpuIDSend.swap (CpuIDSend_temp); ObjSend.swap (ObjSend_temp); } // Unpack receive data int nrecv = (int) CpuIDRecv.size(); vector *pObjRecv = NULL; if (nrecv > 0) { pObjRecv = &ObjRecv[0]; } vector > hblk_recv (nrecv+1); CHMatrix<_Int,_Flt> *phblk_recv = &hblk_recv[0]; for (i=0;i > ObjRecv_temp; ObjRecv.swap (ObjRecv_temp); } // Prepare the answer for all received data for (i=0;i *pA_sub = phblk_recv[i].GetASubArr(); int nzjaloc = pA_sub->GetNzja(); _Int *pja_sub = pA_sub->GetJaArr(); int nlistloc = nzjaloc / 3; int nzja_new = 0; for (j=0;j a_new; a_new.ResizeAndSetAllSp (nlistloc, 0, nzja_new, nzja_new); _Int *plist_new = a_new.GetListArr(); _Int *pia_new = a_new.GetIaArr(); _Int *pja_new = a_new.GetJaArr(); _Int *pja2_new = a_new.GetJa2Arr(); nzja_new = 0; pia_new[0] = 0; for (j=0;jReplaceFree (a_new); } // Pack send data ObjRecv.resize (nrecv); pObjRecv = NULL; if (nrecv > 0) { pObjRecv = &ObjRecv[0]; } for (i=0;i CpuIDSend_temp; vector > ObjSend_temp; CpuIDRecv.swap (CpuIDSend_temp); ObjRecv.swap (ObjSend_temp); } // Unpack receive data nrecv = (int) CpuIDSend.size(); pObjSend = NULL; if (nrecv > 0) { pObjSend = &ObjSend[0]; } hblk_send.resize (nrecv+1); phblk_send = &hblk_send[0]; for (i=0;i CpuIDRecv_temp; vector > ObjRecv_temp; CpuIDSend.swap (CpuIDRecv_temp); ObjSend.swap (ObjRecv_temp); } // Combine received data into the new lists icycletot++; int nlisttot = 0; for (i=0;i *pA_sub = phblk_send[i].GetASubArr(); int nlist_sub = pA_sub->GetNlist(); _Int *plist_sub = pA_sub->GetListArr(); for (j=0;j nz_list_new (nlisttot+1); int *pnz_list_new = &nz_list_new[0]; for (i=0;i *pA_sub = phblk_send[i].GetASubArr(); int nlist_sub = pA_sub->GetNlist(); _Int *plist_sub = pA_sub->GetListArr(); _Int *pia_sub = pA_sub->GetIaArr(); for (j=0;j > list_new (nlisttot+1); vector *plist_new = &list_new[0]; for (i=0;i *pA_sub = phblk_send[i].GetASubArr(); int nlist_sub = pA_sub->GetNlist(); _Int *plist_sub = pA_sub->GetListArr(); _Int *pia_sub = pA_sub->GetIaArr(); _Int *pja_sub = pA_sub->GetJaArr(); _Int *pja2_sub = pA_sub->GetJa2Arr(); for (j=0;j ii2arr (pnz_list_new[i]+1); CSortInt2 *pii2arr = &ii2arr[0]; for (j=0;j list_add (2*(pnlist_arr[iblk]+nznew)+1); vector list_flt (2*nznew+1); int *plist_add = &list_add[0]; int *plist_flt = &list_flt[0]; int nlistadd = 0; int nlistflt = 0; int jj_new, jj2_new; int ip = 0; int ip_new = 0; while (ip < pnlist_arr[iblk] || ip_new < nznew) { if (ip < pnlist_arr[iblk] && ip_new < nznew) { jj = pplist_arr[2*ip]; jj2 = pplist_arr[2*ip+1]; jj_new = pii2arr[ip_new].iyval; jj2_new = pii2arr[ip_new].ixval; if (jj == jj_new && jj2 == jj2_new) { plist_add[2*nlistadd] = jj; plist_add[2*nlistadd+1] = jj2; nlistadd++; ip++; ip_new++; } else if (jj2 < jj2_new || (jj2 == jj2_new && jj < jj_new)) { plist_add[2*nlistadd] = jj; plist_add[2*nlistadd+1] = jj2; nlistadd++; ip++; } else { plist_add[2*nlistadd] = jj_new; plist_add[2*nlistadd+1] = jj2_new; nlistadd++; plist_flt[2*nlistflt] = jj_new; plist_flt[2*nlistflt+1] = jj2_new; nlistflt++; ip_new++; } } else if (ip < pnlist_arr[iblk]) { plist_add[2*nlistadd] = pplist_arr[2*ip]; plist_add[2*nlistadd+1] = pplist_arr[2*ip+1]; nlistadd++; ip++; } else { plist_add[2*nlistadd] = pii2arr[ip_new].iyval; plist_add[2*nlistadd+1] = pii2arr[ip_new].ixval; nlistadd++; plist_flt[2*nlistflt] = pii2arr[ip_new].iyval; plist_flt[2*nlistflt+1] = pii2arr[ip_new].ixval; nlistflt++; ip_new++; } } // Replace pnlist_arr[iblk] = nlistadd; pnlist_arr_prev[iblk] = nlistflt; plist_arr[iblk].swap(list_add); plist_arr_prev[iblk].swap(list_flt); } } // Prepare final data for (iblk=0;iblk<_nblks;iblk++) { if (_blk2cpu[iblk] == myid) { int *pplist_arr = &(plist_arr[iblk][0]); _nlist_ext_arr[iblk] = pnlist_arr[iblk]; _list_ext_arr[iblk].resize(2*pnlist_arr[iblk]+1); int *pplist_ext = &(_list_ext_arr[iblk][0]); for (j=0;j<2*(pnlist_arr[iblk]);j++) pplist_ext[j] = pplist_arr[j]; } } } // Get the extended submatrices //======================================================================================== template void CHMatrix<_Int,_Flt>::GetExtendedSubmatrices (void *_comm, int _nblks, long long *_blks, int *_blk2cpu, CHMatrix<_Int,_Flt> *_hmatr_arr, int *_nlist_ext_arr, vector *_list_ext_arr, CMatrix<_Int,_Flt> *_matr_ext_arr) { int myid = CExchange::GetMyid (_comm); int nproc = CExchange::GetNproc (_comm); // Compute the maximal block size int nimax = 0; int i; int niloc; for (i=0;i<_nblks;i++) { niloc = (int)(_blks[i+1]-_blks[i]); if (niloc > nimax) nimax = niloc; } vector imaskblk (nimax+1); vector listblk (nimax+1); vector iablk (nimax+1); int *pimaskblk = &imaskblk[0]; int *plistblk = &listblk[0]; int *piablk = &iablk[0]; int icycleblk = -1; for (i=0;i > blkrows (_nblks); CMatrix<_Int,_Flt> *pblkrows = &blkrows[0]; int nzjanew, ind, kj; int iblk, j, jj, k, kk, nlistnew; for (iblk=0;iblk<_nblks;iblk++) { if (_blk2cpu[iblk] == myid) { CMatrix<_Int,_Flt> *pA_sub = _hmatr_arr[iblk].GetASubArr(); CMatrix *pHMatr_sub = _hmatr_arr[iblk].GetHMatrStr(); int nzja_hmatr = pHMatr_sub->GetNzja(); int *pja_hmatr = pHMatr_sub->GetJaArr(); nzjanew = 0; for (j=0;j anew; anew.ResizeAndSetAll (nlistnew,nlistnew,nzjanew,nzjanew,nzjanew); _Int *plist_new = anew.GetListArr(); _Int *plist2_new = anew.GetList2Arr(); _Int *pia_new = anew.GetIaArr(); _Int *pja_new = anew.GetJaArr(); _Int *pja2_new = anew.GetJa2Arr(); _Flt *pa_new = anew.GetAArr(); for (j=0;j imaskcpu (nproc); vector listcpu (nproc); vector indcpu (nproc); int *pimaskcpu = &imaskcpu[0]; int *plistcpu = &listcpu[0]; int *pindcpu = &indcpu[0]; for (i=0;i imasktot (_nblks); vector listtot (_nblks); vector indtot (_nblks); vector indtot2 (_nblks); int *pimasktot = &imasktot[0]; int *plisttot = &listtot[0]; int *pindtot = &indtot[0]; int *pindtot2 = &indtot2[0]; for (i=0;i<_nblks;i++) { pimasktot[i] = icycletot; } // Create list of cpus and count sizes icyclecpu++; int nlistcpu = 0; int jj2, jcpu; for (iblk=0;iblk<_nblks;iblk++) { if (_blk2cpu[iblk] == myid) { int *pplist_ext_arr = &(_list_ext_arr[iblk][0]); for (j=0;j<_nlist_ext_arr[iblk];j++) { jj2 = (int)pplist_ext_arr[j*2+1]; jcpu = _blk2cpu[jj2]; if (pimaskcpu[jcpu] != icyclecpu) { plistcpu[nlistcpu] = jcpu; nlistcpu++; pimaskcpu[jcpu] = icyclecpu; } } } } sort (plistcpu,plistcpu+nlistcpu); for (i=0;i nz_send (nlistcpu+1); int *pnz_send = &nz_send[0]; for (i=0;i > hblk_send (nlistcpu+1); CHMatrix<_Int,_Flt> *phblk_send = &hblk_send[0]; for (i=0;i *pA_sub = phblk_send[i].GetASubArr(); CMatrix<_Int,_Flt> ablk_temp; ablk_temp.ResizeAndSetAllSp (0, 0, 3*pnz_send[i], 0); pA_sub->ReplaceFree (ablk_temp); } for (i=0;i *pA_sub = phblk_send[ind].GetASubArr(); _Int *pja_asub = pA_sub->GetJaArr(); k = pnz_send[ind]; pja_asub[3*k] = iblk; pja_asub[3*k+1] = jj; pja_asub[3*k+2] = jj2; pnz_send[ind]++; } } } // Pack send data vector CpuIDSend (nlistcpu); vector > ObjSend (nlistcpu); int *pCpuIDSend = NULL; vector *pObjSend = NULL; if (nlistcpu > 0) { pCpuIDSend = &CpuIDSend[0]; pObjSend = &ObjSend[0]; } long long isize; char *pobj; for (i=0;i CpuIDRecv; vector > ObjRecv; CExchange::DataExchange (_comm, CpuIDSend, ObjSend, CpuIDRecv, ObjRecv); // Free send data { vector CpuIDSend_temp; vector > ObjSend_temp; CpuIDSend.swap (CpuIDSend_temp); ObjSend.swap (ObjSend_temp); } // Unpack receive data int nrecv = (int) CpuIDRecv.size(); vector *pObjRecv = NULL; if (nrecv > 0) { pObjRecv = &ObjRecv[0]; } vector > hblk_recv (nrecv+1); CHMatrix<_Int,_Flt> *phblk_recv = &hblk_recv[0]; for (i=0;i > ObjRecv_temp; ObjRecv.swap (ObjRecv_temp); } // Prepare the answer for all received data for (i=0;i *pA_sub = phblk_recv[i].GetASubArr(); int nzjaloc = pA_sub->GetNzja(); _Int *pja_sub = pA_sub->GetJaArr(); int nlistloc = nzjaloc / 3; int nzja_new = 0; for (j=0;j a_new; a_new.ResizeAndSetAll (nlistloc, nlistloc*2, nzja_new, nzja_new, nzja_new); _Int *plist_new = a_new.GetListArr(); _Int *plist2_new = a_new.GetList2Arr(); _Int *pia_new = a_new.GetIaArr(); _Int *pja_new = a_new.GetJaArr(); _Int *pja2_new = a_new.GetJa2Arr(); _Flt *pa_new = a_new.GetAArr(); nzja_new = 0; pia_new[0] = 0; for (j=0;jReplaceFree (a_new); } // Pack send data ObjRecv.resize (nrecv); pObjRecv = NULL; if (nrecv > 0) { pObjRecv = &ObjRecv[0]; } for (i=0;i CpuIDSend_temp; vector > ObjSend_temp; CpuIDRecv.swap (CpuIDSend_temp); ObjRecv.swap (ObjSend_temp); } // Unpack receive data nrecv = (int) CpuIDSend.size(); pObjSend = NULL; if (nrecv > 0) { pObjSend = &ObjSend[0]; } hblk_send.resize (nrecv+1); phblk_send = &hblk_send[0]; for (i=0;i CpuIDRecv_temp; vector > ObjRecv_temp; CpuIDSend.swap (CpuIDRecv_temp); ObjSend.swap (ObjRecv_temp); } // Combine received data into the new lists and add local data icycletot++; int nlisttot = 0; for (i=0;i *pA_sub = phblk_send[i].GetASubArr(); int nlist_sub = pA_sub->GetNlist(); _Int *plist_sub = pA_sub->GetListArr(); for (j=0;j nlist_new (_nblks); vector nz_list_new (_nblks); int *pnlist_new = &nlist_new[0]; int *pnz_list_new = &nz_list_new[0]; for (i=0;i<_nblks;i++) pnlist_new[i] = 0; for (i=0;i<_nblks;i++) pnz_list_new[i] = 0; for (i=0;i *pA_sub = phblk_send[i].GetASubArr(); int nlist_sub = pA_sub->GetNlist(); _Int *plist_sub = pA_sub->GetListArr(); _Int *pia_sub = pA_sub->GetIaArr(); for (j=0;j > matr_arr_new (_nblks); CMatrix<_Int,_Flt> *pmatr_arr_new = &matr_arr_new[0]; for (i=0;i *pA_sub = phblk_send[i].GetASubArr(); int nlist_sub = pA_sub->GetNlist(); _Int *plist_sub = pA_sub->GetListArr(); _Int *plist2_sub = pA_sub->GetList2Arr(); _Int *pia_sub = pA_sub->GetIaArr(); _Int *pja_sub = pA_sub->GetJaArr(); _Int *pja2_sub = pA_sub->GetJa2Arr(); _Flt *pa_sub = pA_sub->GetAArr(); for (j=0;j ia_list (nlisttot+1); vector ja_list (nlist_new+1); vector iptr (nlisttot+1); int *pia_list = &ia_list[0]; int *pja_list = &ja_list[0]; int *piptr = &iptr[0]; for (j=0;j<=nlisttot;j++) pia_list[j] = 0; for (j=0;j imask_elems (nzja_new+1); vector ia_elems (nlisttot+1); vector ind_elems (nzja_new+1); int *pimask_elems = &imask_elems[0]; int *pia_elems = &ia_elems[0]; int *pind_elems = &ind_elems[0]; for (j=0;j= 0) nzja_flt++; } CMatrix<_Int,_Flt> a_flt; a_flt.ResizeAndSetAll (nlist_new, 0, nzja_flt, 0, nzja_flt); _Int *plist_flt = a_flt.GetListArr(); _Int *pia_flt = a_flt.GetIaArr(); _Int *pja_flt = a_flt.GetJaArr(); _Flt *pa_flt = a_flt.GetAArr(); for (j=0;j= 0) { pja_flt[nzja_flt] = pimask_elems[j]; pa_flt[nzja_flt] = pa_new[j]; nzja_flt++; } } pia_flt[i+1] = nzja_flt; } int njmax = 0; int njloc = 0; for (j=0;j njmax) njmax = njloc; } vector iiarr (njmax+1); vector<_Flt> elems (njmax+1); CSortInt *piiarr = &iiarr[0]; _Flt *pelems = &elems[0]; int ibeg, jloc; for (i=0;i long long CHMatrix<_Int,_Flt>::GetPackedSize () const { long long length = sizeof(int); length += (this->nzblk+2) * sizeof(long long); length += this->hmatr_str.GetPackedSize(); if (this->nzblk > 0) { const CMatrix<_Int,_Flt> *pABlocks = &(this->asub_arr[0]); int i; for (i=0;inzblk;i++) { length += pABlocks[i].GetPackedSize(); } } return length; } // Fill by packed data //======================================================================================== template void CHMatrix<_Int,_Flt>::FillPacked (long long _length, char *_obj) const { char* pLoc; pLoc = _obj; int *pHead; long long *pibsblk; pHead = (int *) pLoc; pLoc += sizeof(int); pibsblk = (long long *) pLoc; pLoc += (this->nzblk+2) * sizeof(long long); int sizeloc = this->hmatr_str.GetPackedSize(); pibsblk[0] = pLoc-_obj; pLoc += sizeloc; pibsblk[1] = pLoc-_obj; int i; if (this->nzblk > 0) { const CMatrix<_Int,_Flt> *pABlocks = &(this->asub_arr[0]); for (i=0;inzblk;i++) { sizeloc = pABlocks[i].GetPackedSize (); pibsblk[i+1] = pLoc-_obj; pLoc += sizeloc; } pibsblk[this->nzblk+1] = pLoc-_obj; } pHead[0] = nzblk; if (pLoc-_obj != _length) { throw " CHMatrix<_Int,_Flt>::FillPacked: incorrect length on entry "; } // Pack sparsity block sizeloc = (int)(pibsblk[1]-pibsblk[0]); this->hmatr_str.FillPacked (sizeloc, _obj+pibsblk[0]); // Pack blocks if (this->nzblk > 0) { const CMatrix<_Int,_Flt> *pABlocks = &(this->asub_arr[0]); for (i=0;inzblk;i++) { sizeloc = (int)(pibsblk[i+2]-pibsblk[i+1]); pABlocks[i].FillPacked (sizeloc, _obj+pibsblk[i+1]); } } } // Fill by packed data //======================================================================================== template void CHMatrix<_Int,_Flt>::UnPack (long long _length, char *_obj) { // Get head data char *pLoc; pLoc = _obj; int *pHead; pHead = (int *) pLoc; pLoc += sizeof(int); int nzblk_loc = pHead[0]; long long *pibsblk; pibsblk = (long long *) pLoc; pLoc += (nzblk_loc+2)*sizeof(long long); this->nzblk = nzblk_loc; // Unpack sparsity int sizeloc = (int)(pibsblk[1]-pibsblk[0]); this->hmatr_str.UnPack (sizeloc, pLoc); pLoc += sizeloc; // Pack blocks this->asub_arr.resize (nzblk_loc+1); CMatrix<_Int,_Flt> *pABlocks = &(this->asub_arr[0]); int i; for (i=0;i void CHMatrix<_Int,_Flt>::FilterListsBack (int _myid, int _nblks, int *_blk2cpu, int *_nlist_ext_arr, vector *_list_ext_arr) { int iblk, i, jj, jj2; for (iblk=0;iblk<_nblks;iblk++) { if (_blk2cpu[iblk] == _myid) { int *plist_ext = &(_list_ext_arr[iblk][0]); vector list_flt (2*_nlist_ext_arr[iblk]+1); int *plist_flt = &list_flt[0]; int nlistflt = 0; for (i=0;i<_nlist_ext_arr[iblk];i++) { jj = plist_ext[2*i]; jj2 = plist_ext[2*i+1]; if (jj2 < iblk) { plist_flt[nlistflt*2] = jj; plist_flt[nlistflt*2+1] = jj2; nlistflt++; } } _nlist_ext_arr[iblk] = nlistflt; _list_ext_arr[iblk].swap(list_flt); } } }; // Compute Ja2 array by binary search //======================================================================================== template void CHMatrix<_Int,_Flt>::ComputeJa2 (int _nblks, long long *_blks, int _nlist, const _Int *_ia, const _Int *_ja, _Int *_ja2) { int nzja = (int)_ia[_nlist]; int i; int iblk, iblkprev; long long jj; iblkprev = -1; for (i=0;i= 0 && iblkprev < _nblks) { if (jj >= _blks[iblkprev] && jj < _blks[iblkprev+1]) { iblk = iblkprev; } else { iblk = BinarySearch (jj, _nblks, _blks, iblkprev); } } else { iblk = BinarySearch (jj, _nblks, _blks, iblkprev); } _ja2[i] = iblk; iblkprev = iblk; } }; /// @brief Split structural matrix into the set of matrices //======================================================================================== template void CHMatrix<_Int,_Flt>::SplitMatrSpIntoHMatrSp (int _nblks, long long *_blks, const CMatrix<_Int,_Flt> &_amatr, CHMatrix<_Int,_Flt> &_ahmatr) { // Open sparsity const int nlistloc_ablk = _amatr.GetNlist(); const int nzjaloc_ablk = _amatr.GetNzja(); const _Int *pialoc_ablk = _amatr.GetIaArr(); const _Int *pjaloc_ablk = _amatr.GetJaArr(); if (nlistloc_ablk != _blks[_nblks]) throw " CHMatrix<_Int,_Flt>::SplitMatrSpIntoHMatrSp: incorrect matrix on entry "; // Compute block indices vector<_Int> ja2loc(nzjaloc_ablk+1); _Int *pja2loc = &ja2loc[0]; CHMatrix<_Int,_Flt>::ComputeJa2 (_nblks, _blks, nlistloc_ablk, pialoc_ablk, pjaloc_ablk, pja2loc); // Compute block sparsity vector imaskblk(_nblks); vector listblk(_nblks); int *pimaskblk = &imaskblk[0]; int *plistblk = &listblk[0]; int i; for (i=0;i<_nblks;i++) pimaskblk[i] = -1; int icycle = -1; vector ia_blk(_nblks+1); int *pia_blk = &ia_blk[0]; for (i=0;i<=_nblks;i++) pia_blk[i] = 0; int ihblk, nlistloc, j, jhblk; for (ihblk=0;ihblk<_nblks;ihblk++) { icycle++; nlistloc = 0; for (i=(int)_blks[ihblk];i<_blks[ihblk+1];i++) { for (j=(int)pialoc_ablk[i];j ja_blk(nzjablk+1); int *pja_blk = &ja_blk[0]; int ibeg; for (ihblk=0;ihblk<_nblks;ihblk++) { icycle++; nlistloc = 0; for (i=(int)_blks[ihblk];i<_blks[ihblk+1];i++) { for (j=(int)pialoc_ablk[i];j nz_blk(nzjablk+1); int *pnz_blk = &nz_blk[0]; for (i=0;i iptr(nzjablk+1); vector > iindarr(nzjablk+1); vector > jindarr(nzjablk+1); vector > j2indarr(nzjablk+1); int *piptr = &iptr[0]; vector<_Int> *piindarr = &iindarr[0]; vector<_Int> *pjindarr = &jindarr[0]; for (i=0;i nzjmax) nzjmax = pnz_blk[i]; }; vector iiarr(nzjmax+1); CSortInt *piiarr = &iiarr[0]; _ahmatr.ResizeASub (nzjablk); _ahmatr.SetNzblk(nzjablk); CMatrix<_Int,_Flt> *pABlocks = _ahmatr.GetASubArr(); int jj, k, iloc, jloc, niloc; for (ihblk=0;ihblk<_nblks;ihblk++) { for (i=pia_blk[ihblk];i ablk; ablk.ResizeAndSetAllSp (nlistloc,0,pnz_blk[i],0); _Int *plist_loc = ablk.GetListArr(); _Int *pia_loc = ablk.GetIaArr(); _Int *pja_loc = ablk.GetJaArr(); irow_prev = -1; pia_loc[0] = 0; nlistloc = 0; for (k=0;k *pAHBlkStr = _ahmatr.GetHMatrStr(); pAHBlkStr->ResizeAndSetAllSp (_nblks,0,nzjablk,0); vector *pia_HBlk = pAHBlkStr->GetIa(); vector *pja_HBlk = pAHBlkStr->GetJa(); pia_HBlk->swap (ia_blk); pja_HBlk->swap (ja_blk); int *plist_blk = pAHBlkStr->GetListArr(); for (i=0;i<_nblks;i++) plist_blk[i] = i; }; // Print hmatrix data //======================================================================================== template void CHMatrix<_Int,_Flt>::PrintHMatrix (ofstream &_fout) { _fout << " CHMatrix:" << endl; _fout << " HMatrStr:" << endl; CIlu2::PrintMatrix (_fout, this->hmatr_str); _fout << " Nzblk = " << this->nzblk << endl; _fout << " ABlocks: " << endl; if (this->nzblk > 0) { int i; CMatrix<_Int,_Flt> *pABlocks = this->GetASubArr (); for (i=0;inzblk;i++) { _fout << " Iblk = " << i << endl; CIlu2<_Int,_Flt>::PrintMatrix (_fout, pABlocks[i]); } } } // Print sparsity with boxes that show nonzero blocks //======================================================================================== template void CHMatrix<_Int,_Flt>::Str2PsBox (int _collap, const CMatrix<_Int,_Flt> &_amatr, char *_fname, int _nblks, int *_blks) { // Compute new blocks partitioning vector blksnw (_nblks+2); int *pblksnw = &blksnw[0]; const int nsupmx = _amatr.GetNlist(); if (_nblks == 0) { pblksnw[0] = 0; pblksnw[1] = (nsupmx+_collap-1) / _collap; } else { pblksnw[0] = 0; for (int iblk=0;iblk<_nblks;iblk++) { int ni = _blks[iblk+1]-_blks[iblk]; int niloc = (ni+_collap-1) / _collap; pblksnw[iblk+1] = pblksnw[iblk]+niloc; } } // Compute nd2sp and sp2nd arrays int nnew; if (_nblks == 0) { nnew = pblksnw[_nblks+1]; } else { nnew = pblksnw[_nblks]; } vector nd2sp (_amatr.GetNlist()+1); vector sp2nd (nnew+1); int *pnd2sp = &nd2sp[0]; int *psp2nd = &sp2nd[0]; int i, j; if (_nblks == 0) { psp2nd[0] = 0; for (i=0;i imask (nnew+1); vector lstloc (nnew+1); int *pimask = &imask[0]; int *plstloc = &lstloc[0]; for (i=0;i temp; temp.ResizeAndSetAllSp(nnew,0,nz,0); int *plistt = temp.GetListArr(); int *piat = temp.GetIaArr(); int *pjat = temp.GetJaArr(); piat[0] = 0; for (isup=0;isup::Str2PsBox (temp, _fname, _nblks, pblksnw); }; // Print sparsity with boxes that show nonzero blocks //======================================================================================== template void CHMatrix<_Int,_Flt>::Str2PsBox (const CMatrix<_Int,_Flt> &_amatr, char *_fname, int _nblks, int *_blks) { // Open output file ofstream fout (_fname); if (!fout.is_open()) { cout << " Error: File named " << _fname << " is not opened !!!" << endl; return; } static float default_bg[3] = {(float)0.9, (float)0.9, (float)1.0}; static float default_fg[3] = {(float)0.7, (float)0.7, (float)1.0}; // Write the header information fout << "%!PS-Adobe-2.0" << endl; fout << "%%BoundingBox: 0 0 600 600" << endl; fout << "/m {moveto} def % x y" << endl; fout << "/l {lineto} def % x y" << endl; fout << "/s {stroke} def % x y" << endl; fout << "/n {newpath} def % x y" << endl; fout << "/c {closepath} def % x y" << endl; // Parameters of the local window const int nsupmx = _amatr.GetNlist(); double s, s1; s1 = 50.0e0; s = 500.0e0 / ((double) nsupmx); // Print the bounding window double dx, dy; fout << SetPw << 0.03 << " setlinewidth" << endl; fout << " n" << endl; dx = s1+s*0.5; dy = s1+s*0.5; Round (dx,dy); fout << SetPw << dx << SetPw << dy << " m" << endl; dx = s1+s*(nsupmx+0.5); dy = s1+s*0.5; Round (dx,dy); fout << SetPw << dx << SetPw << dy << " l" << endl; dx = s1+s*(nsupmx+0.5); dy = s1+s*(nsupmx+0.5); Round (dx,dy); fout << SetPw << dx << SetPw << dy << " l" << endl; dx = s1+s*0.5; dy = s1+s*(nsupmx+0.5); Round (dx,dy); fout << SetPw << dx << SetPw << dy << " l" << endl; dx = s1+s*0.5; dy = s1+s*0.5; Round (dx,dy); fout << SetPw << dx << SetPw << dy << " l s c" << endl; // Print blocks partitioning int i, j, k; double x, y; double x1, y1; if (_nblks > 0) { vector blks_new (_nblks+2); long long *pblks_new = &blks_new[0]; int nblks_new = 0; pblks_new[0] = 0; for (i=0;i<_nblks;i++) { if (_blks[i+1] > _blks[i]) { pblks_new[nblks_new+1] = _blks[i+1]; nblks_new++; } } if (pblks_new[nblks_new] < _amatr.GetNlist()) { pblks_new[nblks_new+1] = _amatr.GetNlist(); nblks_new++; } CHMatrix<_Int,_Flt> ahblk; CHMatrix<_Int,_Flt>::SplitMatrSpIntoHMatrSp (nblks_new, pblks_new, _amatr, ahblk); CMatrix *pahblkstr = ahblk.GetHMatrStr(); int nlistloc = pahblkstr->GetNlist(); int *plistloc = pahblkstr->GetListArr(); int *pialoc = pahblkstr->GetIaArr(); int *pjaloc = pahblkstr->GetJaArr(); for(int ilist = 0;ilist < nlistloc;ilist++) { int i = plistloc[ilist]; for(int jlist=pialoc[ilist];jlist 1.0e0) r=1.0e0; r = s * r / 2.0; if (r < 0.01e0) r=0.01e0; fout << SetPw << 2*r << " setlinewidth" << endl; fout << " /d {moveto currentpoint " << SetPw << r; fout << " 0 360 arc fill} def % x y" << endl; // Print the sparsity int i1, j1; const _Int *plist = _amatr.GetListArr(); const _Int *pia = _amatr.GetIaArr(); const _Int *pja = _amatr.GetJaArr(); fout << " n" << endl; for (int ilist=0;ilist<_amatr.GetNlist();ilist++) { i = (int)plist[ilist]; for (k=(int)pia[ilist];k void CMvmPar<_Int,_Flt,_FltVect>::InitMvmA (CHMatrix<_Int,_Flt> *_hmatr_arr) { // Get control data void *pcomm_loc = this->pcomm; int nblks_loc = this->nblks; long long *pblks_loc = this->pblks; int *pblk2cpu_loc = this->pblk2cpu; int ni_cpu_loc = this->ni_cpu; int *pibsblk_loc = &this->ibsblk[0]; int nlistblk_own_loc = this->nlistblk_own; int *plistblk_own_loc = &this->listblk_own[0]; int nproc = CExchange::GetNproc (pcomm_loc); int myid = CExchange::GetMyid (pcomm_loc); // Compute maximal block size int nimax = 0; int i, niloc; for (i=0;i nimax) nimax = niloc; } // Store pointer to matrix data this->phmatr = _hmatr_arr; // Compute list of external blocks vector imaskblk (nblks_loc+1); vector listblk (nblks_loc+1); vector indblk (nblks_loc+1); int *pimaskblk = &imaskblk[0]; int *plistblk = &listblk[0]; int *pindblk = &indblk[0]; for (i=0;i *phmatr_str_loc = _hmatr_arr[iblk].GetHMatrStr(); int nzja_hblk = phmatr_str_loc->GetNzja(); int *pja_hblk = phmatr_str_loc->GetJaArr(); for (j=0;jnblks_recvs = nlistblk_ext; this->listblk_recvs.resize (nlistblk_ext+1); int *plistblk_recvs = &this->listblk_recvs[0]; for (i=0;i ia_list_rcv (nlistblk_ext+1); vector > ja_pair_rcv (nlistblk_ext+1); int *pia_list_rcv = &ia_list_rcv[0]; vector *pja_pair_rcv = &ja_pair_rcv[0]; for (i=0;i<=nlistblk_ext;i++) pia_list_rcv[i] = 0; int ind; for (i=0;i *phmatr_str_loc = _hmatr_arr[iblk].GetHMatrStr(); int nzja_hblk = phmatr_str_loc->GetNzja(); int *pja_hblk = phmatr_str_loc->GetJaArr(); for (j=0;jialist_recvs.resize (nlistblk_ext+1); int *pialist_recvs = &this->ialist_recvs[0]; for (i=0;i<=nlistblk_ext;i++) pialist_recvs[i] = pia_list_rcv[i]; int nzja_pairs = pialist_recvs[nlistblk_ext]; this->japairs_recvs.resize (2*nzja_pairs+1); int *pjapairs_recvs = &this->japairs_recvs[0]; int ibeg, jloc; for (i=0;i imask_work (nimax+1); vector list_work (nimax+1); int *pimask_work = &imask_work[0]; int *plist_work = &list_work[0]; for (i=0;i ia_blk_rcv (nlistblk_ext+1); vector > ja_blk_rcv (nlistblk_ext+1); int *pia_blk_rcv = &ia_blk_rcv[0]; vector *pja_blk_rcv = &ja_blk_rcv[0]; for (i=0;i<=nlistblk_ext;i++) pia_blk_rcv[i] = 0; int nlistloc, k, jj; for (i=0;i *pasub_loc = _hmatr_arr[iblk].GetASubArr(); int nzjaloc = pasub_loc[ind].GetNzja (); _Int *pjaloc = pasub_loc[ind].GetJaArr (); for (k=0;kiablk_recvs.resize (nlistblk_ext+1); int *piablk_recvs = &this->iablk_recvs[0]; for (i=0;i<=nlistblk_ext;i++) piablk_recvs[i] = pia_blk_rcv[i]; int nzja_ind = pia_blk_rcv[nlistblk_ext]; this->ind_recvs.resize (nzja_ind+1); int *pind_recvs = &this->ind_recvs[0]; for (i=0;ix_recv.resize (nzja_ind+1); // Finally create cpu recv data vector imaskcpu (nproc); vector listcpu (nproc); vector indcpu (nproc); int *pimaskcpu = &imaskcpu[0]; int *plistcpu = &listcpu[0]; int *pindcpu = &indcpu[0]; for (i=0;inrecvs = nlistcpu; this->rcv2cpu.resize (nlistcpu+1); this->ia_recvs.resize (nlistcpu+1); this->ja_recvs.resize (2*nzja_ind+1); int *prcv2cpu = &this->rcv2cpu[0]; int *pia_recvs = &this->ia_recvs[0]; int *pja_recvs = &this->ja_recvs[0]; for (i=0;i > hblk_send (nlistcpu+1); CHMatrix *phblk_send = &hblk_send[0]; for (i=0;i *pA_sub = phblk_send[i].GetASubArr(); CMatrix ablk_temp; int niloc = pia_recvs[i+1]-pia_recvs[i]; ablk_temp.ResizeAndSetAllSp (0, 0, niloc*2, 0); int *pjaloc = ablk_temp.GetJaArr (); ibeg = pia_recvs[i]; for (j=pia_recvs[i];jReplaceFree (ablk_temp); } // Pack send data vector CpuIDSend (nlistcpu); vector > ObjSend (nlistcpu); int *pCpuIDSend = NULL; vector *pObjSend = NULL; if (nlistcpu > 0) { pCpuIDSend = &CpuIDSend[0]; pObjSend = &ObjSend[0]; } long long isize; char *pobj; for (i=0;i CpuIDRecv; vector > ObjRecv; CExchange::DataExchange (pcomm_loc, CpuIDSend, ObjSend, CpuIDRecv, ObjRecv); // Free send data { vector CpuIDSend_temp; vector > ObjSend_temp; CpuIDSend.swap (CpuIDSend_temp); ObjSend.swap (ObjSend_temp); } // Unpack receive data int nrecv_loc = (int) CpuIDRecv.size(); vector *pObjRecv = NULL; int *pCpuIDRecv = NULL; if (nrecv_loc > 0) { pObjRecv = &ObjRecv[0]; pCpuIDRecv = &CpuIDRecv[0]; } vector > hblk_recv (nrecv_loc+1); CHMatrix *phblk_recv = &hblk_recv[0]; for (i=0;i > ObjRecv_temp; ObjRecv.swap (ObjRecv_temp); } // Compute correct ordering of cpu data vector iiarr (nrecv_loc+1); CSortInt *piiarr = &iiarr[0]; for (i=0;insends = nrecv_loc; this->snd2cpu.resize (nrecv_loc+1); this->ia_sends.resize (nrecv_loc+1); int *psnd2cpu = &this->snd2cpu[0]; int *pia_sends = &this->ia_sends[0]; int nz_sends = 0; pia_sends[0] = 0; for (i=0;i *pA_sub = phblk_recv[ind].GetASubArr(); int nzjaloc = pA_sub->GetNzja() / 2; nz_sends += nzjaloc; pia_sends[i+1] = nz_sends; } this->ind_sends.resize (nz_sends+1); this->x_send.resize (nz_sends+1); int *pind_sends = &this->ind_sends[0]; nz_sends = 0; int jj2, ibs; for (i=0;i *pA_sub = phblk_recv[ind].GetASubArr(); int nzjaloc = pA_sub->GetNzja() / 2; int *pjaloc = pA_sub->GetJaArr(); for (j=0;jx_temp.resize (nimax+1); }; // // Perform multiplication by A //======================================================================================== template void CMvmPar<_Int,_Flt,_FltVect>::MvmA (const _FltVect *_x, _FltVect *_ax) { // Open mvm structure void *pcomm_loc = this->pcomm; int nblks_loc = this->nblks; long long *pblks_loc = this->pblks; int *pblk2cpu_loc = this->pblk2cpu; int ni_cpu_loc = this->ni_cpu; int *pibsblk_loc = &this->ibsblk[0]; int nlistblk_own_loc = this->nlistblk_own; int *plistblk_own_loc = &this->listblk_own[0]; CHMatrix<_Int,_Flt> *phmatr_loc = this->phmatr; int nsends_loc = this->nsends; int *psnd2cpu_loc = &this->snd2cpu[0]; int *pia_sends_loc = &this->ia_sends[0]; int *pind_sends_loc = &this->ind_sends[0]; _FltVect *px_send_loc = &this->x_send[0]; int nrecvs_loc = this->nrecvs; int *prcv2cpu_loc = &this->rcv2cpu[0]; int *pia_recvs_loc = &this->ia_recvs[0]; int *pja_recvs_loc = &this->ja_recvs[0]; _FltVect *px_recv_loc = &this->x_recv[0]; int nblks_recvs_loc = this->nblks_recvs; int *plistblk_recvs_loc = &this->listblk_recvs[0]; int *pialist_recvs_loc = &this->ialist_recvs[0]; int *pjapairs_recvs_loc = &this->japairs_recvs[0]; int *piablk_recvs_loc = &this->iablk_recvs[0]; int *pind_recvs_loc = &this->ind_recvs[0]; _FltVect *px_temp_loc = &this->x_temp[0]; int nproc = CExchange::GetNproc (pcomm_loc); int myid = CExchange::GetMyid (pcomm_loc); // Init array ax by zeroes CVect<_FltVect>::SetByZeroes (ni_cpu_loc, _ax); // Prepare send int ni_send_loc = pia_sends_loc[nsends_loc]; int i, ind; for (i=0;i::SetByZeroes (ni_recv_loc, px_recv_loc); // Init async recvs and sends void *psndrcv_recvs_loc; void *psndrcv_stats_loc; CExchange::AllocateRecvs (nrecvs_loc+nsends_loc, psndrcv_recvs_loc); CExchange::AllocateStats (nrecvs_loc+nsends_loc, psndrcv_stats_loc); int icpu, isize, ibs; for (i=0;i *pasub_loc = phmatr_loc[iblk].GetASubArr(); CMatrix *phmatr_str_loc = phmatr_loc[iblk].GetHMatrStr(); int nzja_hblk = phmatr_str_loc->GetNzja(); int *pja_hblk = phmatr_str_loc->GetJaArr(); for (j=0;j::MvmA (pasub_loc[j], _x+ibs_j, _ax+ibs_i); } } } // Wait for completetion of sends/recvs CExchange::WaitAll (nrecvs_loc+nsends_loc, psndrcv_recvs_loc, psndrcv_stats_loc); // Perform remaining multiplications int jj, jj2; for (i=0;i::MvmA: error: incorrect block number !"; } px_temp_loc[jj] = px_recv_loc[ind]; } for (j=pialist_recvs_loc[i];j *pasub_loc = phmatr_loc[iblk].GetASubArr(); CMatrix *phmatr_str_loc = phmatr_loc[iblk].GetHMatrStr(); int *pja_hblk = phmatr_str_loc->GetJaArr(); if (pja_hblk[ind] != jblk) { throw " CTMvmSlvPar<_Int,_Flt,_FltVect>::MvmA: error 2: incorrect block number !"; } CMvmSlv<_Int,_Flt,_FltVect>::MvmA (pasub_loc[ind], px_temp_loc, _ax+ibs_i); } } CExchange::DeleteRecvs (psndrcv_recvs_loc); CExchange::DeleteStats (psndrcv_stats_loc); }; // Clean MvmA structure //======================================================================================== template void CMvmPar<_Int,_Flt,_FltVect>::Clean () { pcomm = NULL; nblks = 0; pblks = NULL; pblk2cpu = NULL; ni_cpu = 0; vector ibsblk_dummy; ibsblk.swap (ibsblk_dummy); nlistblk_own = 0; vector listblk_own_dummy; listblk_own.swap (listblk_own_dummy); phmatr = NULL; nsends = 0; vector snd2cpu_dummy; vector ia_sends_dummy; vector ind_sends_dummy; vector<_FltVect> x_send_dummy; snd2cpu.swap (snd2cpu_dummy); ia_sends.swap (ia_sends_dummy); ind_sends.swap (ind_sends_dummy); x_send.swap (x_send_dummy); nrecvs = 0; vector rcv2cpu_dummy; vector ia_recvs_dummy; vector ja_recvs_dummy; vector<_FltVect> x_recv_dummy; rcv2cpu.swap (rcv2cpu_dummy); ia_recvs.swap (ia_recvs_dummy); ja_recvs.swap (ja_recvs_dummy); x_recv.swap (x_recv_dummy); nblks_recvs = 0; vector listblk_recvs_dummy; vector ialist_recvs_dummy; vector japairs_recvs_dummy; vector iablk_recvs_dummy; vector ind_recvs_dummy; vector<_FltVect> x_temp_dummy; listblk_recvs.swap (listblk_recvs_dummy); ialist_recvs.swap (ialist_recvs_dummy); japairs_recvs.swap (japairs_recvs_dummy); iablk_recvs.swap (iablk_recvs_dummy); ind_recvs.swap (ind_recvs_dummy); x_temp.swap (x_temp_dummy); }; // Init SolveLU data //======================================================================================== template void CSlvPar<_Int,_Flt,_FltVect>::InitSolveLU (vector *_listpairs_ext, CMatrix<_Int,_Flt> *_matrL, CMatrix<_Int,_Flt> *_matrU, vector *_orderLU) { // Get control data void *pcomm_loc = this->pcomm; int nblks_loc = this->nblks; long long *pblks_loc = this->pblks; long long *pblks_ext_loc = this->pblks_ext; int *pblk2cpu_loc = this->pblk2cpu; int ni_cpu_loc = this->ni_cpu; int *pibsblk_loc = &this->ibsblk[0]; int nlistblk_own_loc = this->nlistblk_own; int *plistblk_own_loc = &this->listblk_own[0]; int nproc = CExchange::GetNproc (pcomm_loc); int myid = CExchange::GetMyid (pcomm_loc); // Compute maximal block size int nimax_ext = 0; int i, niloc; for (i=0;i nimax_ext) nimax_ext = niloc; } // Store pointers to data this->plistpairs_ext = _listpairs_ext; this->pmatrL = _matrL; this->pmatrU = _matrU; this->porderLU = _orderLU; // Create cpu recv data vector imaskcpu (nproc); vector listcpu (nproc); vector indcpu (nproc); int *pimaskcpu = &imaskcpu[0]; int *plistcpu = &listcpu[0]; int *pindcpu = &indcpu[0]; for (i=0;inrecvs = nlistcpu; this->rcv2cpu.resize (nlistcpu+1); this->ia_recvs.resize (nlistcpu+1); int *prcv2cpu = &this->rcv2cpu[0]; int *pia_recvs = &this->ia_recvs[0]; for (i=0;ix_recv.resize (nz_recvs+1); this->ialist_recvs.resize (nlistblk_own_loc+1); this->jalist_recvs.resize (2*nz_recvs+1); int *pialist_recvs = &this->ialist_recvs[0]; int *pjalist_recvs = &this->jalist_recvs[0]; nz_recvs = 0; for (i=0;i > hblk_send (nlistcpu+1); CHMatrix *phblk_send = &hblk_send[0]; for (i=0;i *pA_sub = phblk_send[i].GetASubArr(); CMatrix ablk_temp; int niloc = pia_recvs[i+1]-pia_recvs[i]; ablk_temp.ResizeAndSetAllSp (0, 0, niloc*2, 0); pA_sub->ReplaceFree (ablk_temp); } nz_recvs = 0; for (i=0;i *pA_sub = phblk_send[ind].GetASubArr(); int *pjaloc = pA_sub->GetJaArr (); k = plistcpu[ind]-pia_recvs[ind]; pjaloc[k*2] = jj; pjaloc[k*2+1] = jj2; plistcpu[ind]++; } } } // Pack send data vector CpuIDSend (nlistcpu); vector > ObjSend (nlistcpu); int *pCpuIDSend = NULL; vector *pObjSend = NULL; if (nlistcpu > 0) { pCpuIDSend = &CpuIDSend[0]; pObjSend = &ObjSend[0]; } long long isize; char *pobj; for (i=0;i CpuIDRecv; vector > ObjRecv; CExchange::DataExchange (pcomm_loc, CpuIDSend, ObjSend, CpuIDRecv, ObjRecv); // Free send data { vector CpuIDSend_temp; vector > ObjSend_temp; CpuIDSend.swap (CpuIDSend_temp); ObjSend.swap (ObjSend_temp); } // Unpack receive data int nrecv_loc = (int) CpuIDRecv.size(); vector *pObjRecv = NULL; int *pCpuIDRecv = NULL; if (nrecv_loc > 0) { pObjRecv = &ObjRecv[0]; pCpuIDRecv = &CpuIDRecv[0]; } vector > hblk_recv (nrecv_loc+1); CHMatrix *phblk_recv = &hblk_recv[0]; for (i=0;i > ObjRecv_temp; ObjRecv.swap (ObjRecv_temp); } // Compute correct ordering of cpu data vector iiarr (nrecv_loc+1); CSortInt *piiarr = &iiarr[0]; for (i=0;insends = nrecv_loc; this->snd2cpu.resize (nrecv_loc+1); this->ia_sends.resize (nrecv_loc+1); int *psnd2cpu = &this->snd2cpu[0]; int *pia_sends = &this->ia_sends[0]; int nz_sends = 0; pia_sends[0] = 0; for (i=0;i *pA_sub = phblk_recv[ind].GetASubArr(); int nzjaloc = pA_sub->GetNzja() / 2; nz_sends += nzjaloc; pia_sends[i+1] = nz_sends; } this->ind_sends.resize (nz_sends+1); this->x_send.resize (nz_sends+1); int *pind_sends = &this->ind_sends[0]; nz_sends = 0; int ibs; for (i=0;i *pA_sub = phblk_recv[ind].GetASubArr(); int nzjaloc = pA_sub->GetNzja() / 2; int *pjaloc = pA_sub->GetJaArr(); for (j=0;jx_temp.resize (nimax_ext*2+1); }; // Perform SolveLU computations //======================================================================================== template void CSlvPar<_Int,_Flt,_FltVect>::SolveLU (const _FltVect *_x, _FltVect *_px) { // Open mvm structure void *pcomm_loc = this->pcomm; int nblks_loc = this->nblks; long long *pblks_loc = this->pblks; long long *pblks_ext_loc = this->pblks_ext; int *pblk2cpu_loc = this->pblk2cpu; int ni_cpu_loc = this->ni_cpu; int *pibsblk_loc = &this->ibsblk[0]; int nlistblk_own_loc = this->nlistblk_own; int *plistblk_own_loc = &this->listblk_own[0]; vector *plistpairs_ext_loc = this->plistpairs_ext; CMatrix<_Int,_Flt> *pmatrL_loc = this->pmatrL; CMatrix<_Int,_Flt> *pmatrU_loc = this->pmatrU; vector *porderLU_loc = this->porderLU; int nsends_loc = this->nsends; int *psnd2cpu_loc = &this->snd2cpu[0]; int *pia_sends_loc = &this->ia_sends[0]; int *pind_sends_loc = &this->ind_sends[0]; _FltVect *px_send_loc = &this->x_send[0]; int nrecvs_loc = this->nrecvs; int *prcv2cpu_loc = &this->rcv2cpu[0]; int *pia_recvs_loc = &this->ia_recvs[0]; _FltVect *px_recv_loc = &this->x_recv[0]; int *pialist_recvs_loc = &this->ialist_recvs[0]; int *pjalist_recvs_loc = &this->jalist_recvs[0]; _FltVect *px_temp_loc = &this->x_temp[0]; int nproc = CExchange::GetNproc (pcomm_loc); int myid = CExchange::GetMyid (pcomm_loc); // Init array ax by zeroes CVect<_FltVect>::SetByZeroes (ni_cpu_loc, _px); // Prepare send int ni_send_loc = pia_sends_loc[nsends_loc]; int i, ind; for (i=0;i::SetByZeroes (ni_recv_loc, px_recv_loc); // Init async recvs and sends void *psndrcv_recvs_loc; void *psndrcv_stats_loc; CExchange::AllocateRecvs (nrecvs_loc+nsends_loc, psndrcv_recvs_loc); CExchange::AllocateStats (nrecvs_loc+nsends_loc, psndrcv_stats_loc); int icpu, isize, ibs; for (i=0;i::SetByZeroes (ni_send_loc, px_send_loc); // Perform local computations int iblk, ibs_i, j, jj, jj2, ibs_j, ind1, niloc, niextloc, ni_ini; _FltVect *px1_temp; _FltVect *px2_temp; for (i=0;i::SetByZeroes (ni_ini, px1_temp); memcpy (px1_temp+ni_ini,_x+ibs_i,(size_t)(niloc*sizeof(_FltVect))); int *pplistpairs_loc = &(plistpairs_ext_loc[iblk][0]); for (j=0;j::OrderVector (niextloc, porderLU[iblk], px1_temp, px2_temp); CMvmSlv<_Int,_Flt,_FltVect>::SolveL (pmatrL_loc[iblk], px2_temp, px1_temp); CVect<_FltVect>::SetByZeroes (ni_ini, px1_temp); CMvmSlv<_Int,_Flt,_FltVect>::SolveU (pmatrU_loc[iblk], px1_temp, px2_temp); CVect<_FltVect>::InvOrderVector (niextloc, porderLU[iblk], px2_temp, px1_temp); } else { CMvmSlv<_Int,_Flt,_FltVect>::SolveL (pmatrL_loc[iblk], px1_temp, px2_temp); CVect<_FltVect>::SetByZeroes (ni_ini, px2_temp); CMvmSlv<_Int,_Flt,_FltVect>::SolveU (pmatrU_loc[iblk], px2_temp, px1_temp); } for (j=pialist_recvs_loc[i];j::AddReplaceVector (niloc, px1_temp+ni_ini, _px+ibs_i); } // Backward exchanges for (i=0;i void CSlvPar<_Int,_Flt,_FltVect>::Clean () { pcomm = NULL; nblks = 0; pblks = NULL; pblks_ext = NULL; pblk2cpu = NULL; ni_cpu = 0; vector ibsblk_dummy; ibsblk.swap (ibsblk_dummy); nlistblk_own = 0; vector listblk_own_dummy; listblk_own.swap (listblk_own_dummy); pmatrL = NULL; pmatrU = NULL; porderLU = NULL; nsends = 0; vector snd2cpu_dummy; vector ia_sends_dummy; vector ind_sends_dummy; vector<_FltVect> x_send_dummy; snd2cpu.swap (snd2cpu_dummy); ia_sends.swap (ia_sends_dummy); ind_sends.swap (ind_sends_dummy); x_send.swap (x_send_dummy); nrecvs = 0; vector rcv2cpu_dummy; vector ia_recvs_dummy; vector<_FltVect> x_recv_dummy; vector ialist_recvs_dummy; vector jalist_recvs_dummy; vector<_FltVect> x_temp_dummy; rcv2cpu.swap (rcv2cpu_dummy); ia_recvs.swap (ia_recvs_dummy); x_recv.swap (x_recv_dummy); ialist_recvs.swap (ialist_recvs_dummy); jalist_recvs.swap (jalist_recvs_dummy); x_temp.swap (x_temp_dummy); } // Prepare solver structures including performing parallel fct //======================================================================================== template void CSolver<_Int,_Flt,_FltVect>::PrepareSolver (void *_pcomm, SSolverParams *_params, int _nblks, long long *_blks, int *_blk2cpu, bool _b_store_matrix, _Int *_ia, _Int *_ja, _Flt *_a) { // Store control data this->pcomm = _pcomm; this->params = *_params; this->nblks = _nblks; this->blks.resize (_nblks+1); this->blks_ext.resize (_nblks+1); this->blk2cpu.resize (_nblks+1); long long *pblks = &this->blks[0]; long long *pblks_ext = &this->blks_ext[0]; int *pblk2cpu = &this->blk2cpu[0]; int i; for (i=0;i<=_nblks;i++) pblks[i] = _blks[i]; for (i=0;i<=_nblks;i++) pblks_ext[i] = _blks[i]; for (i=0;i<_nblks;i++) pblk2cpu[i] = _blk2cpu[i]; int myid = CExchange::GetMyid(_pcomm); // Allocate arrays for data this->hmatr_arr.resize (_nblks); this->nlist_ext_arr.resize (_nblks); this->list_ext_arr.resize (_nblks); this->order_LU.resize (_nblks); this->matrL_float.resize (_nblks); this->matrL_double.resize (_nblks); this->matrU_float.resize (_nblks); this->matrU_double.resize (_nblks); CHMatrix<_Int,_Flt> *phmatr_arr = &this->hmatr_arr[0]; int *pnlist_ext_arr = &this->nlist_ext_arr[0]; vector *plist_ext_arr = &this->list_ext_arr[0]; vector *porder_LU = &this->order_LU[0]; CMatrix<_Int,float> *pmatrL_float = &this->matrL_float[0]; CMatrix<_Int,double> *pmatrL_double = &this->matrL_double[0]; CMatrix<_Int,float> *pmatrU_float = &this->matrU_float[0]; CMatrix<_Int,double> *pmatrU_double = &this->matrU_double[0]; // Create matrix as set of hblocks int nimax = 0; int niloc; for (i=0;i<_nblks;i++) { niloc = (int)(pblks[i+1]-pblks[i]); if (niloc > nimax) nimax = niloc; } vector<_Int> listloc (nimax+1); vector<_Int> ialoc (nimax+1); _Int *plistloc = &listloc[0]; _Int *pialoc = &ialoc[0]; vector imaskblk (_nblks); int *pimaskblk = &imaskblk[0]; for (i=0;i<_nblks;i++) pimaskblk[i] = -1; int icycleblk = -1; int ibeg = 0; int ishift, j; for (i=0;i<_nblks;i++) { if (pblk2cpu[i] == myid) { niloc = (int)(pblks[i+1]-pblks[i]); ishift = (int)_ia[ibeg]; for (j=0;j hblk (i, niloc, plistloc, pialoc, _ja+ishift, _a+ishift, _nblks, pblks, icycleblk, pimaskblk); ibeg += niloc; phmatr_arr[i].ReplaceFree (hblk); } } // Symmetrize hmatr vector > hmatr_symm_arr (_nblks); CHMatrix<_Int,_Flt> *phmatr_symm_arr = &hmatr_symm_arr[0]; CHMatrix<_Int,_Flt>::SymmetrizeSubmatrices (_pcomm, _nblks, pblks, pblk2cpu, phmatr_arr, phmatr_symm_arr); for (i=0;i<_nblks;i++) { if (pblk2cpu[i] == myid) { phmatr_arr[i].ReplaceFree (phmatr_symm_arr[i]); } } // Compute extended lists int ncycle_loc = _params->ncycle; CHMatrix<_Int,_Flt>::ExtendedLists (_pcomm, ncycle_loc, _nblks, pblks, pblk2cpu, phmatr_arr, pnlist_ext_arr, plist_ext_arr); // Perform filtering of the lists CHMatrix<_Int,_Flt>::FilterListsBack (myid, _nblks, pblk2cpu, pnlist_ext_arr, plist_ext_arr); // Create extended blocks partitioning for (i=0;i<=_nblks;i++) pblks_ext[i] = 0; for (i=0;i<_nblks;i++) { if (pblk2cpu[i] == myid) { pblks_ext[i+1] = (pblks[i+1]-pblks[i])+pnlist_ext_arr[i]; } } CExchange::ExchangeArray (_pcomm, 'L', '+', _nblks+1, (void *)pblks_ext); for (i=0;i<_nblks;i++) pblks_ext[i+1] = pblks_ext[i]+pblks_ext[i+1]; // Get extended submatrices vector > matr_ext_arr (_nblks); CMatrix<_Int,_Flt> *pmatr_ext_arr = &matr_ext_arr[0]; CHMatrix<_Int,_Flt>::GetExtendedSubmatrices (_pcomm, _nblks, pblks, pblk2cpu, phmatr_arr, pnlist_ext_arr, plist_ext_arr, pmatr_ext_arr); int collap_loc = _params->collap; char strbuff[256]; int blks_2[3]; if (collap_loc > 0) { for (i=0;i<_nblks;i++) { if (pblk2cpu[i] == myid) { sprintf (strbuff,"BlkStrExt_%i.ps",i); blks_2[0] = 0; blks_2[1] = pnlist_ext_arr[i]; blks_2[2] = pmatr_ext_arr[i].GetNlist(); CHMatrix<_Int,_Flt>::Str2PsBox (collap_loc, pmatr_ext_arr[i], strbuff, 2, blks_2); } } } if (!_b_store_matrix) { this->CleanMvmA (); } // Compute new ordering vector n2_blocks (_nblks+1); int *pn2_blocks = &n2_blocks[0]; for (i=0;i<_nblks;i++) pn2_blocks[i] = pmatr_ext_arr[i].GetNlist(); int blks_3[4]; int ordtype_loc = _params->ordtype; if (ordtype_loc > 0) { vector > matr_ord_arr (_nblks); CMatrix<_Int,_Flt> *pmatr_ord_arr = &matr_ord_arr[0]; for (i=0;i<_nblks;i++) { if (pblk2cpu[i] == myid) { CIlu2_impl<_Int,_Flt>::ComputeOptimalOrderSchur (ordtype_loc, pmatr_ext_arr[i].GetNlist(), pnlist_ext_arr[i], *(pmatr_ext_arr[i].GetIa()), *(pmatr_ext_arr[i].GetJa()), pn2_blocks[i], porder_LU[i]); CIlu2<_Int,_Flt>::ReorderMatrix (pmatr_ext_arr[i], porder_LU[i], pmatr_ord_arr[i]); if (collap_loc > 0) { sprintf (strbuff,"BlkStrExtOrd_%i.ps",i); blks_3[0] = 0; blks_3[1] = pnlist_ext_arr[i]; blks_3[2] = pn2_blocks[i]; blks_3[3] = pmatr_ext_arr[i].GetNlist(); CHMatrix<_Int,_Flt>::Str2PsBox (collap_loc, pmatr_ord_arr[i], strbuff, 3, blks_3); } pmatr_ext_arr[i].ReplaceFree (pmatr_ord_arr[i]); } } } // Perform computation of the ILU decomposition for all local ordered blocks int prec_float_loc = _params->prec_float; vector eigmin_arr (nblks); vector eigmax_arr (nblks); double *peigmin_arr = &eigmin_arr[0]; double *peigmax_arr = &eigmax_arr[0]; for (i=0;i<_nblks;i++) { if (pblk2cpu[i] == myid) { if (prec_float_loc == 1) { CMatrix<_Int,float> *ptr_matr = NULL; CMatrix<_Int,float> matr_conv; if (sizeof(_Flt) == sizeof(float)) { ptr_matr = (CMatrix<_Int,float> *)(pmatr_ext_arr+i); } else { int nlist_temp = pmatr_ext_arr[i].GetNlist(); _Int *pia_temp = pmatr_ext_arr[i].GetIaArr(); _Int *pja_temp = pmatr_ext_arr[i].GetJaArr(); _Flt *pa_temp = pmatr_ext_arr[i].GetAArr(); CMatrixConv<_Int,_Flt,float>::InitAndConv (nlist_temp, pia_temp, pja_temp, pa_temp, matr_conv); ptr_matr = &matr_conv; } CIlu2<_Int,float>::Ilu2Matrix (*ptr_matr, *_params, pmatrL_float[i], pmatrU_float[i], peigmin_arr[i], peigmax_arr[i]); } else { CMatrix<_Int,double> *ptr_matr = NULL; CMatrix<_Int,double> matr_conv; if (sizeof(_Flt) == sizeof(double)) { ptr_matr = (CMatrix<_Int,double> *)(pmatr_ext_arr+i); } else { int nlist_temp = pmatr_ext_arr[i].GetNlist(); _Int *pia_temp = pmatr_ext_arr[i].GetIaArr(); _Int *pja_temp = pmatr_ext_arr[i].GetJaArr(); _Flt *pa_temp = pmatr_ext_arr[i].GetAArr(); CMatrixConv<_Int,_Flt,double>::InitAndConv (nlist_temp, pia_temp, pja_temp, pa_temp, matr_conv); ptr_matr = &matr_conv; } CIlu2<_Int,double>::Ilu2Matrix (*ptr_matr, *_params, pmatrL_double[i], pmatrU_double[i], peigmin_arr[i], peigmax_arr[i]); } if (collap_loc > 0) { sprintf (strbuff,"BlkStrU_%i.ps",i); blks_3[0] = 0; blks_3[1] = pnlist_ext_arr[i]; blks_3[2] = pn2_blocks[i]; blks_3[3] = pmatr_ext_arr[i].GetNlist(); if (prec_float_loc == 1) { CHMatrix<_Int,float>::Str2PsBox (collap_loc, pmatrU_float[i], strbuff, 3, blks_3); } else { CHMatrix<_Int,double>::Str2PsBox (collap_loc, pmatrU_double[i], strbuff, 3, blks_3); } } pmatr_ext_arr[i].Clean (); } } // Init multiplication and solve control structures if (_b_store_matrix) { this->mvm.InitControl (_pcomm, _nblks, pblks, pblk2cpu); this->mvm.InitMvmA (phmatr_arr); } if (prec_float_loc == 1) { this->slv_float.InitControl (_pcomm, _nblks, pblks, pblks_ext, pblk2cpu); if (ordtype_loc > 0) { this->slv_float.InitSolveLU (plist_ext_arr, pmatrL_float, pmatrU_float, porder_LU); } else { this->slv_float.InitSolveLU (plist_ext_arr, pmatrL_float, pmatrU_float, NULL); } } else { this->slv_double.InitControl (_pcomm, _nblks, pblks, pblks_ext, pblk2cpu); if (ordtype_loc > 0) { this->slv_double.InitSolveLU (plist_ext_arr, pmatrL_double, pmatrU_double, porder_LU); } else { this->slv_double.InitSolveLU (plist_ext_arr, pmatrL_double, pmatrU_double, NULL); } } } // Perform iterations of the iterative scheme (BiCGStab) //======================================================================================== template void CSolver<_Int,_Flt,_FltVect>::BiCGStab (int _niter_max, double _eps, int _ichk, int _msglev, ofstream *_fout, _FltVect *_rhs, _FltVect *_sol, double &_rhs_norm, double &_res_ini, int &_niter, double &_res_fin) { // Init output data _rhs_norm = 0.0e0; _res_ini = 0.0e0; _niter = 0; _res_fin = 0.0e0; // Get control data void *pcomm_loc = this->GetComm(); int nblks_loc = this->GetNblks(); long long *pblks_loc = this->GetBlks(); int *pblks2cpu_loc = this->GetBlk2cpu(); int nproc_loc = CExchange::GetNproc (pcomm_loc); int myid_loc = CExchange::GetMyid (pcomm_loc); // Get the local size of vector data int i; int n_local = 0; for (i=0;i rhs (n_local+1); vector<_FltVect> r (n_local+1); vector<_FltVect> p (n_local+1); vector<_FltVect> x (n_local+1); vector<_FltVect> u (n_local+1); vector<_FltVect> z (n_local+1); vector<_FltVect> v (n_local+1); vector<_FltVect> q (n_local+1); _FltVect *prhs = &rhs[0]; _FltVect *pr = &r[0]; _FltVect *pp = &p[0]; _FltVect *px = &x[0]; _FltVect *pu = &u[0]; _FltVect *pz = &z[0]; _FltVect *pv = &v[0]; _FltVect *pq = &q[0]; // Compute initial residual vector and its norm CVect<_FltVect>::CopyVector (n_local, _sol, px); CVect<_FltVect>::CopyVector (n_local, _rhs, pr); this->MvmA (px, pz); CVect<_FltVect>::SubtractReplaceVector (n_local, pz, pr); _FltVect rhs_norm = 0.0e0; _FltVect resi0_norm = 0.0e0; rhs_norm = CVect<_FltVect>::ScProd (n_local, _rhs, _rhs); resi0_norm = CVect<_FltVect>::ScProd (n_local, pr, pr); double sum2_arr[2]; sum2_arr[0] = (double)rhs_norm; sum2_arr[1] = (double)resi0_norm; CExchange::ExchangeArray (pcomm_loc, 'D', '+', 2, sum2_arr); double d_rhs_norm = sum2_arr[0]; double d_resi0_norm = sum2_arr[1]; d_rhs_norm = sqrt(d_rhs_norm); d_resi0_norm = sqrt(d_resi0_norm); _rhs_norm = d_rhs_norm; _res_ini = d_resi0_norm; if (_res_ini < _eps * _rhs_norm) { _res_fin = _res_ini; _niter = 0; return; } if (_msglev >= 2) std::cout << " Log10 || Rhs || = " << log10(_rhs_norm) << std::endl; if (_msglev >= 1 && _fout != NULL) *_fout << " Log10 || Rhs || = " << log10(_rhs_norm) << std::endl; if (_msglev >= 2) std::cout << " Initial Log10 || Resi || = " << log10(d_resi0_norm) << std::endl; if (_msglev >= 1 && _fout != NULL) *_fout << " Initial Log10 || Resi || = " << log10(d_resi0_norm) << std::endl; // Perform iterations starting from residual data int niter_perf = 0; double resi_norm = d_resi0_norm; double d_res_min = resi_norm; // Choose initial direction vector CVect<_FltVect>::CopyVector (n_local, pr, pz); // z=r(0) // p(1) = r(0) CVect<_FltVect>::CopyVector (n_local, pr, pp); // p=r(0) // Main iterative cycle _FltVect alpha = 0.0e0, beta, omega = 0.0e0, rho, rhoold; // method parameters _FltVect uu, ur, zv; // inner products _FltVect fzero = (_FltVect) 0.0e0; _FltVect fone = (_FltVect) 1.0e0; rho = fone; int k, it; _FltVect faux; double daux; double d_resi; bool conv = false; for (k=1;k<=_niter_max;k++) { it = k-1; // the number of MVM is equal to 2*it // rho(i-1) = z^T * r(i-1) rhoold = rho; rho = CVect<_FltVect>::ScProd (n_local, pz, pr); daux = (double) rho; CExchange::ExchangeArray (pcomm_loc, 'D', '+', 1, &daux); rho = (_FltVect) daux; if (k == 1) { alpha = omega = fone; } else { // beta(i-1) = (rho(i-1) / rho(i-2)) * (alpha(i-1) / omega(i-1)) beta = (rho / rhoold) * (alpha / omega); // p(i) = r(i-1) + beta(i-1) * (p(i-1) - omega(i-1) * v(i-1)) CVect<_FltVect>::UpdateVectorMinus (n_local, &omega, pv, pp); CVect<_FltVect>::UpdateVectorReversed (n_local, &beta, pr, pp); } // q = M_solve * p(i) this->SlvLU (pp, pq); // v(i) = A * q this->MvmA (pq, pv); // alpha(i) = rho(i-1) / z^T * v(i) zv = CVect<_FltVect>::ScProd (n_local, pz, pv); daux = (double) zv; CExchange::ExchangeArray (pcomm_loc, 'D', '+', 1, &daux); zv = (_FltVect) daux; alpha = rho / zv; // x(i-1/2) = x(i-1) + alpha(i) * q CVect<_FltVect>::UpdateVector (n_local, &alpha, pq, px); // r(i-1/2) = r(i-1) - alpha(i) * v(i) CVect<_FltVect>::UpdateVectorMinus (n_local, &alpha, pv, pr); // The intermediate check of convergence can be added here // q = M_solve * r(i-1/2) this->SlvLU (pr, pq); // u = A * q this->MvmA (pq, pu); // omega(i) = u^T * r(i-1/2) / u^T * u ur = CVect<_FltVect>::ScProd (n_local, pu, pr); uu = CVect<_FltVect>::ScProd (n_local, pu, pu); sum2_arr[0] = (double)ur; sum2_arr[1] = (double)uu; CExchange::ExchangeArray (pcomm_loc, 'D', '+', 2, sum2_arr); ur = (_FltVect)sum2_arr[0]; uu = (_FltVect)sum2_arr[1]; omega = ur / uu; // x(i) = x(i-1/2) + omega(i) * q CVect<_FltVect>::UpdateVector (n_local, &omega, pq, px); // r(i) = r(i-1/2) - omega(i) * u CVect<_FltVect>::UpdateVectorMinus (n_local, &omega, pu, pr); // Check the convergence niter_perf = (int)(it+1); faux = CVect<_FltVect>::ScProd (n_local, pr, pr); daux = (double) faux; CExchange::ExchangeArray (pcomm_loc, 'D', '+', 1, &daux); d_resi = sqrt(daux); if ((it+_ichk)%_ichk == 0 && _msglev >= 2) std::cout << " It = " << it << " Log10 || Resi || = " << log10(d_resi) << std::endl; if ((it+_ichk)%_ichk == 0 && _msglev >= 1 && _fout != NULL) *_fout << " It = " << it << " Log10 || Resi || = " << log10(d_resi) << std::endl; if (d_resi < _eps * d_rhs_norm) conv = true; // Save the best attained solution if (d_resi < d_res_min) { d_res_min = d_resi; CVect<_FltVect>::CopyVector (n_local, px, _sol); } // Break from iterations if converged if (conv) { break; } } // end of iterations // Compute the final residual this->MvmA (_sol, pr); CVect<_FltVect>::SubtractReplaceVector (n_local, _rhs, pr); // r=b-A*x // Compute the final residual norm resi_norm = CVect<_FltVect>::ScProd (n_local, pr, pr); daux = (double) resi_norm; CExchange::ExchangeArray (pcomm_loc, 'D', '+', 1, &daux); double d_resi_fin = sqrt (daux); if (_msglev >= 2) std::cout << " Final Log10 || Resi || = " << log10(d_resi_fin) << std::endl; if (_msglev >= 1 && _fout != NULL) *_fout << " Final Log10 || Resi || = " << log10(d_resi_fin) << std::endl; // Return statistics data _niter = niter_perf; _res_fin = d_resi_fin; } template class CVect ; template class CVect ; template class CIlu2_impl ; template class CIlu2_impl ; template class CIlu2_impl ; template class CIlu2_impl ; template class CMatrix ; template class CMatrix ; template class CMatrix ; template class CMatrix ; template class CMvmSlv_impl; template class CMvmSlv_impl; template class CMvmSlv_impl; template class CMvmSlv_impl; template class CMvmSlv ; template class CMvmSlv ; template class CMvmSlv ; template class CMvmSlv ; template class CIlu2 ; template class CIlu2 ; template class CIlu2 ; template class CIlu2 ; template class CHMatrix ; template class CHMatrix ; template class CHMatrix ; template class CHMatrix ; template class CMatrixConv ; template class CMatrixConv ; template class CMatrixConv ; template class CMatrixConv ; template class CMvmPar ; template class CMvmPar ; template class CMvmPar ; template class CMvmPar ; template class CSlvPar ; template class CSlvPar ; template class CSlvPar ; template class CSlvPar ; template class CSolver ; template class CSolver ; template class CSolver ; template class CSolver ; //////////////////////////////////////////////////////////////////////////////////////////////// //...reordering algorithms (reverse Cuthill-Mckee ordering); void genrcm(int n, int * xadj, int * iadj, int * perm, int * xls, int * mask); void subrcm(int * xadj, int * iadj, int * mask, int nsubg, int * subg, int * perm, int * xls, int n); ///////////////////////// //...parameters: // n - the dimension of the matrix; // xadj, iadj - the matrix structure: xadj[n+1], iadj[*]; information about row i is stored // in xadj[i-1] -- xadj[i]-1 of the the adjacency structure iadj[*]; // for each row, it contains the column indices of the nonzero entries; // perm[n] - contains the rcm ordering; // mask[n] - marks variables that have been numbered (working array); // xls[n+1] - the index vector for a level structure; the level structure is stored // in the currently unused spaces in the permutation vector perm; // nsubg - the size of the subgraph; // subg[n] - contains the nodes in subgraph (which may be disconnected); ///////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////// //...generates the connected level structure rooted at a given node; void rootls(int root, int * xadj, int * iadj, int * mask, int & nlvl, int * xls, int * ls, int) { int i, iccsze, j, jstop, jstrt, lbegin, lvlend, lvsize, nbr, node; mask[root-1] = 0; ls[0] = root; nlvl = 0; lvlend = 0; iccsze = 1; do { lbegin = lvlend + 1; lvlend = iccsze; xls[nlvl++] = lbegin; ///////////////////////////////////////////////////////////////////////////////////////////// //...Generate the next level by finding all the masked neighbors of nodes in the current level; for (i = lbegin; i <= lvlend; i++) { jstrt = xadj[(node = ls[i-1])-1]; jstop = xadj[ node]-1; for (j = jstrt; j <= jstop; j++) if (mask[(nbr = iadj[j-1])-1] != 0) { ls[iccsze++] = nbr; mask[nbr-1] = 0; } } } while ((lvsize = iccsze-lvlend) > 0); //////////////////////////////////////////////////////////// //...Reset MASK to one for the nodes in the level structure; for (xls[nlvl] = lvlend+1, i = 1; i <= iccsze; i++) mask[(node = ls[i-1])-1] = 1; } /////////////////////////////////// //...finds pseudo-peripheral nodes; void fnroot(int & root, int * xadj, int * iadj, int * mask, int & nlvl, int * xls, int * ls, int n) { int iccsze, j, jstrt, k, kstop, kstrt, mindeg, nabor, ndeg, node, nunlvl; rootls(root, xadj, iadj, mask, nlvl, xls, ls, n); if (nlvl == 1 || nlvl == (iccsze = xls[nlvl]-1)) return; do { mindeg = iccsze; root = ls[(jstrt = xls[nlvl-1])-1]; if (iccsze > jstrt) { for (j = jstrt; j <= iccsze; j++) { ndeg = 0; kstrt = xadj[(node = ls[j-1])-1]; kstop = xadj[ node]-1; for (k = kstrt; k <= kstop; k++) if (mask[(nabor = iadj[k-1])-1] > 0 ) ndeg++; if (ndeg < mindeg) { root = node; mindeg = ndeg; } } } rootls (root, xadj, iadj, mask, nunlvl, xls, ls, n); if (nunlvl <= nlvl) return; } while ((nlvl = nunlvl) < iccsze); } ////////////////////////////////////////////////////////////////// //...computes the degrees of the nodes in the connected component; void degree (int root, int * xadj, int * iadj, int * mask, int * deg, int & iccsze, int * ls, int) { int i, ideg, j, jstop, jstrt, lbegin, lvlend, lvsize, nbr, node; ls[0] = root; lvlend = 0; iccsze = 1; xadj[root-1] = -xadj[root-1]; do { lbegin = lvlend+1; lvlend = iccsze; for (i = lbegin; i <= lvlend; i++) { jstrt = -xadj[(node = ls[i-1])-1]; // jstop = abs(xadj[node])-1; jstop = xadj[node]; if (jstop < 0) jstop = -jstop; jstop--; ideg = 0; for (j = jstrt; j <= jstop; j++) if (mask[(nbr = iadj[j-1])-1] != 0) { ideg = ideg+1; if (xadj[nbr-1] >= 0) { xadj[nbr-1] = -xadj[nbr-1]; ls[iccsze++] = nbr; } } deg[node-1] = ideg; } } while ((lvsize = iccsze - lvlend) > 0); /////////////////////////////////////////////// //...Reset XADJ to its correct sign and return; for (i = 1; i <= iccsze; i++) { node = ls[i-1]; xadj[node-1] = -xadj[node-1]; } } //////////////////////////////////////////////// //...reverses the elements of an integer vector; #define iSWAP(A, B) { int iSWAP_temp = (A); (A) = (B); (B) = iSWAP_temp; } void ivec_reverse (int n, int * a) { int m, i; for (m = n/2, i = 1; i <= m; i++) iSWAP(a[i-1], a[n-i]); } #undef iSWAP ///////////////////////////////////////////////////////////////////////////// //...numbers a connected component using the reverse Cuthill McKee algorithm; void rcm(int root, int * xadj, int * iadj, int * mask, int * perm, int & iccsze, int * deg, int n) { int fnbr, i, j, jstop, jstrt, k, l, lbegin, lnbr, lperm, lvlend, nbr, node; degree (root, xadj, iadj, mask, deg, iccsze, perm, n); mask[root-1] = 0; if ( iccsze <= 1) return; lvlend = 0; lnbr = 1; do { lbegin = lvlend+1; lvlend = lnbr; for (i = lbegin; i <= lvlend; i++) { jstrt = xadj[(node = perm[i-1])-1]; jstop = xadj[ node]-1; fnbr = lnbr+1; for (j = jstrt; j <= jstop; j++) if (mask[(nbr = iadj[j-1])-1] != 0) { mask[ nbr-1] = 0; perm[lnbr++] = nbr; } ///////////////////////////////////////////////////////////// //...Sort the neighbors of node in increasing order by degree; if (fnbr < lnbr) { k = fnbr; do { l = k; nbr = perm[k++]; label40: if (l > fnbr && deg[(lperm = perm[l-1])-1] > deg[nbr-1]) { perm[l--] = lperm; goto label40; } perm[l] = nbr; } while (k < lnbr); } } } while (lnbr > lvlend); //////////////////////////////////////// //...Reverse the Cuthill-McKee ordering; ivec_reverse(iccsze, perm); } ////////////////////////////////////////////////////////////////// //...finds the reverse Cuthill-Mckee ordering for a general graph; void genrcm(int n, int * xadj, int * iadj, int * perm, int * xls, int * mask) { int i, iccsze, nlvl, num, root; for ( i = 1; i <= n; i++) mask[i-1] = 1; for (num = 1, i = 1; i <= n; i++) if (mask[i-1] != 0) { fnroot(root = i, xadj, iadj, mask, nlvl, xls, perm+num-1, n); rcm (root, xadj, iadj, mask, perm+num-1, iccsze, xls, n); if ((num += iccsze) > n) return; } } /////////////////////////////////////////////////////////////////// //...finds the reverse Cuthill-Mckee ordering for a given subgraph; void subrcm(int * xadj, int * iadj, int * mask, int nsubg, int * subg, int * perm, int * xls, int n) { int i, iccsze, nlvl, node, num; for (i = 1; i <= nsubg; i++) mask[(node = subg[i-1])-1] = 1; for (num = 0, i = 1; i <= nsubg; i++) if (mask[(node = subg[i-1])-1] > 0 ) { fnroot(node, xadj, iadj, mask, nlvl, xls, perm+num, n); rcm (node, xadj, iadj, mask, perm+num, iccsze, xls, n); if ((num += iccsze) >= nsubg ) return; } } // Barrier synchronization of the CPUs //======================================================================================== void CExchange::Synchronize (void *_comm) { MPI_Comm *pcomm = (MPI_Comm *)_comm; MPI_Barrier (*pcomm); } // Get the number of CPUs //======================================================================================== int CExchange::GetNproc (void *_comm) { MPI_Comm *pcomm = (MPI_Comm *)_comm; int nproc; MPI_Comm_size (*pcomm,&nproc); return nproc; } // Get the ID number of the CPU //======================================================================================== int CExchange::GetMyid (void *_comm) { MPI_Comm *pcomm = (MPI_Comm *)_comm; int myid; MPI_Comm_rank (*pcomm,&myid); return myid; } // Description: Asynchronous send data to the other CPU (implementation) //======================================================================================== void CExchange::ISend (void *_comm, int _rank, int _msgtag, int _length, char *_buffer, int _indrecv, void *_recv_arr) { MPI_Comm *pcomm = (MPI_Comm *)_comm; MPI_Request *p_recv_arr = (MPI_Request *)_recv_arr; int ierr = MPI_Isend (_buffer, _length, MPI_CHAR, _rank, _msgtag, *(pcomm), p_recv_arr+_indrecv); } // Description: Asynchronous receive data from the other CPU (implementation) //======================================================================================== void CExchange::IRecv (void *_comm, int _rank, int _msgtag, int _length, char *_buffer, int _indrecv, void *_recv_arr) { MPI_Comm *pcomm = (MPI_Comm *)_comm; MPI_Request *p_recv_arr = (MPI_Request *)_recv_arr; int ierr = MPI_Irecv (_buffer, _length, MPI_CHAR, _rank, _msgtag, *(pcomm), p_recv_arr+_indrecv); } // Description: Allocate send/receive requests (implementation) //======================================================================================== void CExchange::AllocateRecvs (int _nrecv, void *&_recvarr) { MPI_Request *p_recv_arr = new MPI_Request [_nrecv]; _recvarr = (void *)p_recv_arr; } // Description: Delete send/receive requests (implementation) //======================================================================================== void CExchange::DeleteRecvs (void *_recvarr) { MPI_Request *p_recv_arr = (MPI_Request *)_recvarr; delete [] p_recv_arr; } // Description: Allocate statuses (implementation) //======================================================================================== void CExchange::AllocateStats (int _nstat, void *&_statarr) { MPI_Status *p_stat_arr = new MPI_Status [_nstat]; _statarr = (void *)p_stat_arr; } // Description: Delete statuses (implementation) //======================================================================================== void CExchange::DeleteStats (void *_statarr) { MPI_Status *p_stat_arr = (MPI_Status *)_statarr; delete [] p_stat_arr; } // Description: Wait for completion of exchanges (implementation) //======================================================================================== void CExchange::WaitAll (int _count, void *_recvarr, void *_statarr) { MPI_Request *p_recv_arr = (MPI_Request *)_recvarr; MPI_Status *p_stat_arr = (MPI_Status *)_statarr; int ierr = MPI_Waitall (_count, p_recv_arr, p_stat_arr); } //======================================================================================== void CExchange::ExchangeArray (void *_comm, char _Datatype, char _Operation, int _Length, void *_Array) { static int maxbufsize = 2408*1024; MPI_Comm *pcomm = (MPI_Comm *)_comm; if (_Length < maxbufsize) { CExchange::ExchangeArray_impl(_comm, _Datatype, _Operation, _Length, _Array); } else { int myidloc; MPI_Comm_rank (*pcomm,&myidloc); char *parr = (char *)_Array; int iscale = 0; if (_Datatype == 'C') iscale = sizeof(char); if (_Datatype == 'I') iscale = sizeof(int); if (_Datatype == 'L') iscale = sizeof(long long); if (_Datatype == 'F') iscale = sizeof(float); if (_Datatype == 'D') iscale = sizeof(double); int ibeg, iend, ni; iend = -1; while (iend < _Length-1) { ibeg = iend+1; iend = ibeg+maxbufsize-1; if (iend > _Length-1) iend = _Length-1; ni = iend-ibeg+1; CExchange::ExchangeArray_impl (_comm, _Datatype, _Operation, ni, (void *)(parr+ibeg*iscale)); } } } //======================================================================================== void OpCharAdd (void *in, void *inout, int *len, MPI_Datatype * ) { char *inL = (char*)in; char *inoutL = (char*)inout; int i; char c; for (i=0; i< *len; ++i) { c = *inoutL+*inL; *inoutL = c; inL++; inoutL++; } }; //======================================================================================== void OpCharMaximum (void *in, void *inout, int *len, MPI_Datatype *dptr ) { char *inL = (char*)in; char *inoutL = (char*)inout; int i; char c; for (i=0; i< *len; ++i) { c = *inoutL>*inL ? *inoutL : *inL; *inoutL = c; inL++; inoutL++; } }; //======================================================================================== void OpCharMinimum (void *in, void *inout, int *len, MPI_Datatype *dptr ) { char *inL = (char*)in; char *inoutL = (char*)inout; int i; char c; for (i=0; i< *len; ++i) { c = *inoutL<*inL ? *inoutL : *inL; *inoutL = c; inL++; inoutL++; } }; //======================================================================================== void OpLongLongAdd (void *in, void *inout, int *len, MPI_Datatype *dptr ) { long long *inL = (long long*)in; long long *inoutL = (long long*)inout; int i; long long c; for (i=0; i< *len; ++i) { c = *inoutL+*inL; *inoutL = c; inL++; inoutL++; } }; // Collective array operation //======================================================================================== void CExchange::ExchangeArray_impl (void *_comm, char _Datatype, char _Operation, int _Length, void *_Array) { MPI_Comm *pcomm = (MPI_Comm *)_comm; int nprocloc; MPI_Comm_size (*pcomm,&nprocloc); MPI_Datatype datatype; MPI_Op op; int size_of_data = 0; bool op_create_flag = false; if (_Operation == '+') { op = MPI_SUM; } else if (_Operation == 'M') { op = MPI_MAX; } else if (_Operation == 'm') { op = MPI_MIN; } if (_Datatype == 'C') { datatype = MPI_CHAR; size_of_data = sizeof(char); if (_Operation == '+') { MPI_Op_create (OpCharAdd, true, &op); op_create_flag = true; } else if (_Operation == 'M') { MPI_Op_create (OpCharMaximum, true, &op); op_create_flag = true; } else if (_Operation == 'm') { MPI_Op_create (OpCharMinimum, true, &op); op_create_flag = true; } } else if (_Datatype == 'I') { datatype = MPI_INT; size_of_data = sizeof(int); } else if (_Datatype == 'L') { datatype = MPI_LONG_LONG_INT; size_of_data = sizeof(long long); if (_Operation == '+') { MPI_Op_create (OpLongLongAdd, true, &op); op_create_flag = true; } } else if (_Datatype == 'F') { datatype = MPI_FLOAT; size_of_data = sizeof(float); } else if (_Datatype == 'D') { datatype = MPI_DOUBLE; size_of_data = sizeof(double); } if (nprocloc != 1) { void* buffer = new char[_Length*size_of_data]; MPI_Allreduce(_Array, buffer, _Length, datatype, op, *(pcomm)); memcpy(_Array, buffer, _Length*size_of_data); delete [] (char *) buffer; } if (op_create_flag) { MPI_Op_free (&op); } } // Major data exchange function //======================================================================================== void CExchange::DataExchange (void *_comm, vector &_CpuIDSend, vector > &_ObjSend, vector &_CpuIDRecv, vector > &_ObjRecv) { int nproc = CExchange::GetNproc(_comm); if (nproc == 1) { _CpuIDRecv.swap (_CpuIDSend); _ObjRecv.swap (_ObjSend); } else { CExchange::DataExchange_impl (_comm, _CpuIDSend, _ObjSend, _CpuIDRecv, _ObjRecv); } } // Description: Packed data exchange (implementation) //======================================================================================== void CExchange::DataExchange_impl (void *_comm, vector &_CpuIDSend, vector > &_ObjSend, vector &_CpuIDRecv, vector > &_ObjRecv) { // Exchange the data int NObjSend = (int)_CpuIDSend.size(); vector CpuIDSend (NObjSend); vector ObjSizeSend (NObjSend); vector ObjSend (NObjSend); int* pCpuIDSend = NULL; int* pObjSizeSend = NULL; char** pObjSend = NULL; if (NObjSend > 0) { pCpuIDSend = &CpuIDSend[0]; pObjSizeSend = &ObjSizeSend[0]; pObjSend = &ObjSend[0]; } int *p_data_send_cpuid = NULL; vector *p_data_send_ch = NULL; if (NObjSend > 0) { p_data_send_cpuid = &_CpuIDSend[0]; p_data_send_ch = &_ObjSend[0]; } int i; for (i=0;i *p_data_recv_ch = NULL; if (NObjRecv > 0) { p_data_recv_cpuid = &_CpuIDRecv[0]; p_data_recv_ch = &_ObjRecv[0]; } char *pwork; for (i = 0; i < NObjRecv; i++) { p_data_recv_cpuid[i] = CpuIDRecv[i]; p_data_recv_ch[i].resize (ObjSizeRecv[i]); pwork = NULL; if (ObjSizeRecv[i] != 0) { pwork = &(p_data_recv_ch[i][0]); memcpy (pwork, ObjRecv[i], ObjSizeRecv[i]); } delete [] ObjRecv[i]; } // Free receive structures delete [] CpuIDRecv; delete [] ObjSizeRecv; delete [] ObjRecv; } // Data exchange function //======================================================================================== void CExchange::DataExchange_impl2 (void *_comm, int _NObjSend, int *_CpuIDSend, int *_ObjSizeSend, char **_ObjSend, int &_NObjRecv, int *&_CpuIDRecv, int *&_ObjSizeRecv, char **&_ObjRecv) { // Take the number of cpus and cpu id MPI_Comm *pcomm = (MPI_Comm *)_comm; int nproc; int myid; MPI_Comm_size (*pcomm,&nproc); MPI_Comm_rank (*pcomm,&myid); // Exchange send control data and transform it into receive control data CExchange::DataExchangeRequest (_comm, _NObjSend, _CpuIDSend, _ObjSizeSend, _NObjRecv, _CpuIDRecv, _ObjSizeRecv); // Create structures that support sparse search int *iasend, *jasend; int *iarecv, *jarecv; int *iptr; iasend = new int [nproc+1]; jasend = new int [_NObjSend]; iarecv = new int [nproc+1]; jarecv = new int [_NObjRecv]; iptr = new int [nproc]; int i, iproc; for (i=0;i<=nproc;i++) iasend[i] = 0; for (i=0;i<_NObjSend;i++) { iproc = _CpuIDSend[i]; iasend[iproc+1]++; } for (i=0;i isizesend) isizesend = sizesendtot[iproc]; if (sizerecvtot[iproc] > isizerecv) isizerecv = sizerecvtot[iproc]; } } size_t maxsendrecvbuf = 128*1024*1024; size_t isizemax = isizesend; if (isizerecv > isizemax) isizemax = isizerecv; size_t ncycle_send = 1; if (isizemax > maxsendrecvbuf) ncycle_send = (isizemax+maxsendrecvbuf-1) / maxsendrecvbuf; char *objsendbuf; char *objrecvbuf; objsendbuf = new char [isizesend]; objrecvbuf = new char [isizerecv]; // Main exchange cycles int iproc_send, iproc_recv; MPI_Status stat; for (k=1;k 0 || isizerecv > 0) { size_t iend_send = -1; size_t iend_recv = -1; for (int icycle=0;icycle= isizesend) iend_send = isizesend-1; if (iend_recv >= isizerecv) iend_recv = isizerecv-1; size_t ni_send = iend_send-ibeg_send+1; size_t ni_recv = iend_recv-ibeg_recv+1; // if (ni_send > 0 || ni_recv > 0) { MPI_Sendrecv (objsendbuf+ibeg_send, (int)(ni_send*sizeof(char)), MPI_CHAR, iproc_send, iproc_send, objrecvbuf+ibeg_recv, (int)(ni_recv*sizeof(char)), MPI_CHAR, iproc_recv, myid, *(pcomm), &stat); // } } // } isizerecv = 0; for (j=iarecv[iproc_recv];j 1) { CExchange::ExchangeArray (_comm, 'I', '+', nproc+1, cpu2obj); } for (i=0;i 1) { CExchange::ExchangeArray (_comm, 'I', '+', nobjtot, cpuidtot); CExchange::ExchangeArray (_comm, 'I', '+', nobjtot, objsizetot); } // Prepare the final data _NObjRecv = 0; for (i=0;i