solver_bcgsl.hpp 33.1 KB
Newer Older
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1

Kirill Terekhov's avatar
Kirill Terekhov committed
2
3
4
#ifndef __SOLVER_BCGS__
#define __SOLVER_BCGS__

Kirill Terekhov's avatar
Kirill Terekhov committed
5
6
7
8
//\todo
// 1. Make the solvers comply with the Method prototype once the TODO in solver_prototypes.hpp is done.
// 2. Implement the tricks from Read/solver/bcgsl/download.pdf: convex update and true residual correction.
// 3. Detect numerical accuracy breakdown, i.e. when the preconditioned residual drifts too far from the true residual (item 2 will probably fix this).
Kirill Terekhov's avatar
Kirill Terekhov committed
9
10
11

#include "inmost_solver.h"

Kirill Terekhov's avatar
Kirill Terekhov committed
12
13
14
#define PSEUDOINVERSE  // same trick as in petsc with pseudoinverse
//#define USE_LAPACK_SVD // use lapack's dgesvd routine instead of built-in svdnxn

Kirill Terekhov's avatar
Kirill Terekhov committed
15
//#if !defined(NDEBUG)
Kirill Terekhov's avatar
Kirill Terekhov committed
16
#define REPORT_RESIDUAL
Kirill Terekhov's avatar
Kirill Terekhov committed
17
//#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
18
19
20

namespace INMOST
{
Kirill Terekhov's avatar
Kirill Terekhov committed
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
	//lapack svd
#if defined(PSEUDOINVERSE)
#if defined(USE_LAPACK_SVD)
	// Declaration of LAPACK's dgesvd routine (see the USE_LAPACK_SVD switch above).
	// It is only declared when both PSEUDOINVERSE and USE_LAPACK_SVD are defined and
	// is called from the solver in place of the built-in svdnxn.
	extern "C"
	{
		void dgesvd_(char*,char*,int*,int*,double*,int*,double*,double*,int*,double*,int*,double*,int*,int*);
	}
#else // SVD adopted from http://stackoverflow.com/questions/3856072/svd-implementation-c answer by Dhairya Malhotra
	/// Apply a Givens rotation from the left to rows m and m+1 of the row-major N x N matrix S.
	///
	/// The rotation is built from the pair (a,b): with r = sqrt(a*a+b*b), c = a/r, s = -b/r
	/// the rows are replaced by
	///   row_m'     = c*row_m - s*row_{m+1}
	///   row_{m+1}' = s*row_m + c*row_{m+1}
	///
	/// @param S row-major N x N matrix, modified in place.
	/// @param N matrix dimension.
	/// @param m index of the first of the two rotated rows (m+1 must be < N).
	/// @param a,b pair defining the rotation; when both are zero S is left untouched.
	void GivensL(INMOST_DATA_REAL_TYPE * S, const int N, int m, INMOST_DATA_REAL_TYPE a, INMOST_DATA_REAL_TYPE b)
	{
		const INMOST_DATA_REAL_TYPE r = sqrt(a*a+b*b);
		// a == b == 0 defines no rotation; dividing by r would fill both rows with NaN
		if( r == 0 ) return;
		const INMOST_DATA_REAL_TYPE c = a/r;
		const INMOST_DATA_REAL_TYPE s = -b/r;
		for(int i=0;i<N;i++)
		{
			const INMOST_DATA_REAL_TYPE S0 = S[(m+0)*N+i];
			const INMOST_DATA_REAL_TYPE S1 = S[(m+1)*N+i];
			// written as increments to the existing entries, as in the original code
			S[(m+0)*N + i] += S0*(c-1);
			S[(m+0)*N + i] += S1*( -s);
			S[(m+1)*N + i] += S0*(s  );
			S[(m+1)*N + i] += S1*(c-1);
		}
	}

	/// Apply a Givens rotation from the right to columns m and m+1 of the row-major N x N matrix S.
	///
	/// The rotation is built from the pair (a,b): with r = sqrt(a*a+b*b), c = a/r, s = -b/r
	/// the columns are replaced by
	///   col_m'     = c*col_m - s*col_{m+1}
	///   col_{m+1}' = s*col_m + c*col_{m+1}
	///
	/// @param S row-major N x N matrix, modified in place.
	/// @param N matrix dimension.
	/// @param m index of the first of the two rotated columns (m+1 must be < N).
	/// @param a,b pair defining the rotation; when both are zero S is left untouched.
	void GivensR(INMOST_DATA_REAL_TYPE * S, const int N, int m, INMOST_DATA_REAL_TYPE a, INMOST_DATA_REAL_TYPE b)
	{
		const INMOST_DATA_REAL_TYPE r = sqrt(a*a+b*b);
		// a == b == 0 defines no rotation; dividing by r would fill both columns with NaN
		if( r == 0 ) return;
		const INMOST_DATA_REAL_TYPE c = a/r;
		const INMOST_DATA_REAL_TYPE s = -b/r;
		for(int i=0;i<N;i++)
		{
			const INMOST_DATA_REAL_TYPE S0 = S[i*N+(m+0)];
			const INMOST_DATA_REAL_TYPE S1 = S[i*N+(m+1)];
			// written as increments to the existing entries, as in the original code
			S[i*N+(m+0)] += S0*(c-1);
			S[i*N+(m+0)] += S1*( -s);
			S[i*N+(m+1)] += S0*(s  );
			S[i*N+(m+1)] += S1*(c-1);
		}
	}

	void svdnxn(INMOST_DATA_REAL_TYPE * A, INMOST_DATA_REAL_TYPE * U, INMOST_DATA_REAL_TYPE * S,  INMOST_DATA_REAL_TYPE * V, const int N)
	{
		memset(S,0,sizeof(INMOST_DATA_REAL_TYPE)*N*N);
		memset(U,0,sizeof(INMOST_DATA_REAL_TYPE)*N*N);
		memset(V,0,sizeof(INMOST_DATA_REAL_TYPE)*N*N);
		for(int i=0;i<N;i++)
		{
			for(int j=0;j<N;j++)
				S[i*N+j]=A[i*N+j];
			U[i*N+i] = 1;
			V[i*N+i] = 1;
		}
		INMOST_DATA_REAL_TYPE eps = -1;
		{ // Bi-diagonalization
			std::vector<INMOST_DATA_REAL_TYPE> house_vec(N);
			for(int i=0;i<N;i++)
Kirill Terekhov's avatar
Kirill Terekhov committed
77
			{
Kirill Terekhov's avatar
Kirill Terekhov committed
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
				// Column Householder
				{
					INMOST_DATA_REAL_TYPE x1= S[i*N+i];
					if(x1<0) x1=-x1;

					INMOST_DATA_REAL_TYPE x_inv_norm=0;
					for(int j=i;j<N;j++) x_inv_norm+= S[j*N+i]*S[j*N+i];
					x_inv_norm=1/sqrt(x_inv_norm);

					INMOST_DATA_REAL_TYPE alpha=sqrt(1+x1*x_inv_norm);
					INMOST_DATA_REAL_TYPE beta=x_inv_norm/alpha;

					house_vec[i]=-alpha;
					for(int j=i+1;j<N;j++) house_vec[j]=-beta*S[j*N+i];
					if(S[i*N+i]<0) for(int j=i+1;j<N;j++) house_vec[j]=-house_vec[j];
				}
				
				for(int k=i;k<N;k++)
				{
					INMOST_DATA_REAL_TYPE dot_prod=0;
					for(int j=i;j<N;j++) dot_prod+=S[j*N+k]*house_vec[j];
					
					for(int j=i;j<N;j++) S[j*N+k]-=dot_prod*house_vec[j];
				}
				
				for(int k=0;k<N;k++)
Kirill Terekhov's avatar
Kirill Terekhov committed
104
				{
Kirill Terekhov's avatar
Kirill Terekhov committed
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
					INMOST_DATA_REAL_TYPE dot_prod=0;
					for(int j=i;j<N;j++) dot_prod+=U[k*N+j]*house_vec[j];
					for(int j=i;j<N;j++) U[k*N+j]-=dot_prod*house_vec[j];
				}

				// Row Householder
				if(i>=N-1) continue;

				{
					INMOST_DATA_REAL_TYPE x1=S[i*N+(i+1)];
					if(x1<0) x1=-x1;

					INMOST_DATA_REAL_TYPE x_inv_norm=0;
					for(int j=i+1;j<N;j++) x_inv_norm+=S[i*N+j]*S[i*N+j];
					x_inv_norm=1/sqrt(x_inv_norm);

					INMOST_DATA_REAL_TYPE alpha=sqrt(1+x1*x_inv_norm);
					INMOST_DATA_REAL_TYPE beta=x_inv_norm/alpha;

					house_vec[i+1]=-alpha;
					for(int j=i+2;j<N;j++) house_vec[j]=-beta*S[i*N+j];
					if(S[i*N+(i+1)]<0) for(int j=i+2;j<N;j++) house_vec[j]=-house_vec[j];
				}
				
				for(int k=i;k<N;k++)
				{
					INMOST_DATA_REAL_TYPE dot_prod=0;
					for(int j=i+1;j<N;j++) dot_prod+=S[k*N+j]*house_vec[j];
					for(int j=i+1;j<N;j++) S[k*N+j] -= dot_prod*house_vec[j];
				}
				
				for(int k=0;k<N;k++)
				{
					INMOST_DATA_REAL_TYPE dot_prod=0;
					for(int j=i+1;j<N;j++) dot_prod+=V[j*N+k]*house_vec[j];
					for(int j=i+1;j<N;j++) V[j*N+k]-=dot_prod*house_vec[j];
Kirill Terekhov's avatar
Kirill Terekhov committed
141
142
143
				}
			}
		}
Kirill Terekhov's avatar
Kirill Terekhov committed
144
145
146

		int k0=0;
		if(eps<0)
Kirill Terekhov's avatar
Kirill Terekhov committed
147
		{
Kirill Terekhov's avatar
Kirill Terekhov committed
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
			eps=1.0;
			while(eps+(INMOST_DATA_REAL_TYPE)1.0>1.0) eps*=0.5;
			eps*=64.0;
		}
		while(k0<N-1)
		{ // Diagonalization
			INMOST_DATA_REAL_TYPE S_max=0.0;
			for(int i=0;i<N;i++) S_max=(S_max > S[i*N+i] ? S_max : S[i*N+i]);
			while(k0<N-1 && fabs(S[k0*N+(k0+1)])<=eps*S_max) k0++;
			int k=k0;
			int n=k0+1;
			while(n<N && fabs(S[(n-1)*N+n])>eps*S_max) n++;

			INMOST_DATA_REAL_TYPE mu=0;
			{ // Compute mu
				INMOST_DATA_REAL_TYPE C[2][2];
				C[0][0]=S[(n-2)*N+(n-2)]*S[(n-2)*N+(n-2)]+S[(n-3)*N+(n-2)]*S[(n-3)*N+(n-2)]; 
				C[0][1]=S[(n-2)*N+(n-2)]*S[(n-2)*N+(n-1)];
				C[1][0]=S[(n-2)*N+(n-2)]*S[(n-2)*N+(n-1)]; 
				C[1][1]=S[(n-1)*N+(n-1)]*S[(n-1)*N+(n-1)]+S[(n-2)*N+(n-1)]*S[(n-2)*N+(n-1)];
				INMOST_DATA_REAL_TYPE b =-(C[0][0]+C[1][1])/2;
				INMOST_DATA_REAL_TYPE c =  C[0][0]*C[1][1] - C[0][1]*C[1][0];
				INMOST_DATA_REAL_TYPE d = sqrt(b*b-c);
				INMOST_DATA_REAL_TYPE lambda1 = -b+d;
				INMOST_DATA_REAL_TYPE lambda2 = -b-d;
				INMOST_DATA_REAL_TYPE d1 = lambda1-C[1][1]; d1 = (d1<0?-d1:d1);
				INMOST_DATA_REAL_TYPE d2 = lambda2-C[1][1]; d2 = (d2<0?-d2:d2);
				mu = (d1<d2?lambda1:lambda2);
Kirill Terekhov's avatar
Kirill Terekhov committed
176
			}
Kirill Terekhov's avatar
Kirill Terekhov committed
177
178
179
180
181

			INMOST_DATA_REAL_TYPE alpha = S[k*N+k] * S[k*N+k] - mu;
			INMOST_DATA_REAL_TYPE beta  = S[k*N+k] * S[k*N+(k+1)];

			for(;k<N-1;k++)
Kirill Terekhov's avatar
Kirill Terekhov committed
182
			{
Kirill Terekhov's avatar
Kirill Terekhov committed
183
184
185
186
187
188
189
190
191
192
				GivensR(S,N,k,alpha,beta);
				GivensL(V,N,k,alpha,beta);

				alpha = S[k*N+k];
				beta  = S[(k+1)*N+k];
				GivensL(S,N,k,alpha,beta);
				GivensR(U,N,k,alpha,beta);

				alpha = S[k*N+(k+1)];
				beta  = S[k*N+(k+2)];
Kirill Terekhov's avatar
Kirill Terekhov committed
193
194
			}
		}
Kirill Terekhov's avatar
Kirill Terekhov committed
195
196
		
		for(int i=0;i<N;i++)
Kirill Terekhov's avatar
Kirill Terekhov committed
197
		{
Kirill Terekhov's avatar
Kirill Terekhov committed
198
199
			INMOST_DATA_REAL_TYPE temp;
			for(int j=i+1;j<N;j++)
Kirill Terekhov's avatar
Kirill Terekhov committed
200
			{
Kirill Terekhov's avatar
Kirill Terekhov committed
201
202
203
204
205
206
207
				temp = U[i+N*j];
				U[i+N*j] = U[j+N*i];
				U[j+N*i] = temp;

				temp = V[i+N*j];
				V[i+N*j] = V[j+N*i];
				V[j+N*i] = temp;
Kirill Terekhov's avatar
Kirill Terekhov committed
208
209
210
			}
		}
		
Kirill Terekhov's avatar
Kirill Terekhov committed
211
212

		for(int i=0;i<N;i++) if( S[i*N+i] < 0.0 )
Kirill Terekhov's avatar
Kirill Terekhov committed
213
		{
Kirill Terekhov's avatar
Kirill Terekhov committed
214
215
216
217
218
			for(int j=0;j<N;j++)
			{
				U[j+N*i] *= -1;
			}
			S[i*N+i] *= -1;
Kirill Terekhov's avatar
Kirill Terekhov committed
219
		}
Kirill Terekhov's avatar
Kirill Terekhov committed
220
		
Kirill Terekhov's avatar
Kirill Terekhov committed
221
	}
Kirill Terekhov's avatar
Kirill Terekhov committed
222
223
224
225
226
227
228
229
#endif //USE_LAPACK_SVD
#endif //PSEUDOINVERSE
	/// Solve the dense n x n system A*x = b by Gaussian elimination with full pivoting.
	///
	/// @param A row-major n x n matrix; overwritten with the factors.
	/// @param x output solution, n entries, in the original unknown ordering.
	/// @param b right-hand side, n entries; overwritten.
	/// @param n system size.
	/// @param order scratch array of n ints used to track the column permutation.
	/// @return 0 on success, or i+1 when elimination step i breaks down (pivot is
	///         zero or b[i]/pivot is not a finite value below 1e100 in magnitude).
	int solvenxn(INMOST_DATA_REAL_TYPE * A, INMOST_DATA_REAL_TYPE * x, INMOST_DATA_REAL_TYPE * b, int n, int * order)
	{
		INMOST_DATA_REAL_TYPE temp, max;
		int temp2;
		for(int i = 0; i < n; i++) order[i] = i;
		for(int i = 0; i < n; i++)
		{
			int maxk = i, maxq = i;
			max = fabs(A[maxk*n+maxq]);
			//Find best pivot in the remaining (n-i) x (n-i) sub-matrix
			for(int q = i; q < n; q++) // over columns
			{
				for(int k = i; k < n; k++) // over rows
				{
					if( fabs(A[k*n+q]) > max )
					{
						max = fabs(A[k*n+q]);
						maxk = k;
						maxq = q;
					}
				}
			}
			//Exchange rows
			if( maxk != i )
			{
				for(int q = 0; q < n; q++)
				{
					temp = A[maxk*n+q];
					A[maxk*n+q] = A[i*n+q];
					A[i*n+q] = temp;
				}
				//exchange rhs
				temp = b[maxk];
				b[maxk] = b[i];
				b[i] = temp;
			}
			//Exchange columns
			if( maxq != i )
			{
				for(int k = 0; k < n; k++)
				{
					temp = A[k*n+maxq];
					A[k*n+maxq] = A[k*n+i];
					A[k*n+i] = temp;
				}
				//remember order in sol
				temp2 = order[maxq];
				order[maxq] = order[i];
				order[i] = temp2;
			}
			// Breakdown test. The negated "<=" also catches NaN, i.e. the 0/0 case
			// of a zero pivot with a zero right-hand side, which a plain "> 1e100"
			// comparison lets through and which then divides by zero below.
			if( !(fabs(b[i]/A[i*n+i]) <= 1.0e+100) )
				return i+1;

			// scale row i of U and column i of L by the pivot
			for(int k = i+1; k < n; k++)
			{
				A[i*n+k] /= A[i*n+i];
				A[k*n+i] /= A[i*n+i];
			}
			// update the trailing sub-matrix
			for(int k = i+1; k < n; k++)
			for(int q = i+1; q < n; q++)
			{
				A[k*n+q] -= A[k*n+i] * A[i*n+i] * A[i*n+q];
			}
			for(int j = i+1; j < n; j++) //iterate over columns of L
			{
				b[j] -= b[i] * A[j*n+i];
			}
			b[i] /= A[i*n+i];
		}

		for(int i = n-1; i >= 0; i--) //iterate over rows of U
			for(int j = i+1; j < n; j++)
			{
				b[i] -= b[j] * A[i*n+j];
			}
		// undo the column permutation
		for(int i = 0; i < n; i++)
			x[order[i]] = b[i];

		return 0;
	}
Kirill Terekhov's avatar
Kirill Terekhov committed
308

Kirill Terekhov's avatar
Kirill Terekhov committed
309
310
311
312
313
314
	class BCGSL_solver : public IterativeMethod
	{
		INMOST_DATA_REAL_TYPE rtol, atol, divtol, last_resid;
		INMOST_DATA_ENUM_TYPE iters, maxits, l, last_it;
		INMOST_DATA_REAL_TYPE resid;
		INMOST_DATA_REAL_TYPE * tau, * sigma, * gamma, *theta1, * theta2, * theta3;
Kirill Terekhov's avatar
Kirill Terekhov committed
315
		Solver::Vector r_tilde, x0, t, * u, * r;
Kirill Terekhov's avatar
Kirill Terekhov committed
316
317
		Solver::Matrix * Alink;
		Method * prec;
Kirill Terekhov's avatar
Kirill Terekhov committed
318
		std::string reason;
Kirill Terekhov's avatar
Kirill Terekhov committed
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
		Solver::OrderInfo * info;
		bool init;
	public:
		/// Value of last_it, the iteration counter that Solve() resets to 0 and
		/// updates after every completed outer iteration.
		INMOST_DATA_ENUM_TYPE GetIterations()
		{
			return last_it;
		}
		/// Value of last_resid, the residual norm most recently recorded by Solve().
		INMOST_DATA_REAL_TYPE GetResidual()
		{
			return last_resid;
		}
		/// Access a real-valued parameter by name.
		///
		/// A name starting with ':' is forwarded to the preconditioner with the
		/// colon stripped. "rtol", "atol" and "divtol" refer to this solver; any
		/// other name is forwarded to the preconditioner unchanged.
		/// Throws -1 when the name cannot be resolved.
		INMOST_DATA_REAL_TYPE & RealParameter(std::string name)
		{
			const bool have_prec = (prec != NULL);
			if (have_prec && name[0] == ':')
				return prec->RealParameter(name.substr(1, name.size() - 1));
			if (name == "rtol")
				return rtol;
			if (name == "atol")
				return atol;
			if (name == "divtol")
				return divtol;
			if (have_prec)
				return prec->RealParameter(name);
			throw - 1;
		}
		/// Access an integer-valued parameter by name.
		///
		/// A name starting with ':' is forwarded to the preconditioner with the
		/// colon stripped. "maxits" and "levels" refer to this solver; any other
		/// name is forwarded to the preconditioner unchanged.
		/// Throws -1 when the name cannot be resolved, and also when "levels" is
		/// requested after initialization (the work arrays are sized from it).
		INMOST_DATA_ENUM_TYPE & EnumParameter(std::string name)
		{
			const bool have_prec = (prec != NULL);
			if (have_prec && name[0] == ':')
				return prec->EnumParameter(name.substr(1, name.size() - 1));
			if (name == "maxits")
				return maxits;
			if (name == "levels")
			{
				if (init) throw - 1; //solver was already initialized, value should not be changed
				return l;
			}
			if (have_prec)
				return prec->EnumParameter(name);
			throw - 1;
		}
		/// Construct an uninitialized solver.
		///
		/// @param prec preconditioner, may be NULL; it is deleted by the destructor.
		/// @param info ordering/communication object; only its address is stored.
		///
		/// Every scalar and pointer member gets a defined value here, so that the
		/// getters and Copy() never read indeterminate data before the first Solve().
		BCGSL_solver(Method * prec, Solver::OrderInfo & info)
			:rtol(1e-8), atol(1e-9), divtol(1e+40), last_resid(0),
			 iters(0), maxits(1500), l(2), last_it(0), resid(0),
			 tau(NULL), sigma(NULL), gamma(NULL), theta1(NULL), theta2(NULL), theta3(NULL),
			 u(NULL), r(NULL), Alink(NULL), prec(prec), info(&info), init(false)
		{
		}
		bool Initialize()
		{
			if (isInitialized()) Finalize();
			if (prec != NULL && !prec->isInitialized()) prec->Initialize();
Kirill Terekhov's avatar
Kirill Terekhov committed
361
362
			info->PrepareVector(r_tilde);
			info->PrepareVector(x0);
Kirill Terekhov's avatar
Kirill Terekhov committed
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
			info->PrepareVector(t);
			tau = new INMOST_DATA_REAL_TYPE[l * 5 + l*l];
			sigma = tau + l*l;
			gamma = sigma + l;
			theta1 = gamma + l;
			theta2 = theta1 + l;
			theta3 = theta2 + l;
			u = new Solver::Vector[l * 2 + 2];
			r = u + l + 1;
			for (INMOST_DATA_ENUM_TYPE k = 0; k < l + 1; k++)
			{
				info->PrepareVector(r[k]);
				info->PrepareVector(u[k]);
			}
			init = true;
			return true;
		}
		/// True when the work storage is allocated and the preconditioner, if any,
		/// reports itself initialized.
		bool isInitialized()
		{
			if (!init) return false;
			return prec == NULL || prec->isInitialized();
		}
		bool Finalize()
		{
			if (isInitialized())
			{
				if (!prec->isFinalized()) prec->Finalize();
				delete[] u;
				delete[] tau;
				init = false;
			}
			return true;
		}
		/// True when no work storage is held and the preconditioner, if any,
		/// reports itself finalized.
		bool isFinalized()
		{
			if (init) return false;
			return prec == NULL || prec->isFinalized();
		}
		void Copy(const Method * other)
		{
			const BCGSL_solver * b = dynamic_cast<const BCGSL_solver *>(other);
			assert(b != NULL);
			rtol = b->rtol;
			atol = b->atol;
			divtol = b->divtol;
			last_resid = b->last_resid;
			iters = b->iters;
			maxits = b->maxits;
			l = b->l;
			last_it = b->last_it;
			resid = b->resid;
			Alink = b->Alink;
			info = b->info;
			if (init) Finalize();
			if (b->prec != NULL)
			{
				if (prec == NULL) prec = b->prec->Duplicate();
				else prec->Copy(b->prec);
			}
			if (b->init) Initialize();
		}
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
416
		BCGSL_solver(const BCGSL_solver & other) :IterativeMethod(other)
Kirill Terekhov's avatar
Kirill Terekhov committed
417
418
419
420
421
422
423
424
425
426
427
428
429
		{
			Copy(&other);
		}
		/// Assignment: takes over parameters, state and preconditioner via Copy().
		/// Self-assignment is a no-op.
		BCGSL_solver & operator =(BCGSL_solver const & other)
		{
			if (this != &other)
				Copy(&other);
			return *this;
		}
		/// Release the work storage (if allocated) and delete the preconditioner.
		///
		/// The buffers are freed whenever init is set, independently of the state
		/// of the preconditioner, and a missing preconditioner is tolerated.
		~BCGSL_solver()
		{
			if (init)
			{
				if (prec != NULL && !prec->isFinalized()) prec->Finalize();
				delete[] u;
				delete[] tau;
				init = false;
			}
			delete prec; // deleting a null pointer is a no-op
		}
Kirill Terekhov's avatar
Kirill Terekhov committed
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
		/// Apply the (right-preconditioned) operator: Output = A * M^{-1} * Input,
		/// or Output = A * Input when there is no preconditioner.
		///
		/// @param Input  vector the operator is applied to.
		/// @param Output result; synchronized with info->Update() before returning.
		///
		/// The member vector t is used as scratch for the preconditioned input.
		void ApplyOperator(Solver::Vector & Input, Solver::Vector & Output)
		{
			if (prec != NULL) //right preconditioning here! for left preconditioner have to reverse order
			{
				prec->Solve(Input, t);
				info->Update(t);
				Alink->MatVec(1.0,t,0,Output);
				info->Update(Output);
			}
			else
			{
				// without a preconditioner t is never filled, the matrix must be
				// applied to Input itself
				Alink->MatVec(1.0,Input,0,Output);
				info->Update(Output);
			}
		}
Kirill Terekhov's avatar
Kirill Terekhov committed
445
446
447
448
		bool Solve(Solver::Vector & RHS, Solver::Vector & SOL)
		{
			assert(isInitialized());
			INMOST_DATA_ENUM_TYPE vbeg,vend, vlocbeg, vlocend;
Kirill Terekhov's avatar
Kirill Terekhov committed
449
			INMOST_DATA_REAL_TYPE rho0 = 1, rho1, alpha = 0, beta, omega = 1, eta;
Kirill Terekhov's avatar
Kirill Terekhov committed
450
			INMOST_DATA_REAL_TYPE resid0, resid, rhs_norm;//, temp[2];
Kirill Terekhov's avatar
Kirill Terekhov committed
451
452
453
			iters = 0;
			info->PrepareVector(SOL);
			info->PrepareVector(RHS);
Kirill Terekhov's avatar
Kirill Terekhov committed
454
455
			info->Update(SOL);
			info->Update(RHS);
Kirill Terekhov's avatar
Kirill Terekhov committed
456
457
458
459
			if( prec != NULL ) prec->ReplaceSOL(SOL);
			if( prec != NULL ) prec->ReplaceRHS(RHS);
			info->GetLocalRegion(info->GetRank(),vlocbeg,vlocend);
			info->GetVectorRegion(vbeg,vend);
460

Kirill Terekhov's avatar
Kirill Terekhov committed
461
			//rhs_norm = info->ScalarProd(RHS,RHS,vlocbeg,vlocend);
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
462
			rhs_norm = 1;
Kirill Terekhov's avatar
Kirill Terekhov committed
463
			//r[0] = b
Kirill Terekhov's avatar
Kirill Terekhov committed
464
			std::copy(RHS.Begin(),RHS.End(),r[0].Begin());
Kirill Terekhov's avatar
Kirill Terekhov committed
465
			{
Kirill Terekhov's avatar
Kirill Terekhov committed
466
				// r[0] = r[0] - A x
Kirill Terekhov's avatar
Kirill Terekhov committed
467
				Alink->MatVec(-1,SOL,1,r[0]); //global multiplication, r probably needs an update
Kirill Terekhov's avatar
Kirill Terekhov committed
468
				info->Update(r[0]); // r is good
469
				std::copy(SOL.Begin(),SOL.End(),x0.Begin()); //x0 = x
Kirill Terekhov's avatar
Kirill Terekhov committed
470
				std::fill(SOL.Begin(),SOL.End(),0.0); //x = 0
Kirill Terekhov's avatar
Kirill Terekhov committed
471
			}
Kirill Terekhov's avatar
Kirill Terekhov committed
472
473
474
475
476
			std::copy(r[0].Begin(),r[0].End(),r_tilde.Begin()); // r_tilde = r[0]
			std::fill(u[0].Begin(),u[0].End(),0); // u[0] = 0
			resid = info->ScalarProd(r[0],r[0],vlocbeg,vlocend); //resid = dot(r[0],r[0])
			for(INMOST_DATA_ENUM_TYPE k = vbeg; k != vend; k++) // r_tilde = r[0] / dot(r[0],r[0])
				r_tilde[k] /= resid;
477
			last_resid = resid = resid0 = sqrt(resid/rhs_norm); //resid = sqrt(dot(r[0],r[0])
Kirill Terekhov's avatar
Kirill Terekhov committed
478
479
480
481
482
483
484
			last_it = 0;
#if defined(REPORT_RESIDUAL)
			if( info->GetRank() == 0 ) 
			{
				//std::cout << "iter " << last_it << " residual " << resid << std::endl;
				//std::cout << "iter " << last_it << " resid " << resid << "\r";
				//printf("iter %3d resid %12g | %12g relative %12g | %12g\r", last_it, resid, atol, resid / resid0, rtol);
Kirill Terekhov's avatar
Kirill Terekhov committed
485
				printf("iter %3d resid %12g | %g\r", last_it, resid, atol);
Kirill Terekhov's avatar
Kirill Terekhov committed
486
487
488
				fflush(stdout);
			}
#endif
Alexander Danilov's avatar
Alexander Danilov committed
489
			INMOST_DATA_ENUM_TYPE i = 0;
Kirill Terekhov's avatar
Kirill Terekhov committed
490

Kirill Terekhov's avatar
Kirill Terekhov committed
491
492
493
494
495
			if( last_resid < atol || last_resid < rtol*resid0 ) 
			{
				reason = "initial solution satisfy tolerances";
				goto exit;
			}
Kirill Terekhov's avatar
Kirill Terekhov committed
496

Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
497
			long double tt, ts, tp;
Kirill Terekhov's avatar
Kirill Terekhov committed
498
			while( true )
Kirill Terekhov's avatar
Kirill Terekhov committed
499
500
501
502
503
504
505
			{
				ts = tp = 0;
				rho0 = -omega*rho0;
				
				tt = Timer();
				for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++)
				{
Kirill Terekhov's avatar
Kirill Terekhov committed
506
					rho1 = info->ScalarProd(r[j],r_tilde,vlocbeg,vlocend); // rho1 = dot(r[j],r_tilde)
Kirill Terekhov's avatar
Kirill Terekhov committed
507
508
					beta = alpha * (rho1/rho0);

Kirill Terekhov's avatar
Kirill Terekhov committed
509
					if( fabs(beta) > 1.0e+100 ) 
Kirill Terekhov's avatar
Kirill Terekhov committed
510
					{
Kirill Terekhov's avatar
Kirill Terekhov committed
511
						//std::cout << "alpha " << alpha << " rho1 " << rho1 << " rho0 " << rho0 << " beta " << beta << std::endl;
Kirill Terekhov's avatar
Kirill Terekhov committed
512
						reason = "multiplier(1) is too large";
Kirill Terekhov's avatar
Kirill Terekhov committed
513
514
						goto exit;
					}
Kirill Terekhov's avatar
Kirill Terekhov committed
515

Kirill Terekhov's avatar
Kirill Terekhov committed
516
517
518
519
520
521
					if( beta != beta )
					{
						reason = "multiplier(1) is NaN";
						goto exit;
					}

Kirill Terekhov's avatar
Kirill Terekhov committed
522
523
524
525
					rho0 = rho1;
					for(INMOST_DATA_ENUM_TYPE i = 0; i < j+1; i++)
						for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k)
							u[i][k] = r[i][k] - beta*u[i][k];
Kirill Terekhov's avatar
Kirill Terekhov committed
526
527
528

					ApplyOperator(u[j],u[j+1]); // u[j+1] = A*R*u[j]
					eta = info->ScalarProd(u[j+1],r_tilde,vlocbeg,vlocend); //eta = dot(u[j+1],r_tilde)
Kirill Terekhov's avatar
Kirill Terekhov committed
529
530
531
					
					alpha = rho0 / eta;

Kirill Terekhov's avatar
Kirill Terekhov committed
532
					if( fabs(alpha) > 1.0e+100 ) 
Kirill Terekhov's avatar
Kirill Terekhov committed
533
					{
Kirill Terekhov's avatar
Kirill Terekhov committed
534
						reason = "multiplier(2) is too large";
Kirill Terekhov's avatar
Kirill Terekhov committed
535
						goto exit;
Kirill Terekhov's avatar
Kirill Terekhov committed
536
					}
Kirill Terekhov's avatar
Kirill Terekhov committed
537
538
539
540
541
					if( alpha != alpha )
					{
						reason = "multiplier(2) is NaN";
						goto exit;
					}
Kirill Terekhov's avatar
Kirill Terekhov committed
542
543
544
545

					for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k)
						SOL[k] += alpha*u[0][k];

Kirill Terekhov's avatar
Kirill Terekhov committed
546
					for(INMOST_DATA_ENUM_TYPE i = 0; i < j+1; i++)
Kirill Terekhov's avatar
Kirill Terekhov committed
547
						for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k) //r[i] = r[i] - alpha * u[i+1]
Kirill Terekhov's avatar
Kirill Terekhov committed
548
							r[i][k] -= alpha*u[i+1][k];
Kirill Terekhov's avatar
Kirill Terekhov committed
549
550

					
551
552
					resid = info->ScalarProd(r[0],r[0],vlocbeg,vlocend); // resid = dot(r[j],r[j])
					resid = sqrt(resid/rhs_norm); // resid = sqrt(dot(r[j],r[j]))
Kirill Terekhov's avatar
Kirill Terekhov committed
553

Kirill Terekhov's avatar
Kirill Terekhov committed
554
					
Kirill Terekhov's avatar
Kirill Terekhov committed
555
					if( resid < atol || resid < rtol*resid0 ) 
Kirill Terekhov's avatar
Kirill Terekhov committed
556
					{
Kirill Terekhov's avatar
Kirill Terekhov committed
557
558
559
						reason = "early exit in bi-cg block";
						last_resid = resid;
						goto exit;
Kirill Terekhov's avatar
Kirill Terekhov committed
560
					}
Kirill Terekhov's avatar
Kirill Terekhov committed
561
562
563
564
565
					

					ApplyOperator(r[j],r[j+1]); // r[j+1] = A*R*r[j]

					
Kirill Terekhov's avatar
Kirill Terekhov committed
566
				}
Kirill Terekhov's avatar
Kirill Terekhov committed
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
				
				for(INMOST_DATA_ENUM_TYPE j = 1; j < l+1; j++)
				{
					for(INMOST_DATA_ENUM_TYPE m = 1; m < j+1; m++)
					{
						tau[(m-1) + (j-1)*l] = 0;
						for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k)
							tau[(m-1) + (j-1)*l] += r[j][k]*r[m][k];
						tau[(j-1) + (m-1)*l] = tau[(m-1) + (j-1)*l];
					}
					sigma[j-1] = 0;
					for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k)
						sigma[j-1] += r[0][k]*r[j][k];
				}
				info->Integrate(tau,l*l+l); //sigma is updated with tau
Kirill Terekhov's avatar
Kirill Terekhov committed
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649

#if defined(PSEUDOINVERSE)
				{
					int dgesvd_info = 0;


#if defined(USE_LAPACK_SVD)
					char c = 'A';
					INMOST_DATA_REAL_TYPE U[128*128], V[128*128], w[128];
					INMOST_DATA_REAL_TYPE work[5*128];
					int lwork = 5*128;
					int n = l;
					dgesvd_(&c,&c,&n,&n,tau,&n,w,U,&n,V,&n,work,&lwork,&dgesvd_info);
#else
					
					INMOST_DATA_REAL_TYPE U[128*128], V[128*128], S[128*128], w[128];
					svdnxn(tau,U,S,V,l);
					for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) w[j] = S[j*l+j];
#endif		
					/*
					printf("w ");
					for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++) printf("%20g ",w[j]);
					printf("\n");

					printf("U\n");
					for(INMOST_DATA_ENUM_TYPE j = 0; j < l*l; j++) 
					{
						printf("%20g ",U[j]);
						if( (j+1) % l == 0 ) printf("\n");
					}
					printf("\n");

					printf("VT\n");
					for(INMOST_DATA_ENUM_TYPE j = 0; j < l*l; j++) 
					{
						printf("%20g ",V[j]);
						if( (j+1) % l == 0 ) printf("\n");
					}
					printf("\n");
					*/
					if( dgesvd_info != 0 )
					{
						printf("(%s:%d) dgesvd %d\n",__FILE__,__LINE__,dgesvd_info);
						exit(-1);
					}
					
					INMOST_DATA_REAL_TYPE maxw = w[0], tol;
					for(INMOST_DATA_ENUM_TYPE j = 1; j < l; j++) if(w[j]>maxw) maxw = w[j];
					tol = l*maxw*1.0e-14;
					memset(gamma,0,sizeof(INMOST_DATA_REAL_TYPE)*l);
					for(INMOST_DATA_ENUM_TYPE j = 0; j < l; j++)
					{
						if( w[j] > tol )
						{
							INMOST_DATA_REAL_TYPE sum = 0;
							for(INMOST_DATA_ENUM_TYPE k = 0; k < l; ++k)
								sum += sigma[k]*U[j*l+k];
							for(INMOST_DATA_ENUM_TYPE k = 0; k < l; ++k)
								gamma[k] += sum/w[j]*V[k*l+j];
						}
					}
				}

				//svdnxn(tau,U,S,V,l);
				//INMOST_DATA_REAL_TYPE inv_tau[64];
				//pseudoinverse(tau,inv_tau,l);
				//matmul(inv_tau,sigma,gamma,l,l,1);
#else
Kirill Terekhov's avatar
Kirill Terekhov committed
650
651
652
653
654
655
656
657
				int order[128];
				int row = solvenxn(tau,gamma,sigma,l,order);
				if( row != 0 )
				{
					std::cout << "breakdown on row " << row << std::endl;
					reason = "breakdown in matrix inversion in polynomial part";
					break;
				}
Kirill Terekhov's avatar
Kirill Terekhov committed
658
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
659
				omega = gamma[l-1];
Kirill Terekhov's avatar
Kirill Terekhov committed
660
661
662
663
664
665
666
667
668
669
				if( fabs(omega) > 1.0e+100 )
				{
					reason = "multiplier(3) is too large";
					goto exit;
				}
				if( omega != omega )
				{
					reason = "multiplier(3) is NaN";
					goto exit;
				}
Kirill Terekhov's avatar
Kirill Terekhov committed
670
671
672
673
674
675
676
677
678
679
680
681
				for(INMOST_DATA_ENUM_TYPE j = 1; j < l+1; ++j)
				{
					for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k)
					{
						u[0][k] -= gamma[j-1]*u[j][k];
						SOL[k]  += gamma[j-1]*r[j-1][k];
						r[0][k] -= gamma[j-1]*r[j][k];
					}
				}
				
				
				/*
Kirill Terekhov's avatar
Kirill Terekhov committed
682
683
684
685
686
687
688
				for(INMOST_DATA_ENUM_TYPE j = 1; j < l+1; j++)
				{
					for(INMOST_DATA_ENUM_TYPE i = 1; i < j; i++)
					{
						tau[i-1 + (j-1)*l] = 0;
						for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k)
							tau[i-1 + (j-1)*l] += r[j][k]*r[i][k];
Kirill Terekhov's avatar
Kirill Terekhov committed
689
						info->Integrate(&tau[i-1 + (j-1)*l],1);
Kirill Terekhov's avatar
Kirill Terekhov committed
690
691
692
693
						tau[i-1 + (j-1)*l] /= sigma[i-1];
						for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k)
							r[j][k] -= tau[i-1 + (j-1)*l]*r[i][k];
					}
Kirill Terekhov's avatar
Kirill Terekhov committed
694
					INMOST_DATA_REAL_TYPE temp[2] = {0,0};
Kirill Terekhov's avatar
Kirill Terekhov committed
695
696
697
698
699
					for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k)
					{
						temp[0] += r[j][k]*r[j][k];
						temp[1] += r[0][k]*r[j][k];
					}
Kirill Terekhov's avatar
Kirill Terekhov committed
700
					info->Integrate(temp,2);
Kirill Terekhov's avatar
Kirill Terekhov committed
701
					sigma[j-1] = temp[0];//+1.0e-35; //REVIEW
Kirill Terekhov's avatar
Kirill Terekhov committed
702
703
704
705
706
					theta2[j-1] = temp[1]/sigma[j-1];
				}
				omega = theta1[l-1] = theta2[l-1];
				for(INMOST_DATA_ENUM_TYPE j = l-1; j > 0; j--)
				{
Kirill Terekhov's avatar
Kirill Terekhov committed
707
					eta = 0;
Kirill Terekhov's avatar
Kirill Terekhov committed
708
					for(INMOST_DATA_ENUM_TYPE i = j+1; i < l+1; i++)
Kirill Terekhov's avatar
Kirill Terekhov committed
709
710
						eta += tau[j-1 + (i-1)*l] * theta1[i-1];
					theta1[j-1] = theta2[j-1] - eta;
Kirill Terekhov's avatar
Kirill Terekhov committed
711
712
713
				}
				for(INMOST_DATA_ENUM_TYPE j = 1; j < l; j++)
				{
Kirill Terekhov's avatar
Kirill Terekhov committed
714
					eta = 0;
Kirill Terekhov's avatar
Kirill Terekhov committed
715
					for(INMOST_DATA_ENUM_TYPE i = j+1; i < l; i++)
Kirill Terekhov's avatar
Kirill Terekhov committed
716
717
						eta += tau[j-1 + (i-1)*l] * theta1[i];
					theta3[j-1] = theta1[j] + eta;
Kirill Terekhov's avatar
Kirill Terekhov committed
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
				}
				for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k)
				{
					SOL[k] += theta1[0]*r[0][k];
					r[0][k] -= theta2[l-1]*r[l][k];
					u[0][k] -= theta1[l-1]*u[l][k];
				}
				for(INMOST_DATA_ENUM_TYPE j = 1; j < l; j++)
				{
					for(INMOST_DATA_ENUM_TYPE k = vbeg; k < vend; ++k)
					{
						u[0][k] -= theta1[j-1]*u[j][k];
						SOL[k] += theta3[j-1]*r[j][k];
						r[0][k] -= theta2[j-1]*r[j][k];
					}
				}
Kirill Terekhov's avatar
Kirill Terekhov committed
734
				*/
Kirill Terekhov's avatar
Kirill Terekhov committed
735
736
				last_it = i+1;
				{
Kirill Terekhov's avatar
Kirill Terekhov committed
737
					resid = info->ScalarProd(r[0],r[0],vlocbeg,vlocend);
738
					resid = sqrt(resid/rhs_norm);
Kirill Terekhov's avatar
Kirill Terekhov committed
739
740
741
742
743
744
745
746
				}
				tt = Timer() - tt;
#if defined(REPORT_RESIDUAL)
				if( info->GetRank() == 0 ) 
				{
					//std::cout << "iter " << last_it << " residual " << resid << " time " << tt << " matvec " << ts*0.5/l << " precond " << tp*0.5/l << std::endl;
					//std::cout << "iter " << last_it << " resid " << resid << "\r";
					//printf("iter %3d resid %12g | %12g relative %12g | %12g\r", last_it, resid, atol, resid / resid0, rtol);
Kirill Terekhov's avatar
Kirill Terekhov committed
747
					printf("iter %3d resid %12g | %g\r", last_it, resid, atol);
Kirill Terekhov's avatar
Kirill Terekhov committed
748
749
750
751
					fflush(stdout);
				}
#endif
				last_resid = resid;
Kirill Terekhov's avatar
Kirill Terekhov committed
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
				if( resid != resid )
				{
					reason = "residual is NAN";
					break;
				}
				if( resid < atol )
				{
					reason = "converged due to absolute tolerance";
					break;
				}
				if( resid < rtol*resid0 )
				{
					reason = "converged due to relative tolerance";
					break;
				}
				if( resid > divtol )
				{
					reason = "diverged due to divergence tolerance";
					break;
				}
				if( i == maxits )
				{
					reason = "reached maximum iteration number";
					break;
				}
				i++;
Kirill Terekhov's avatar
Kirill Terekhov committed
778
			}
Kirill Terekhov's avatar
Kirill Terekhov committed
779
exit:
Kirill Terekhov's avatar
Kirill Terekhov committed
780
781
			if (prec != NULL)
			{
Kirill Terekhov's avatar
Kirill Terekhov committed
782
783
				prec->Solve(SOL, r_tilde); //undo right preconditioner
				std::copy(r_tilde.Begin(), r_tilde.End(), SOL.Begin());
Kirill Terekhov's avatar
Kirill Terekhov committed
784
			}
Kirill Terekhov's avatar
Kirill Terekhov committed
785
786
			for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k < vlocend; ++k) //undo shift
				SOL[k] += x0[k];
Kirill Terekhov's avatar
Kirill Terekhov committed
787
788
789
790
791
792
793
			//info->RestoreMatrix(A);
			info->RestoreVector(SOL);
			info->RestoreVector(RHS);
			if( last_resid < atol || last_resid < rtol*resid0 ) return true;
			return false;
		}
		/// Attach a new system matrix.
		/// Finalizes the solver if it is currently initialized, forwards the matrix to
		/// the preconditioner when one is present, and stores the matrix address.
		/// @return always true.
		bool ReplaceMAT(Solver::Matrix & A)
		{
			if (isInitialized())
				Finalize();
			if (prec != NULL)
				prec->ReplaceMAT(A);
			Alink = &A;
			return true;
		}
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
794
795
		/// Accepts a right-hand side and ignores it; the argument is not stored.
		/// @return always true.
		bool ReplaceRHS(Solver::Vector & RHS)
		{
			(void) RHS;
			return true;
		}
		/// Accepts a solution vector and ignores it; the argument is not stored.
		/// @return always true.
		bool ReplaceSOL(Solver::Vector & SOL)
		{
			(void) SOL;
			return true;
		}
Kirill Terekhov's avatar
Kirill Terekhov committed
796
		/// Create a copy of this solver on the heap via the copy constructor.
		/// NOTE(review): the caller presumably takes ownership of the returned object - confirm.
		Method * Duplicate()
		{
			return new BCGSL_solver(*this);
		}
Kirill Terekhov's avatar
Kirill Terekhov committed
797
		/// Returns a copy of the stored termination reason string.
		std::string GetReason()
		{
			return reason;
		}
Kirill Terekhov's avatar
Kirill Terekhov committed
798
799
800
801
802
803
804
805
806
807
808
809
810
	};


	class BCGS_solver : public IterativeMethod
	{
		INMOST_DATA_REAL_TYPE rtol, atol, divtol, last_resid;
		INMOST_DATA_ENUM_TYPE iters, maxits, last_it;
		INMOST_DATA_REAL_TYPE resid;
		Solver::Vector r0, p, y, s, t, z, r, v;
		Solver::Matrix * Alink;
		Method * prec;
		Solver::OrderInfo * info;
		bool init;
Kirill Terekhov's avatar
Kirill Terekhov committed
811
		std::string reason;
Kirill Terekhov's avatar
Kirill Terekhov committed
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
	public:
		/// Returns the iteration counter recorded by the most recent Solve().
		INMOST_DATA_ENUM_TYPE GetIterations()
		{
			return last_it;
		}
		/// Returns the residual norm recorded by the most recent Solve().
		INMOST_DATA_REAL_TYPE GetResidual()
		{
			return last_resid;
		}
		/// Look up a real-valued parameter by name and return a modifiable reference.
		/// Recognized names: "rtol", "atol", "divtol". A name starting with ':' is
		/// passed to the preconditioner with that first character removed; any other
		/// unrecognized name is passed to the preconditioner unchanged.
		/// Throws the integer -1 when the name cannot be resolved.
		INMOST_DATA_REAL_TYPE & RealParameter(std::string name)
		{
			// ':' prefix addresses the preconditioner directly.
			if (name[0] == ':' && prec != NULL)
				return prec->RealParameter(name.substr(1));

			if (name == "rtol")
				return rtol;
			if (name == "atol")
				return atol;
			if (name == "divtol")
				return divtol;

			// Not one of ours: let the preconditioner try the unmodified name.
			if (prec != NULL)
				return prec->RealParameter(name);

			throw -1;
		}
		/// Look up an integer-valued parameter by name and return a modifiable reference.
		/// Recognized name: "maxits". A name starting with ':' is passed to the
		/// preconditioner with that first character removed; any other unrecognized
		/// name is passed to the preconditioner unchanged.
		/// Throws the integer -1 when the name cannot be resolved.
		INMOST_DATA_ENUM_TYPE & EnumParameter(std::string name)
		{
			// ':' prefix addresses the preconditioner directly.
			if (name[0] == ':' && prec != NULL)
				return prec->EnumParameter(name.substr(1));

			if (name == "maxits")
				return maxits;

			// Not one of ours: let the preconditioner try the unmodified name.
			if (prec != NULL)
				return prec->EnumParameter(name);

			throw -1;
		}
		/// Construct a solver with default tolerances (rtol 1e-8, atol 1e-11,
		/// divtol 1e+40) and an iteration limit of 1500.
		/// @param prec preconditioner, may be NULL; it is deleted by the destructor.
		/// @param info ordering/partition information; only its address is stored.
		///
		/// Every scalar member is given a defined value here. In particular Alink
		/// starts as NULL so that the assert in Initialize() is meaningful when no
		/// matrix has been attached yet, and the last-solve statistics read by
		/// GetIterations()/GetResidual() are zero before the first Solve().
		BCGS_solver(Method * prec, Solver::OrderInfo & info)
			: rtol(1e-8), atol(1e-11), divtol(1e+40), last_resid(0.0),
			  iters(0), maxits(1500), last_it(0), resid(0.0),
			  Alink(NULL), prec(prec), info(&info), init(false)
		{
		}
		bool Initialize()
		{
			assert(Alink != NULL);
			if (isInitialized()) Finalize();
			if (prec != NULL && !prec->isInitialized()) prec->Initialize();
			info->PrepareVector(r);
			info->PrepareVector(v);
			info->PrepareVector(p);
			info->PrepareVector(y);
			info->PrepareVector(s);
			info->PrepareVector(t);
			info->PrepareVector(z);
			info->PrepareVector(r0);
			init = true;
			return true;
		}
		/// True when this solver is flagged initialized and the preconditioner,
		/// if there is one, also reports itself initialized.
		bool isInitialized()
		{
			if (!init)
				return false;
			return prec == NULL || prec->isInitialized();
		}
		/// Finalize the preconditioner (when present and not already finalized)
		/// and clear the initialized flag.
		/// @return always true.
		bool Finalize()
		{
			const bool prec_needs_finalize = (prec != NULL) && !prec->isFinalized();
			if (prec_needs_finalize)
				prec->Finalize();
			init = false;
			return true;
		}
		/// True when this solver is not flagged initialized and the preconditioner,
		/// if there is one, reports itself finalized.
		bool isFinalized()
		{
			if (init)
				return false;
			return prec == NULL || prec->isFinalized();
		}
		void Copy(const Method * other)
		{
			const BCGS_solver * b = dynamic_cast<const BCGS_solver *>(other);
			assert(b != NULL);
			info = b->info;
			rtol = b->rtol;
			atol = b->atol;
			divtol = b->divtol;
			maxits = b->maxits;
			last_resid = b->last_resid;
			iters = b->iters;
			last_it = b->last_it;
			resid = b->resid;
			Alink = b->Alink;
			if (b->prec != NULL)
			{
				if (prec == NULL) prec = b->prec->Duplicate();
				else prec->Copy(b->prec);
			}
			if (b->init) Initialize();
		}
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
887
		BCGS_solver(const BCGS_solver & other) : IterativeMethod(other)
Kirill Terekhov's avatar
Kirill Terekhov committed
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
		{
			Copy(&other);
		}
		/// Assignment: copies settings and state from `other` through Copy().
		/// Self-assignment is a no-op, so the solver and its preconditioner are not
		/// needlessly finalized and re-initialized.
		BCGS_solver & operator =(BCGS_solver const & other)
		{
			if (this != &other)
				Copy(&other);
			return *this;
		}
		/// Destructor: finalizes the solver if necessary and destroys the owned
		/// preconditioner.
		~BCGS_solver()
		{
			if (!isFinalized())
				Finalize();
			delete prec; // deleting a null pointer has no effect
		}
		bool Solve(Solver::Vector & RHS, Solver::Vector & SOL)
		{
			assert(isInitialized());
Kirill Terekhov's avatar
Kirill Terekhov committed
904
			INMOST_DATA_REAL_TYPE tempa = 0.0, tempb=0.0;
Kirill Terekhov's avatar
Kirill Terekhov committed
905
			INMOST_DATA_ENUM_TYPE vbeg,vend, vlocbeg, vlocend;
Kirill Terekhov's avatar
Kirill Terekhov committed
906
			INMOST_DATA_INTEGER_TYPE ivbeg,ivend, ivlocbeg, ivlocend;
Kirill Terekhov's avatar
Kirill Terekhov committed
907
908
909
910
911
			INMOST_DATA_REAL_TYPE rho = 1, alpha = 1, beta, omega = 1;
			INMOST_DATA_REAL_TYPE resid0, resid, temp[2];
			bool is_parallel = info->GetSize() > 1;
			info->PrepareVector(SOL);
			info->PrepareVector(RHS);
Kirill Terekhov's avatar
Kirill Terekhov committed
912
913
			if( is_parallel ) info->Update(SOL);
			if( is_parallel ) info->Update(RHS);
Kirill Terekhov's avatar
Kirill Terekhov committed
914
915
916
917
			if (prec != NULL)prec->ReplaceSOL(SOL);
			if (prec != NULL)prec->ReplaceRHS(RHS);
			info->GetLocalRegion(info->GetRank(),vlocbeg,vlocend);
			info->GetVectorRegion(vbeg,vend);
Kirill Terekhov's avatar
Kirill Terekhov committed
918
			std::copy(RHS.Begin(),RHS.End(),r.Begin());
Kirill Terekhov's avatar
Kirill Terekhov committed
919
920
921
922
			{
				Alink->MatVec(-1,SOL,1,r); //global multiplication, r probably needs an update
				info->Update(r); // r is good
			}
Kirill Terekhov's avatar
Kirill Terekhov committed
923
			std::copy(r.Begin(),r.End(),r0.Begin());
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
924
925
			std::fill(v.Begin(),v.End(),0.0);
			std::fill(p.Begin(),p.End(),0.0);
Kirill Terekhov's avatar
Kirill Terekhov committed
926
927
928
929
930
931
932
933
			{
				resid = 0;
				for(INMOST_DATA_ENUM_TYPE k = vlocbeg; k != vlocend; k++) 
					resid += r[k]*r[k];
				if( is_parallel ) info->Integrate(&resid,1);
			}
			last_resid = resid = resid0 = sqrt(resid);
			last_it = 0;
Kirill Terekhov's avatar
Kirill Terekhov committed
934
935
936
937
			ivbeg = vbeg;
			ivend = vend;
			ivlocbeg = vlocbeg;
			ivlocend = vlocend;
Kirill Terekhov's avatar
Kirill Terekhov committed
938
939
940
941
942
943
#if defined(REPORT_RESIDUAL)
			if( info->GetRank() == 0 ) 
			{
				//std::cout << "iter " << last_it << " residual " << resid << std::endl;
				//std::cout << "iter " << last_it << " resid " << resid << "\r";
				//printf("iter %3d resid %12g | %12g relative %12g | %12g\r",last_it,resid,atol,resid/resid0,rtol);
Kirill Terekhov's avatar
Kirill Terekhov committed
944
				printf("iter %3d resid %12g | %g\r", last_it, resid, atol);
Kirill Terekhov's avatar
Kirill Terekhov committed
945
946
947
				fflush(stdout);
			}
#endif
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
948
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
949
#pragma omp parallel
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
950
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
951
			{
Kirill Terekhov's avatar
Kirill Terekhov committed
952
953
954
				long double tt, ts, tp, ttt;
				INMOST_DATA_ENUM_TYPE i = 0;
				while(true)
Kirill Terekhov's avatar
Kirill Terekhov committed
955
				{
Kirill Terekhov's avatar
Kirill Terekhov committed
956
957
958
					ts = tp = 0;
					tt = Timer();
					{
Kirill Terekhov's avatar
Kirill Terekhov committed
959
						/*
Kirill Terekhov's avatar
Kirill Terekhov committed
960
961
						if( fabs(rho) < 1.0e-31 )
						{
Kirill Terekhov's avatar
Kirill Terekhov committed
962
							std::cout << "rho " << rho << " alpha " << alpha << " omega " << omega << " beta " << 1.0 /rho * alpha / omega << std::endl;
Kirill Terekhov's avatar
Kirill Terekhov committed
963
964
965
966
967
							reason = "denominator(1) is zero";
							break;
						}
						if( fabs(omega) < 1.0e-31 )
						{
Kirill Terekhov's avatar
Kirill Terekhov committed
968
							std::cout << "rho " << rho << " alpha " << alpha << " omega " << omega << " beta " << 1.0 /rho * alpha / omega << std::endl;
Kirill Terekhov's avatar
Kirill Terekhov committed
969
970
971
							reason = "denominator(2) is zero";
							break;
						}
Kirill Terekhov's avatar
Kirill Terekhov committed
972
973
						*/
						//std::cout << "rho " << rho << " alpha " << alpha << " omega " << omega << " beta " << 1.0 /rho * alpha / omega << std::endl;
Kirill Terekhov's avatar
Kirill Terekhov committed
974
975
						beta = 1.0 /rho * alpha / omega;
						rho = 0;
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
976
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
977
#pragma omp for reduction(+:rho)
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
978
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
979
980
981
982
						for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; k++) 
							rho += r0[k]*r[k];
						if( is_parallel ) 
						{
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
983
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
984
#pragma omp single
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
985
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
986
987
988
							info->Integrate(&rho,1);
						}
						beta *= rho;
Kirill Terekhov's avatar
Kirill Terekhov committed
989

Kirill Terekhov's avatar
Kirill Terekhov committed
990
						if( fabs(beta) > 1.0e+100 )
Kirill Terekhov's avatar
Kirill Terekhov committed
991
992
993
994
995
						{
							//std::cout << "rho " << rho << " alpha " << alpha << " omega " << omega << " beta " << 1.0 /rho * alpha / omega << std::endl;
							reason = "multiplier(1) is too large";
							break;
						}
Kirill Terekhov's avatar
Kirill Terekhov committed
996
997
998
999
1000
						if( beta != beta )
						{
							reason = "multiplier(1) is NaN";
							break;
						}
Kirill Terekhov's avatar
Kirill Terekhov committed
1001
1002
					}
					{
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1003
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1004
#pragma omp for
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1005
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1006
1007
1008
1009
1010
1011
1012
						for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) 
							p[k] = r[k] + beta*(p[k] - omega*v[k]); //global indexes r, p, v
					}
					{
						ttt = Timer();
						if (prec != NULL)
						{
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1013
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1014
#pragma omp single
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1015
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1016
1017
1018
1019
1020
							prec->Solve(p, y);
						}
						tp += Timer() - ttt;
						if( is_parallel ) 
						{
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1021
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1022
#pragma omp single
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1023
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1024
1025
1026
1027
1028
1029
1030
							info->Update(y);
						}
						ttt = Timer();
						Alink->MatVec(1,y,0,v); // global multiplication, y should be updated, v probably needs an update
						ts += Timer() - ttt;
						if( is_parallel ) 
						{
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1031
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1032
#pragma omp single
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1033
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1034
1035
1036
1037
1038
							info->Update(v);
						}
					}
					{
						alpha = 0;
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1039
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1040
#pragma omp for reduction(+:alpha)
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1041
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1042
1043
1044
1045
						for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; k++)  
							alpha += r0[k]*v[k];
						if( is_parallel ) 
						{
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1046
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1047
#pragma omp single
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1048
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1049
1050
							info->Integrate(&alpha,1);
						}
Kirill Terekhov's avatar
Kirill Terekhov committed
1051

Kirill Terekhov's avatar
Kirill Terekhov committed
1052
1053
1054
1055
						if( alpha == 0 && rho == 0 ) 
							alpha = 0;
						else
							alpha = rho / alpha; //local indexes, r0, v
Kirill Terekhov's avatar
Kirill Terekhov committed
1056

Kirill Terekhov's avatar
Kirill Terekhov committed
1057
						if( fabs(alpha) > 1.0e+100 )
Kirill Terekhov's avatar
Kirill Terekhov committed
1058
						{
Kirill Terekhov's avatar
Kirill Terekhov committed
1059
1060
							reason = "multiplier(2) is too large";
							//std::cout << "alpha " << alpha << " rho " << rho << std::endl;
Kirill Terekhov's avatar
Kirill Terekhov committed
1061
1062
							break;
						}
Kirill Terekhov's avatar
Kirill Terekhov committed
1063
1064
1065
1066
1067
1068
						if( alpha != alpha )
						{
							reason = "multiplier(2) is NaN";
							//std::cout << "alpha " << alpha << " rho " << rho << std::endl;
							break;
						}
Kirill Terekhov's avatar
Kirill Terekhov committed
1069
						
Kirill Terekhov's avatar
Kirill Terekhov committed
1070
1071
					}
					{
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1072
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1073
#pragma omp for
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1074
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1075
1076
1077
						for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) 
							s[k] = r[k] - alpha * v[k]; //global indexes r, v
					}
Kirill Terekhov's avatar
Kirill Terekhov committed
1078
1079
				
					{
Kirill Terekhov's avatar
Kirill Terekhov committed
1080
1081
1082
						ttt = Timer();
						if (prec != NULL)
						{
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1083
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1084
#pragma omp single
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1085
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1086
1087
1088
1089
1090
							prec->Solve(s, z);
						}
						tp += Timer() - ttt;
						if( is_parallel ) 
						{
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1091
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1092
#pragma omp single
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1093
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1094
1095
1096
1097
1098
1099
1100
							info->Update(z);
						}
						ttt = Timer();
						Alink->MatVec(1.0,z,0,t); // global multiplication, z should be updated, t probably needs an update
						ts += Timer() - ttt;
						if( is_parallel ) 
						{
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1101
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1102
#pragma omp single
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1103
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1104
1105
							info->Update(t);
						}
Kirill Terekhov's avatar
Kirill Terekhov committed
1106
1107
					}
					{
Kirill Terekhov's avatar
Kirill Terekhov committed
1108
1109
						
						temp[0] = temp[1] = 0;
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1110
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1111
#pragma omp for reduction(+:tempa) reduction(+:tempb)
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1112
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1113
1114
1115
1116
1117
1118
1119
1120
1121
						for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; k++)
						{
							tempa += t[k]*s[k];
							tempb += t[k]*t[k];
						}
						temp[0] = tempa;
						temp[1] = tempb;
						if( is_parallel ) 
						{
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1122
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1123
#pragma omp single
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1124
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1125
1126
1127
							info->Integrate(temp,2);
						}
						/*
Kirill Terekhov's avatar
Kirill Terekhov committed
1128
						if (fabs(temp[1]) < 1.0e-35)
Kirill Terekhov's avatar
Kirill Terekhov committed
1129
						{
Kirill Terekhov's avatar
Kirill Terekhov committed
1130
							std::cout << "a " << temp[0] << " b " << temp[1] << " omega " << temp[0]/temp[1] << std::endl;
Kirill Terekhov's avatar
Kirill Terekhov committed
1131
1132
						}
						*/
Kirill Terekhov's avatar
Kirill Terekhov committed
1133
						//omega = temp[0] / (temp[1] + (temp[1] < 0.0 ? -1.0e-10 : 1.0e-10)); //local indexes t, s
Kirill Terekhov's avatar
Kirill Terekhov committed
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
						if( temp[0] == 0 && temp[1] == 0 )
							omega = 0;
						else
							omega = temp[0] / temp[1];

						if( fabs(omega) > 1.0e+100 )
						{
							reason = "multiplier(3) is too large";
							//std::cout << "alpha " << alpha << " rho " << rho << std::endl;
							break;
						}
						if( omega != omega )
						{
							reason = "multiplier(3) is NaN";
							//std::cout << "alpha " << alpha << " rho " << rho << std::endl;
							break;
						}
Kirill Terekhov's avatar
Kirill Terekhov committed
1151
					}
Kirill Terekhov's avatar
Kirill Terekhov committed
1152
					{
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1153
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1154
#pragma omp for
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1155
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1156
1157
1158
1159
						for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) 
							SOL[k] += alpha * y[k] + omega * z[k]; // global indexes SOL, y, z
					}
					{
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1160
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1161
#pragma omp for
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1162
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1163
1164
1165
1166
1167
1168
						for(INMOST_DATA_INTEGER_TYPE k = ivbeg; k < ivend; ++k) 
							r[k] = s[k] - omega * t[k]; // global indexes r, s, t
					}
					last_it = i+1;
					{
						resid = 0;
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1169
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1170
#pragma omp for reduction(+:resid)
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1171
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1172
1173
1174
1175
						for(INMOST_DATA_INTEGER_TYPE k = ivlocbeg; k < ivlocend; k++) 
							resid += r[k]*r[k];
						if( is_parallel ) 
						{
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1176
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1177
#pragma omp single
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1178
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1179
1180
1181
1182
1183
							info->Integrate(&resid,1);
						}
						resid = sqrt(resid);
					}
					tt = Timer() - tt;
Kirill Terekhov's avatar
Kirill Terekhov committed
1184
#if defined(REPORT_RESIDUAL)
Kirill Terekhov's avatar
Kirill Terekhov committed
1185
1186
1187
1188
1189
					if( info->GetRank() == 0 ) 
					{
						//std::cout << "iter " << last_it << " residual " << resid << " time " << tt << " matvec " << ts*0.5 << " precond " << tp*0.5 << std::endl;
						//std::cout << "iter " << last_it << " resid " << resid << "\r";
						//printf("iter %3d resid %12g | %12g relative %12g | %12g\r", last_it, resid, atol, resid / resid0, rtol);
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1190
#if defined(USE_OMP)
Kirill Terekhov's avatar
Kirill Terekhov committed
1191
#pragma omp single
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1192
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1193
1194
1195
1196
1197
						{
							printf("iter %3d resid %12g | %g\r", last_it, resid, atol);
							fflush(stdout);
						}
					}
Kirill Terekhov's avatar
Kirill Terekhov committed
1198
#endif
Kirill Terekhov's avatar
Kirill Terekhov committed
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
					last_resid = resid;
					if( resid != resid )
					{
						reason = "residual is NAN";
						break;
					}
					if( resid > divtol )
					{
						reason = "diverged due to divergence tolerance";
						break;
					}
					if( resid < atol )
					{
						reason = "converged due to absolute tolerance";
						break;
					}
					if( resid < rtol*resid0 )
					{
						reason = "converged due to relative tolerance";
						break;
					}
					if( i == maxits )
					{
						reason = "reached maximum iteration number";
						break;
					}
					i++;
				}
Kirill Terekhov's avatar
Kirill Terekhov committed
1227
1228
1229
1230
1231
1232
1233
1234
			}
			//info->RestoreMatrix(A);
			info->RestoreVector(SOL);
			info->RestoreVector(RHS);
			if( last_resid < atol || last_resid < rtol*resid0 ) return true;
			return false;
		}
		/// Attach a new system matrix.
		/// Finalizes the solver if it is currently initialized, forwards the matrix to
		/// the preconditioner when one is present, and stores the matrix address.
		/// Solve() asserts isInitialized(), so Initialize() has to be called again
		/// before the next solve.
		/// @return always true.
		bool ReplaceMAT(Solver::Matrix & A)
		{
			if (isInitialized())
				Finalize();
			if (prec != NULL)
				prec->ReplaceMAT(A);
			Alink = &A;
			return true;
		}
Kirill Terekhov's avatar
Fixes    
Kirill Terekhov committed
1235
1236
		/// Accepts a right-hand side and ignores it; the vector is passed to Solve() instead.
		/// @return always true.
		bool ReplaceRHS(Solver::Vector & RHS)
		{
			(void)RHS;
			return true;
		}
		/// Accepts a solution vector and ignores it; the vector is passed to Solve() instead.
		/// @return always true.
		bool ReplaceSOL(Solver::Vector & SOL)
		{
			(void)SOL;
			return true;
		}
Kirill Terekhov's avatar
Kirill Terekhov committed
1237
		/// Create a copy of this solver on the heap via the copy constructor.
		/// NOTE(review): the caller presumably takes ownership of the returned object - confirm.
		Method * Duplicate()
		{
			return new BCGS_solver(*this);
		}
Kirill Terekhov's avatar
Kirill Terekhov committed
1238
		/// Returns a copy of the termination reason stored by the last Solve().
		std::string GetReason()
		{
			return reason;
		}
Kirill Terekhov's avatar
Kirill Terekhov committed
1239
1240
1241
1242
1243
	};
}


#endif