SenK
SenK is a C++ library for high-performance linear solvers.
senk_gmres.hpp
Go to the documentation of this file.
1
7#ifndef SENK_GMRES_HPP
8#define SENK_GMRES_HPP
9
10#include "senk_sparse.hpp"
11#include "senk_blas1.hpp"
12#include "senk_blas2.hpp"
13
14namespace senk {
18namespace solver {
33template <typename T>
34void Gmresm(
35 T *val, int *cind, int *rptr,
36 T *b, T *x, T nrm_b,
37 int outer, int m, int N, T epsilon)
38{
39 T *c = new T[m];
40 T *s = new T[m];
41 T *e = new T[m+1];
42 T *H = new T[(m+1)*m];
43 T *V = new T[N*(m+1)];
44 T *y = new T[m];
45
46 int flag = 0;
47 for(int i=0; i<outer; i++) {
48 sparse::SpmvCsr<T>(val, cind, rptr, x, &V[0], N);
49 senk::blas1::Axpby<T>(1, b, -1, &V[0], N);
50 e[0] = blas1::Nrm2<T>(&V[0], N);
51 senk::blas1::Scal<T>(1/e[0], &V[0], N);
52 int j;
53 for(j=0; j<m; j++) {
54 sparse::SpmvCsr<T>(val, cind, rptr, &V[j*N], &V[(j+1)*N], N);
55 for(int k=0; k<=j; k++) {
56 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
57 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
58 }
59 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
60 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
61 for(int k=0; k<j; k++) {
62 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
63 }
64 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
65 H[j*(m+1)+j+1] = 0;
66 e[j+1] = s[j] * e[j];
67 e[j] = c[j] * e[j];
68#if PRINT_RES
69 printf("# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
70#endif
71 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
72 printf("%s iter %d\n", ITER_SYMBOL, i*m+j+1);
73 printf("%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
74 j++;
75 flag = 1;
76 break;
77 }
78 }
79 blas2::Trsv<T>(H, e, y, m+1, j);
80 for(int k=0; k<j; k++) {
81 blas1::Axpy<T>(y[k], &V[k*N], x, N);
82 }
83 if(flag == 1) break;
84 }
85 if(!flag) {
86 printf("# iter %d\n", outer*m);
87 printf("# res : Check by using senk::test\n");
88 }
89 delete[] c;
90 delete[] s;
91 delete[] e;
92 delete[] H;
93 delete[] V;
94 delete[] y;
95}
96
113template <typename T, int bnl, int bnw>
115 T *bval, int *bcind, int *brptr,
116 T *b, T *x, T nrm_b,
117 int outer, int m, int N, T epsilon)
118{
119 T *c = new T[m];
120 T *s = new T[m];
121 T *e = new T[m+1];
122 T *H = new T[(m+1)*m];
123 T *V = new T[N*(m+1)];
124 T *y = new T[m];
125
126 int flag = 0;
127 for(int i=0; i<outer; i++) {
128 sparse::SpmvBcsr<T, bnl, bnw>(bval, bcind, brptr, x, &V[0], N);
129 senk::blas1::Axpby<T>(1, b, -1, &V[0], N);
130 e[0] = blas1::Nrm2<T>(&V[0], N);
131 senk::blas1::Scal<T>(1/e[0], &V[0], N);
132 int j;
133 for(j=0; j<m; j++) {
134 sparse::SpmvBcsr<T, bnl, bnw>(bval, bcind, brptr, &V[j*N], &V[(j+1)*N], N);
135 for(int k=0; k<=j; k++) {
136 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
137 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
138 }
139 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
140 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
141 for(int k=0; k<j; k++) {
142 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
143 }
144 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
145 H[j*(m+1)+j+1] = 0;
146 e[j+1] = s[j] * e[j];
147 e[j] = c[j] * e[j];
148#if PRINT_RES
149 printf("# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
150#endif
151 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
152 printf("%s iter %d\n", ITER_SYMBOL, i*m+j+1);
153 printf("%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
154 j++;
155 flag = 1;
156 break;
157 }
158 }
159 blas2::Trsv<T>(H, e, y, m+1, j);
160 for(int k=0; k<j; k++) {
161 blas1::Axpy<T>(y[k], &V[k*N], x, N);
162 }
163 if(flag == 1) break;
164 }
165 if(!flag) {
166 printf("# iter %d\n", outer*m);
167 printf("# res : Check by using senk::test\n");
168 }
169 delete[] c;
170 delete[] s;
171 delete[] e;
172 delete[] H;
173 delete[] V;
174 delete[] y;
175}
176
197template <typename T>
199 T *val, int *cind, int *rptr,
200 T *lval, int *lcind, int *lrptr,
201 T *uval, int *ucind, int *urptr,
202 T *b, T *x, T nrm_b,
203 int outer, int m, int N, T epsilon)
204{
205 T *c = new T[m];
206 T *s = new T[m];
207 T *e = new T[m+1];
208 T *H = new T[(m+1)*m];
209 T *V = new T[N*(m+1)];
210 T *y = new T[m];
211 T *t = new T[N];
212
213 int flag = 0;
214 for(int i=0; i<outer; i++) {
215 sparse::SpmvCsr<T>(val, cind, rptr, x, &V[0], N);
216 blas1::Axpby<T>(1, b, -1, &V[0], N);
217 e[0] = blas1::Nrm2<T>(&V[0], N);
218 blas1::Scal<T>(1/e[0], &V[0], N);
219 int j;
220 for(j=0; j<m; j++) {
221 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[j*N], t, N);
222 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N);
223 sparse::SpmvCsr<T>(val, cind, rptr, t, &V[(j+1)*N], N);
224 for(int k=0; k<=j; k++) {
225 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
226 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
227 }
228 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
229 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
230 for(int k=0; k<j; k++) {
231 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
232 }
233 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
234 H[j*(m+1)+j+1] = 0;
235 e[j+1] = s[j] * e[j];
236 e[j] = c[j] * e[j];
237#if PRINT_RES
238 printf("# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
239#endif
240 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
241 printf("%s iter %d\n", ITER_SYMBOL, i*m+j+1);
242 printf("%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
243 j++;
244 flag = 1;
245 break;
246 }
247 }
248 blas2::Trsv<T>(H, e, y, m+1, j);
249 blas1::Scal<T>(y[0], &V[0], N);
250 for(int k=1; k<j; k++) {
251 blas1::Axpy<T>(y[k], &V[k*N], &V[0], N);
252 }
253 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[0], t, N);
254 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N);
255 blas1::Axpy<T>(1, t, x, N);
256
257 if(flag == 1) break;
258 }
259 if(!flag) {
260 printf("# iter %d\n", outer*m);
261 printf("# res : Check by using senk::test\n");
262 }
263 delete[] c;
264 delete[] s;
265 delete[] e;
266 delete[] H;
267 delete[] V;
268 delete[] y;
269 delete[] t;
270}
293template <typename T>
295 T *val, int *cind, int *rptr,
296 T *lval, int *lcind, int *lrptr,
297 T *uval, int *ucind, int *urptr,
298 int *cptr, int cnum,
299 T *b, T *x, T nrm_b,
300 int outer, int m, int N, T epsilon)
301{
302 T *c = new T[m];
303 T *s = new T[m];
304 T *e = new T[m+1];
305 T *H = new T[(m+1)*m];
306 T *V = new T[N*(m+1)];
307 T *y = new T[m];
308 T *t = new T[N];
309
310 int flag = 0;
311 for(int i=0; i<outer; i++) {
312 sparse::SpmvCsr<T>(val, cind, rptr, x, &V[0], N);
313 blas1::Axpby<T>(1, b, -1, &V[0], N);
314 e[0] = blas1::Nrm2<T>(&V[0], N);
315 blas1::Scal<T>(1/e[0], &V[0], N);
316 int j;
317 for(j=0; j<m; j++) {
318 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[j*N], t, N, cptr, cnum);
319 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N, cptr, cnum);
320 sparse::SpmvCsr<T>(val, cind, rptr, t, &V[(j+1)*N], N);
321 for(int k=0; k<=j; k++) {
322 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
323 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
324 }
325 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
326 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
327 for(int k=0; k<j; k++) {
328 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
329 }
330 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
331 H[j*(m+1)+j+1] = 0;
332 e[j+1] = s[j] * e[j];
333 e[j] = c[j] * e[j];
334#if PRINT_RES
335 printf("# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
336#endif
337 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
338 printf("%s iter %d\n", ITER_SYMBOL, i*m+j+1);
339 printf("%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
340 j++;
341 flag = 1;
342 break;
343 }
344 }
345 blas2::Trsv<T>(H, e, y, m+1, j);
346 blas1::Scal<T>(y[0], &V[0], N);
347 for(int k=1; k<j; k++) {
348 blas1::Axpy<T>(y[k], &V[k*N], &V[0], N);
349 }
350 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[0], t, N, cptr, cnum);
351 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N, cptr, cnum);
352 blas1::Axpy<T>(1, t, x, N);
353
354 if(flag == 1) break;
355 }
356 if(!flag) {
357 printf("# iter %d\n", outer*m);
358 printf("# res : Check by using senk::test\n");
359 }
360 delete[] c;
361 delete[] s;
362 delete[] e;
363 delete[] H;
364 delete[] V;
365 delete[] y;
366 delete[] t;
367}
391template <typename T>
393 T *val, int *cind, int *rptr,
394 T *lval, int *lcind, int *lrptr,
395 T *uval, int *ucind, int *urptr,
396 int *cptr, int cnum, int bsize,
397 T *b, T *x, T nrm_b,
398 int outer, int m, int N, T epsilon)
399{
400 T *c = new T[m];
401 T *s = new T[m];
402 T *e = new T[m+1];
403 T *H = new T[(m+1)*m];
404 T *V = new T[N*(m+1)];
405 T *y = new T[m];
406 T *t = new T[N];
407
408 int flag = 0;
409 for(int i=0; i<outer; i++) {
410 sparse::SpmvCsr<T>(val, cind, rptr, x, &V[0], N);
411 blas1::Axpby<T>(1, b, -1, &V[0], N);
412 e[0] = blas1::Nrm2<T>(&V[0], N);
413 blas1::Scal<T>(1/e[0], &V[0], N);
414 int j;
415 for(j=0; j<m; j++) {
416 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[j*N], t, N, cptr, cnum, bsize);
417 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N, cptr, cnum, bsize);
418 sparse::SpmvCsr<T>(val, cind, rptr, t, &V[(j+1)*N], N);
419 for(int k=0; k<=j; k++) {
420 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
421 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
422 }
423 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
424 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
425 for(int k=0; k<j; k++) {
426 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
427 }
428 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
429 H[j*(m+1)+j+1] = 0;
430 e[j+1] = s[j] * e[j];
431 e[j] = c[j] * e[j];
432#if PRINT_RES
433 printf("# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
434#endif
435 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
436 printf("%s iter %d\n", ITER_SYMBOL, i*m+j+1);
437 printf("%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
438 j++;
439 flag = 1;
440 break;
441 }
442 }
443 blas2::Trsv<T>(H, e, y, m+1, j);
444 blas1::Scal<T>(y[0], &V[0], N);
445 for(int k=1; k<j; k++) {
446 blas1::Axpy<T>(y[k], &V[k*N], &V[0], N);
447 }
448 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[0], t, N, cptr, cnum, bsize);
449 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N, cptr, cnum, bsize);
450 blas1::Axpy<T>(1, t, x, N);
451
452 if(flag == 1) break;
453 }
454 if(!flag) {
455 printf("# iter %d\n", outer*m);
456 printf("# res : Check by using senk::test\n");
457 }
458 delete[] c;
459 delete[] s;
460 delete[] e;
461 delete[] H;
462 delete[] V;
463 delete[] y;
464 delete[] t;
465}
487template <typename T>
489 T *val, int *cind, int *rptr,
490 T *lval, int *lcind, int *lrptr,
491 T *uval, int *ucind, int *urptr,
492 int bnum,
493 T *b, T *x, T nrm_b,
494 int outer, int m, int N, T epsilon)
495{
496 T *c = new T[m];
497 T *s = new T[m];
498 T *e = new T[m+1];
499 T *H = new T[(m+1)*m];
500 T *V = new T[N*(m+1)];
501 T *y = new T[m];
502 T *t = new T[N];
503
504 int flag = 0;
505 for(int i=0; i<outer; i++) {
506 sparse::SpmvCsr<T>(val, cind, rptr, x, &V[0], N);
507 blas1::Axpby<T>(1, b, -1, &V[0], N);
508 e[0] = blas1::Nrm2<T>(&V[0], N);
509 blas1::Scal<T>(1/e[0], &V[0], N);
510 int j;
511 for(j=0; j<m; j++) {
512 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[j*N], t, N, bnum);
513 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N, bnum);
514 sparse::SpmvCsr<T>(val, cind, rptr, t, &V[(j+1)*N], N);
515 for(int k=0; k<=j; k++) {
516 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
517 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
518 }
519 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
520 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
521 for(int k=0; k<j; k++) {
522 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
523 }
524 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
525 H[j*(m+1)+j+1] = 0;
526 e[j+1] = s[j] * e[j];
527 e[j] = c[j] * e[j];
528#if PRINT_RES
529 printf("# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
530#endif
531 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
532 printf("%s iter %d\n", ITER_SYMBOL, i*m+j+1);
533 printf("%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
534 j++;
535 flag = 1;
536 break;
537 }
538 }
539 blas2::Trsv<T>(H, e, y, m+1, j);
540 blas1::Scal<T>(y[0], &V[0], N);
541 for(int k=1; k<j; k++) {
542 blas1::Axpy<T>(y[k], &V[k*N], &V[0], N);
543 }
544 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[0], t, N, bnum);
545 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N, bnum);
546 blas1::Axpy<T>(1, t, x, N);
547
548 if(flag == 1) break;
549 }
550 if(!flag) {
551 printf("# iter %d\n", outer*m);
552 printf("# res : Check by using senk::test\n");
553 }
554 delete[] c;
555 delete[] s;
556 delete[] e;
557 delete[] H;
558 delete[] V;
559 delete[] y;
560 delete[] t;
561}
582template <typename T, int bnl, int bnw>
584 T *val, int *cind, int *rptr,
585 T *blval, int *blcind, int *blrptr,
586 T *buval, int *bucind, int *burptr,
587 T *b, T *x, T nrm_b,
588 int outer, int m, int N, T epsilon)
589{
590 T *c = new T[m];
591 T *s = new T[m];
592 T *e = new T[m+1];
593 T *H = new T[(m+1)*m];
594 T *V = new T[N*(m+1)];
595 T *y = new T[m];
596 T *t = new T[N];
597
598 int flag = 0;
599 for(int i=0; i<outer; i++) {
600 sparse::SpmvCsr<T>(val, cind, rptr, x, &V[0], N);
601 blas1::Axpby<T>(1, b, -1, &V[0], N);
602 e[0] = blas1::Nrm2<T>(&V[0], N);
603 blas1::Scal(1/e[0], &V[0], N);
604 int j;
605 for(j=0; j<m; j++) {
606 sparse::SptrsvBcsr_l<T, bnl, bnw>(blval, blcind, blrptr, &V[j*N], t, N);
607 sparse::SptrsvBcsr_u<T, bnl, bnw>(buval, bucind, burptr, t, t, N);
608 sparse::SpmvCsr<T>(val, cind, rptr, t, &V[(j+1)*N], N);
609 for(int k=0; k<=j; k++) {
610 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
611 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
612 }
613 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
614 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
615 for(int k=0; k<j; k++) {
616 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
617 }
618 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
619 H[j*(m+1)+j+1] = 0;
620 e[j+1] = s[j] * e[j];
621 e[j] = c[j] * e[j];
622#if PRINT_RES
623 printf("# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
624#endif
625 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
626 printf("%s iter %d\n", ITER_SYMBOL, i*m+j+1);
627 printf("%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
628 j++;
629 flag = 1;
630 break;
631 }
632 }
633 blas2::Trsv<T>(H, e, y, m+1, j);
634 blas1::Scal<T>(y[0], &V[0], N);
635 for(int k=1; k<j; k++) {
636 blas1::Axpy<T>(y[k], &V[k*N], &V[0], N);
637 }
638 sparse::SptrsvBcsr_l<T, bnl, bnw>(blval, blcind, blrptr, &V[0], t, N);
639 sparse::SptrsvBcsr_u<T, bnl, bnw>(buval, bucind, burptr, t, t, N);
640 blas1::Axpy<T>(1, t, x, N);
641
642 if(flag == 1) break;
643 }
644 if(!flag) {
645 printf("# iter %d\n", outer*m);
646 printf("# res : Check by using senk::test\n");
647 }
648 delete[] c;
649 delete[] s;
650 delete[] e;
651 delete[] H;
652 delete[] V;
653 delete[] y;
654 delete[] t;
655}
679template <typename T, int bnl, int bnw>
681 T *val, int *cind, int *rptr,
682 T *blval, int *blcind, int *blrptr,
683 T *buval, int *bucind, int *burptr,
684 int *cptr, int cnum, int bsize,
685 T *b, T *x, T nrm_b,
686 int outer, int m, int N, T epsilon)
687{
688 T *c = new T[m];
689 T *s = new T[m];
690 T *e = new T[m+1];
691 T *H = new T[(m+1)*m];
692 T *V = new T[N*(m+1)];
693 T *y = new T[m];
694 T *t = new T[N];
695
696 int flag = 0;
697 for(int i=0; i<outer; i++) {
698 sparse::SpmvCsr<T>(val, cind, rptr, x, &V[0], N);
699 blas1::Axpby<T>(1, b, -1, &V[0], N);
700 e[0] = blas1::Nrm2<T>(&V[0], N);
701 blas1::Scal(1/e[0], &V[0], N);
702 int j;
703 for(j=0; j<m; j++) {
704 sparse::SptrsvBcsr_l<T, bnl, bnw>(blval, blcind, blrptr, &V[j*N], t, N, cptr, cnum, bsize);
705 sparse::SptrsvBcsr_u<T, bnl, bnw>(buval, bucind, burptr, t, t, N, cptr, cnum, bsize);
706 sparse::SpmvCsr<T>(val, cind, rptr, t, &V[(j+1)*N], N);
707 for(int k=0; k<=j; k++) {
708 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
709 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
710 }
711 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
712 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
713 for(int k=0; k<j; k++) {
714 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
715 }
716 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
717 H[j*(m+1)+j+1] = 0;
718 e[j+1] = s[j] * e[j];
719 e[j] = c[j] * e[j];
720#if PRINT_RES
721 printf("# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
722#endif
723 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
724 printf("%s iter %d\n", ITER_SYMBOL, i*m+j+1);
725 printf("%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
726 j++;
727 flag = 1;
728 break;
729 }
730 }
731 blas2::Trsv<T>(H, e, y, m+1, j);
732 blas1::Scal<T>(y[0], &V[0], N);
733 for(int k=1; k<j; k++) {
734 blas1::Axpy<T>(y[k], &V[k*N], &V[0], N);
735 }
736 sparse::SptrsvBcsr_l<T, bnl, bnw>(blval, blcind, blrptr, &V[0], t, N, cptr, cnum, bsize);
737 sparse::SptrsvBcsr_u<T, bnl, bnw>(buval, bucind, burptr, t, t, N, cptr, cnum, bsize);
738 blas1::Axpy<T>(1, t, x, N);
739
740 if(flag == 1) break;
741 }
742 if(!flag) {
743 printf("# iter %d\n", outer*m);
744 printf("# res : Check by using senk::test\n");
745 }
746 delete[] c;
747 delete[] s;
748 delete[] e;
749 delete[] H;
750 delete[] V;
751 delete[] y;
752 delete[] t;
753}
754
755} // namespace solver
756
757} // namespace senk
758
759#endif
void Scal(T a, T *x, int N)
Multiply x by a.
Definition: senk_blas1.hpp:39
void AbmcIluGmresm(T *val, int *cind, int *rptr, T *lval, int *lcind, int *lrptr, T *uval, int *ucind, int *urptr, int *cptr, int cnum, int bsize, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILU preconditioned GMRES(m) solver parallelized by ABMC ordering.
Definition: senk_gmres.hpp:392
void Gmresm(T *val, int *cind, int *rptr, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The Non-preconditioned GMRES(m) solver.
Definition: senk_gmres.hpp:34
void IluGmresm(T *val, int *cind, int *rptr, T *lval, int *lcind, int *lrptr, T *uval, int *ucind, int *urptr, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILU preconditioned GMRES(m) solver.
Definition: senk_gmres.hpp:198
void IlubGmresm(T *val, int *cind, int *rptr, T *blval, int *blcind, int *blrptr, T *buval, int *bucind, int *burptr, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILUB preconditioned GMRES(m) solver.
Definition: senk_gmres.hpp:583
void BjIluGmresm(T *val, int *cind, int *rptr, T *lval, int *lcind, int *lrptr, T *uval, int *ucind, int *urptr, int bnum, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILU preconditioned GMRES(m) solver parallelized by the block Jacobi method.
Definition: senk_gmres.hpp:488
void AbmcIlubGmresm(T *val, int *cind, int *rptr, T *blval, int *blcind, int *blrptr, T *buval, int *bucind, int *burptr, int *cptr, int cnum, int bsize, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILUB preconditioned GMRES(m) solver parallelized by ABMC ordering.
Definition: senk_gmres.hpp:680
void AmcIluGmresm(T *val, int *cind, int *rptr, T *lval, int *lcind, int *lrptr, T *uval, int *ucind, int *urptr, int *cptr, int cnum, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILU preconditioned GMRES(m) solver parallelized by AMC ordering.
Definition: senk_gmres.hpp:294
The top-level namespace of SenK.
Level1 BLAS-style functions are written.
Level2 BLAS-style functions are written.
Functions related to sparse matrices are written.