35 T *val,
int *cind,
int *rptr,
37 int outer,
int m,
int N, T epsilon)
42 T *H =
new T[(m+1)*m];
43 T *V =
new T[N*(m+1)];
47 for(
int i=0; i<outer; i++) {
48 sparse::SpmvCsr<T>(val, cind, rptr, x, &V[0], N);
49 senk::blas1::Axpby<T>(1, b, -1, &V[0], N);
50 e[0] = blas1::Nrm2<T>(&V[0], N);
51 senk::blas1::Scal<T>(1/e[0], &V[0], N);
54 sparse::SpmvCsr<T>(val, cind, rptr, &V[j*N], &V[(j+1)*N], N);
55 for(
int k=0; k<=j; k++) {
56 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
57 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
59 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
60 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
61 for(
int k=0; k<j; k++) {
62 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
64 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
69 printf(
"# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
71 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
72 printf(
"%s iter %d\n", ITER_SYMBOL, i*m+j+1);
73 printf(
"%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
79 blas2::Trsv<T>(H, e, y, m+1, j);
80 for(
int k=0; k<j; k++) {
81 blas1::Axpy<T>(y[k], &V[k*N], x, N);
86 printf(
"# iter %d\n", outer*m);
87 printf(
"# res : Check by using senk::test\n");
113template <
typename T,
int bnl,
int bnw>
115 T *bval,
int *bcind,
int *brptr,
117 int outer,
int m,
int N, T epsilon)
122 T *H =
new T[(m+1)*m];
123 T *V =
new T[N*(m+1)];
127 for(
int i=0; i<outer; i++) {
128 sparse::SpmvBcsr<T, bnl, bnw>(bval, bcind, brptr, x, &V[0], N);
129 senk::blas1::Axpby<T>(1, b, -1, &V[0], N);
130 e[0] = blas1::Nrm2<T>(&V[0], N);
131 senk::blas1::Scal<T>(1/e[0], &V[0], N);
134 sparse::SpmvBcsr<T, bnl, bnw>(bval, bcind, brptr, &V[j*N], &V[(j+1)*N], N);
135 for(
int k=0; k<=j; k++) {
136 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
137 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
139 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
140 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
141 for(
int k=0; k<j; k++) {
142 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
144 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
146 e[j+1] = s[j] * e[j];
149 printf(
"# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
151 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
152 printf(
"%s iter %d\n", ITER_SYMBOL, i*m+j+1);
153 printf(
"%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
159 blas2::Trsv<T>(H, e, y, m+1, j);
160 for(
int k=0; k<j; k++) {
161 blas1::Axpy<T>(y[k], &V[k*N], x, N);
166 printf(
"# iter %d\n", outer*m);
167 printf(
"# res : Check by using senk::test\n");
199 T *val,
int *cind,
int *rptr,
200 T *lval,
int *lcind,
int *lrptr,
201 T *uval,
int *ucind,
int *urptr,
203 int outer,
int m,
int N, T epsilon)
208 T *H =
new T[(m+1)*m];
209 T *V =
new T[N*(m+1)];
214 for(
int i=0; i<outer; i++) {
215 sparse::SpmvCsr<T>(val, cind, rptr, x, &V[0], N);
216 blas1::Axpby<T>(1, b, -1, &V[0], N);
217 e[0] = blas1::Nrm2<T>(&V[0], N);
218 blas1::Scal<T>(1/e[0], &V[0], N);
221 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[j*N], t, N);
222 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N);
223 sparse::SpmvCsr<T>(val, cind, rptr, t, &V[(j+1)*N], N);
224 for(
int k=0; k<=j; k++) {
225 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
226 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
228 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
229 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
230 for(
int k=0; k<j; k++) {
231 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
233 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
235 e[j+1] = s[j] * e[j];
238 printf(
"# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
240 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
241 printf(
"%s iter %d\n", ITER_SYMBOL, i*m+j+1);
242 printf(
"%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
248 blas2::Trsv<T>(H, e, y, m+1, j);
249 blas1::Scal<T>(y[0], &V[0], N);
250 for(
int k=1; k<j; k++) {
251 blas1::Axpy<T>(y[k], &V[k*N], &V[0], N);
253 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[0], t, N);
254 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N);
255 blas1::Axpy<T>(1, t, x, N);
260 printf(
"# iter %d\n", outer*m);
261 printf(
"# res : Check by using senk::test\n");
295 T *val,
int *cind,
int *rptr,
296 T *lval,
int *lcind,
int *lrptr,
297 T *uval,
int *ucind,
int *urptr,
300 int outer,
int m,
int N, T epsilon)
305 T *H =
new T[(m+1)*m];
306 T *V =
new T[N*(m+1)];
311 for(
int i=0; i<outer; i++) {
312 sparse::SpmvCsr<T>(val, cind, rptr, x, &V[0], N);
313 blas1::Axpby<T>(1, b, -1, &V[0], N);
314 e[0] = blas1::Nrm2<T>(&V[0], N);
315 blas1::Scal<T>(1/e[0], &V[0], N);
318 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[j*N], t, N, cptr, cnum);
319 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N, cptr, cnum);
320 sparse::SpmvCsr<T>(val, cind, rptr, t, &V[(j+1)*N], N);
321 for(
int k=0; k<=j; k++) {
322 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
323 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
325 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
326 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
327 for(
int k=0; k<j; k++) {
328 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
330 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
332 e[j+1] = s[j] * e[j];
335 printf(
"# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
337 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
338 printf(
"%s iter %d\n", ITER_SYMBOL, i*m+j+1);
339 printf(
"%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
345 blas2::Trsv<T>(H, e, y, m+1, j);
346 blas1::Scal<T>(y[0], &V[0], N);
347 for(
int k=1; k<j; k++) {
348 blas1::Axpy<T>(y[k], &V[k*N], &V[0], N);
350 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[0], t, N, cptr, cnum);
351 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N, cptr, cnum);
352 blas1::Axpy<T>(1, t, x, N);
357 printf(
"# iter %d\n", outer*m);
358 printf(
"# res : Check by using senk::test\n");
393 T *val,
int *cind,
int *rptr,
394 T *lval,
int *lcind,
int *lrptr,
395 T *uval,
int *ucind,
int *urptr,
396 int *cptr,
int cnum,
int bsize,
398 int outer,
int m,
int N, T epsilon)
403 T *H =
new T[(m+1)*m];
404 T *V =
new T[N*(m+1)];
409 for(
int i=0; i<outer; i++) {
410 sparse::SpmvCsr<T>(val, cind, rptr, x, &V[0], N);
411 blas1::Axpby<T>(1, b, -1, &V[0], N);
412 e[0] = blas1::Nrm2<T>(&V[0], N);
413 blas1::Scal<T>(1/e[0], &V[0], N);
416 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[j*N], t, N, cptr, cnum, bsize);
417 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N, cptr, cnum, bsize);
418 sparse::SpmvCsr<T>(val, cind, rptr, t, &V[(j+1)*N], N);
419 for(
int k=0; k<=j; k++) {
420 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
421 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
423 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
424 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
425 for(
int k=0; k<j; k++) {
426 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
428 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
430 e[j+1] = s[j] * e[j];
433 printf(
"# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
435 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
436 printf(
"%s iter %d\n", ITER_SYMBOL, i*m+j+1);
437 printf(
"%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
443 blas2::Trsv<T>(H, e, y, m+1, j);
444 blas1::Scal<T>(y[0], &V[0], N);
445 for(
int k=1; k<j; k++) {
446 blas1::Axpy<T>(y[k], &V[k*N], &V[0], N);
448 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[0], t, N, cptr, cnum, bsize);
449 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N, cptr, cnum, bsize);
450 blas1::Axpy<T>(1, t, x, N);
455 printf(
"# iter %d\n", outer*m);
456 printf(
"# res : Check by using senk::test\n");
489 T *val,
int *cind,
int *rptr,
490 T *lval,
int *lcind,
int *lrptr,
491 T *uval,
int *ucind,
int *urptr,
494 int outer,
int m,
int N, T epsilon)
499 T *H =
new T[(m+1)*m];
500 T *V =
new T[N*(m+1)];
505 for(
int i=0; i<outer; i++) {
506 sparse::SpmvCsr<T>(val, cind, rptr, x, &V[0], N);
507 blas1::Axpby<T>(1, b, -1, &V[0], N);
508 e[0] = blas1::Nrm2<T>(&V[0], N);
509 blas1::Scal<T>(1/e[0], &V[0], N);
512 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[j*N], t, N, bnum);
513 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N, bnum);
514 sparse::SpmvCsr<T>(val, cind, rptr, t, &V[(j+1)*N], N);
515 for(
int k=0; k<=j; k++) {
516 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
517 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
519 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
520 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
521 for(
int k=0; k<j; k++) {
522 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
524 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
526 e[j+1] = s[j] * e[j];
529 printf(
"# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
531 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
532 printf(
"%s iter %d\n", ITER_SYMBOL, i*m+j+1);
533 printf(
"%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
539 blas2::Trsv<T>(H, e, y, m+1, j);
540 blas1::Scal<T>(y[0], &V[0], N);
541 for(
int k=1; k<j; k++) {
542 blas1::Axpy<T>(y[k], &V[k*N], &V[0], N);
544 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, &V[0], t, N, bnum);
545 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, t, t, N, bnum);
546 blas1::Axpy<T>(1, t, x, N);
551 printf(
"# iter %d\n", outer*m);
552 printf(
"# res : Check by using senk::test\n");
582template <
typename T,
int bnl,
int bnw>
584 T *val,
int *cind,
int *rptr,
585 T *blval,
int *blcind,
int *blrptr,
586 T *buval,
int *bucind,
int *burptr,
588 int outer,
int m,
int N, T epsilon)
593 T *H =
new T[(m+1)*m];
594 T *V =
new T[N*(m+1)];
599 for(
int i=0; i<outer; i++) {
600 sparse::SpmvCsr<T>(val, cind, rptr, x, &V[0], N);
601 blas1::Axpby<T>(1, b, -1, &V[0], N);
602 e[0] = blas1::Nrm2<T>(&V[0], N);
606 sparse::SptrsvBcsr_l<T, bnl, bnw>(blval, blcind, blrptr, &V[j*N], t, N);
607 sparse::SptrsvBcsr_u<T, bnl, bnw>(buval, bucind, burptr, t, t, N);
608 sparse::SpmvCsr<T>(val, cind, rptr, t, &V[(j+1)*N], N);
609 for(
int k=0; k<=j; k++) {
610 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
611 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
613 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
614 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
615 for(
int k=0; k<j; k++) {
616 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
618 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
620 e[j+1] = s[j] * e[j];
623 printf(
"# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
625 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
626 printf(
"%s iter %d\n", ITER_SYMBOL, i*m+j+1);
627 printf(
"%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
633 blas2::Trsv<T>(H, e, y, m+1, j);
634 blas1::Scal<T>(y[0], &V[0], N);
635 for(
int k=1; k<j; k++) {
636 blas1::Axpy<T>(y[k], &V[k*N], &V[0], N);
638 sparse::SptrsvBcsr_l<T, bnl, bnw>(blval, blcind, blrptr, &V[0], t, N);
639 sparse::SptrsvBcsr_u<T, bnl, bnw>(buval, bucind, burptr, t, t, N);
640 blas1::Axpy<T>(1, t, x, N);
645 printf(
"# iter %d\n", outer*m);
646 printf(
"# res : Check by using senk::test\n");
679template <
typename T,
int bnl,
int bnw>
681 T *val,
int *cind,
int *rptr,
682 T *blval,
int *blcind,
int *blrptr,
683 T *buval,
int *bucind,
int *burptr,
684 int *cptr,
int cnum,
int bsize,
686 int outer,
int m,
int N, T epsilon)
691 T *H =
new T[(m+1)*m];
692 T *V =
new T[N*(m+1)];
697 for(
int i=0; i<outer; i++) {
698 sparse::SpmvCsr<T>(val, cind, rptr, x, &V[0], N);
699 blas1::Axpby<T>(1, b, -1, &V[0], N);
700 e[0] = blas1::Nrm2<T>(&V[0], N);
704 sparse::SptrsvBcsr_l<T, bnl, bnw>(blval, blcind, blrptr, &V[j*N], t, N, cptr, cnum, bsize);
705 sparse::SptrsvBcsr_u<T, bnl, bnw>(buval, bucind, burptr, t, t, N, cptr, cnum, bsize);
706 sparse::SpmvCsr<T>(val, cind, rptr, t, &V[(j+1)*N], N);
707 for(
int k=0; k<=j; k++) {
708 H[j*(m+1)+k] = blas1::Dot<T>(&V[k*N], &V[(j+1)*N], N);
709 blas1::Axpy<T>(-H[j*(m+1)+k], &V[k*N], &V[(j+1)*N], N);
711 H[j*(m+1)+j+1] = blas1::Nrm2<T>(&V[(j+1)*N], N);
712 blas1::Scal<T>(1/H[j*(m+1)+j+1], &V[(j+1)*N], N);
713 for(
int k=0; k<j; k++) {
714 blas1::Grot<T>(c[k], s[k], &H[j*(m+1)+k], &H[j*(m+1)+k+1]);
716 H[j*(m+1)+j] = blas1::Ggen<T>(H[j*(m+1)+j], H[j*(m+1)+j+1], &c[j], &s[j]);
718 e[j+1] = s[j] * e[j];
721 printf(
"# e[%d] = %e\n", j+1, std::abs(e[j+1]/nrm_b));
723 if(std::abs(e[j+1]) <= nrm_b*epsilon) {
724 printf(
"%s iter %d\n", ITER_SYMBOL, i*m+j+1);
725 printf(
"%s res %e\n", RES_SYMBOL, std::abs(e[j+1])/nrm_b);
731 blas2::Trsv<T>(H, e, y, m+1, j);
732 blas1::Scal<T>(y[0], &V[0], N);
733 for(
int k=1; k<j; k++) {
734 blas1::Axpy<T>(y[k], &V[k*N], &V[0], N);
736 sparse::SptrsvBcsr_l<T, bnl, bnw>(blval, blcind, blrptr, &V[0], t, N, cptr, cnum, bsize);
737 sparse::SptrsvBcsr_u<T, bnl, bnw>(buval, bucind, burptr, t, t, N, cptr, cnum, bsize);
738 blas1::Axpy<T>(1, t, x, N);
743 printf(
"# iter %d\n", outer*m);
744 printf(
"# res : Check by using senk::test\n");
void Scal(T a, T *x, int N)
Multiply x by a.
void AbmcIluGmresm(T *val, int *cind, int *rptr, T *lval, int *lcind, int *lrptr, T *uval, int *ucind, int *urptr, int *cptr, int cnum, int bsize, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILU preconditioned GMRES(m) solver parallelized by ABMC ordering.
void Gmresm(T *val, int *cind, int *rptr, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The Non-preconditioned GMRES(m) solver.
void IluGmresm(T *val, int *cind, int *rptr, T *lval, int *lcind, int *lrptr, T *uval, int *ucind, int *urptr, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILU preconditioned GMRES(m) solver.
void IlubGmresm(T *val, int *cind, int *rptr, T *blval, int *blcind, int *blrptr, T *buval, int *bucind, int *burptr, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILUB preconditioned GMRES(m) solver.
void BjIluGmresm(T *val, int *cind, int *rptr, T *lval, int *lcind, int *lrptr, T *uval, int *ucind, int *urptr, int bnum, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILU preconditioned GMRES(m) solver parallelized by the block Jacobi method.
void AbmcIlubGmresm(T *val, int *cind, int *rptr, T *blval, int *blcind, int *blrptr, T *buval, int *bucind, int *burptr, int *cptr, int cnum, int bsize, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILUB preconditioned GMRES(m) solver parallelized by ABMC ordering.
void AmcIluGmresm(T *val, int *cind, int *rptr, T *lval, int *lcind, int *lrptr, T *uval, int *ucind, int *urptr, int *cptr, int cnum, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILU preconditioned GMRES(m) solver parallelized by AMC ordering.
The top-level namespace of SenK.
Level1 BLAS-style functions are written.
Level2 BLAS-style functions are written.
Functions related to sparse matrices are written.