32 T *val,
int *cind,
int *rptr,
34 int outer,
int m,
int N, T epsilon)
37 T *p =
new T[N*(m+1)];
38 T *Ap =
new T[N*(m+1)];
40 T *dot_Ap =
new T[m+1];
43 for(
int i=0; i<outer; i++) {
44 sparse::SpmvCsr<T>(val, cind, rptr, x, r, N);
45 blas1::Axpby<T>(1, b, -1, r, N);
46 blas1::Copy<T>(r, &p[0], N);
47 nrm_r = blas1::Nrm2<T>(r, N);
48 printf(
"%d %e\n", i*m, nrm_r/nrm_b);
49 if(nrm_r < nrm_b * epsilon)
break;
50 sparse::SpmvCsr<T>(val, cind, rptr, &p[0], &Ap[0], N);
51 for(
int j=0; j<m; j++) {
52 dot_Ap[j] = blas1::Dot<T>(&Ap[j*N], &Ap[j*N], N);
53 alpha = blas1::Dot<T>(&Ap[j*N], r, N) / dot_Ap[j];
54 blas1::Axpy<T>(alpha, &p[j*N], x, N);
55 blas1::Axpy<T>(-alpha, &Ap[j*N], r, N);
56 sparse::SpmvCsr<T>(val, cind, rptr, r, Ar, N);
57 beta = -blas1::Dot<T>(&Ap[0], Ar, N) / dot_Ap[0];
58 blas1::Axpyz<T>(beta, &p[0], r, &p[(j+1)*N], N);
59 blas1::Axpyz<T>(beta, &Ap[0], Ar, &Ap[(j+1)*N], N);
60 for(
int k=1; k<=j; k++) {
61 beta = -blas1::Dot<T>(&Ap[k*N], Ar, N) / dot_Ap[k];
62 blas1::Axpy<T>(beta, &p[k*N], &p[(j+1)*N], N);
63 blas1::Axpy<T>(beta, &Ap[k*N], &Ap[(j+1)*N], N);
95 T *val,
int *cind,
int *rptr,
96 T *lval,
int *lcind,
int *lrptr,
97 T *uval,
int *ucind,
int *urptr,
99 int outer,
int m,
int N, T epsilon)
103 T *p =
new T[N*(m+1)];
104 T *Ap =
new T[N*(m+1)];
106 T *dot_Ap =
new T[m+1];
107 T alpha, beta, nrm_r;
109 for(
int i=0; i<outer; i++) {
110 sparse::SpmvCsr<T>(val, cind, rptr, x, r, N);
111 blas1::Axpby<T>(1, b, -1, r, N);
112 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, r, &p[0], N);
113 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, &p[0], &p[0], N);
114 nrm_r = blas1::Nrm2<T>(r, N);
115 printf(
"%d %e\n", i*m, nrm_r/nrm_b);
116 if(nrm_r < nrm_b * epsilon)
break;
117 sparse::SpmvCsr<T>(val, cind, rptr, &p[0], &Ap[0], N);
118 for(
int j=0; j<m; j++) {
119 dot_Ap[j] = blas1::Dot<T>(&Ap[j*N], &Ap[j*N], N);
120 alpha = blas1::Dot<T>(&Ap[j*N], r, N) / dot_Ap[j];
121 blas1::Axpy<T>(alpha, &p[j*N], x, N);
122 blas1::Axpy<T>(-alpha, &Ap[j*N], r, N);
123 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, r, Kr, N);
124 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, Kr, Kr, N);
125 sparse::SpmvCsr<T>(val, cind, rptr, Kr, AKr, N);
126 beta = -blas1::Dot<T>(&Ap[0], AKr, N) / dot_Ap[0];
127 blas1::Axpyz<T>(beta, &p[0], Kr, &p[(j+1)*N], N);
128 blas1::Axpyz<T>(beta, &Ap[0], AKr, &Ap[(j+1)*N], N);
129 for(
int k=1; k<=j; k++) {
130 beta = -blas1::Dot<T>(&Ap[k*N], AKr, N) / dot_Ap[k];
131 blas1::Axpy<T>(beta, &p[k*N], &p[(j+1)*N], N);
132 blas1::Axpy<T>(beta, &Ap[k*N], &Ap[(j+1)*N], N);
163template <
typename T,
int bnl,
int bnw>
165 T *val,
int *cind,
int *rptr,
166 T *blval,
int *blcind,
int *blrptr,
167 T *buval,
int *bucind,
int *burptr,
169 int outer,
int m,
int N, T epsilon)
173 T *p =
new T[N*(m+1)];
174 T *Ap =
new T[N*(m+1)];
176 T *dot_Ap =
new T[m+1];
177 T alpha, beta, nrm_r;
179 for(
int i=0; i<outer; i++) {
180 sparse::SpmvCsr<T>(val, cind, rptr, x, r, N);
181 blas1::Axpby<T>(1, b, -1, r, N);
182 sparse::SptrsvBcsr_l<T, bnl, bnw>(blval, blcind, blrptr, r, &p[0], N);
183 sparse::SptrsvBcsr_u<T, bnl, bnw>(buval, bucind, burptr, &p[0], &p[0], N);
184 nrm_r = blas1::Nrm2<T>(r, N);
185 printf(
"%d %e\n", i*m, nrm_r/nrm_b);
186 if(nrm_r < nrm_b * epsilon)
break;
187 sparse::SpmvCsr<T>(val, cind, rptr, &p[0], &Ap[0], N);
188 for(
int j=0; j<m; j++) {
189 dot_Ap[j] = blas1::Dot<T>(&Ap[j*N], &Ap[j*N], N);
190 alpha = blas1::Dot<T>(&Ap[j*N], r, N) / dot_Ap[j];
191 blas1::Axpy<T>(alpha, &p[j*N], x, N);
192 blas1::Axpy<T>(-alpha, &Ap[j*N], r, N);
193 sparse::SptrsvBcsr_l<T, bnl, bnw>(blval, blcind, blrptr, r, Kr, N);
194 sparse::SptrsvBcsr_u<T, bnl, bnw>(buval, bucind, burptr, Kr, Kr, N);
195 sparse::SpmvCsr<T>(val, cind, rptr, Kr, AKr, N);
196 beta = -blas1::Dot<T>(&Ap[0], AKr, N) / dot_Ap[0];
197 blas1::Axpyz<T>(beta, &p[0], Kr, &p[(j+1)*N], N);
198 blas1::Axpyz<T>(beta, &Ap[0], AKr, &Ap[(j+1)*N], N);
199 for(
int k=1; k<=j; k++) {
200 beta = -blas1::Dot<T>(&Ap[k*N], AKr, N) / dot_Ap[k];
201 blas1::Axpy<T>(beta, &p[k*N], &p[(j+1)*N], N);
202 blas1::Axpy<T>(beta, &Ap[k*N], &Ap[(j+1)*N], N);
void Gcrm(T *val, int *cind, int *rptr, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The Non-preconditioned GCR(m) solver.
void IluGcrm(T *val, int *cind, int *rptr, T *lval, int *lcind, int *lrptr, T *uval, int *ucind, int *urptr, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILU preconditioned GCR(m) solver.
void IlubGcrm(T *val, int *cind, int *rptr, T *blval, int *blcind, int *blrptr, T *buval, int *bucind, int *burptr, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILUB preconditioned GCR(m) solver.
The top-level namespace of SenK.
Level1 BLAS-style functions are written.
Functions related to sparse matrices are written.