SenK
SenK is a C++ library for high-performance linear solvers.
All Classes Namespaces Files Functions Variables Enumerations Pages
senk_bicgstab.hpp
Go to the documentation of this file.
1
7#ifndef SENK_BICGSTAB_HPP
8#define SENK_BICGSTAB_HPP
9
10#include "senk_sparse.hpp"
11#include "senk_blas1.hpp"
12
13namespace senk {
14
15namespace solver {
29template <typename T>
31 T *val, int *cind, int *rptr,
32 T *b, T *x, T nrm_b,
33 int max_iter, int N, T epsilon)
34{
35 int i;
36 int flag = 0;
37 T *r = new T[N];
38 T *rstr = new T[N];
39 T *p = new T[N];
40 T *Ap = new T[N];
41 T *s = new T[N];
42 T *As = new T[N];
43 T *temp = new T[N];
44 T alpha, beta, omega;
45 T r_rstr, prev;
46 T nrm_r = nrm_b;
47
48 sparse::SpmvCsr<T>(val, cind, rptr, x, r, N);
49 blas1::Axpby<T>(1, b, -1, r, N);
50 blas1::Copy<T>(r, rstr, N);
51 blas1::Copy<T>(r, p, N);
52 r_rstr = blas1::Dot<T>(r, rstr, N);
53 for(i=0; i<max_iter; i++) {
54 sparse::SpmvCsr<T>(val, cind, rptr, p, Ap, N);
55 alpha = r_rstr / blas1::Dot<T>(Ap, rstr, N);
56 blas1::Axpyz<T>(-alpha, Ap, r, s, N);
57 sparse::SpmvCsr<T>(val, cind, rptr, s, As, N);
58 omega = blas1::Dot<T>(As, s, N) / blas1::Dot<T>(As, As, N);
59 blas1::Axpy<T>(alpha, p, x, N);
60 blas1::Axpy<T>(omega, s, x, N);
61 blas1::Axpyz<T>(-omega, As, s, r, N);
62 nrm_r = blas1::Nrm2<T>(r, N);
63 printf("%d %e\n", i+1, nrm_r/nrm_b);
64 if(nrm_r < epsilon * nrm_b) {
65 printf("# iter %d\n", i+1);
66 printf("# res %e\n", nrm_r/nrm_b);
67 flag = 1;
68 break;
69 }
70 prev = r_rstr;
71 r_rstr = blas1::Dot<T>(r, rstr, N);
72 beta = alpha / omega * r_rstr / prev;
73 blas1::Axpyz<T>(-omega, Ap, p, temp, N);
74 blas1::Axpyz<T>(beta, temp, r, p, N);
75 }
76 if(!flag) {
77 printf("# iter %d (max)\n", i);
78 printf("# res %e\n", nrm_r/nrm_b);
79 }
80 delete[] r;
81 delete[] rstr;
82 delete[] p;
83 delete[] Ap;
84 delete[] s;
85 delete[] As;
86 delete[] temp;
87}
107template <typename T>
109 T *val, int *cind, int *rptr,
110 T *lval, int *lcind, int *lrptr,
111 T *uval, int *ucind, int *urptr,
112 T *b, T *x, T nrm_b,
113 int max_iter, int N, T epsilon)
114{
115 int i;
116 int flag = 0;
117 T *r = new T[N];
118 T *rstr = new T[N];
119 T *p = new T[N];
120 T *Kp = new T[N];
121 T *AKp = new T[N];
122 T *s = new T[N];
123 T *Ks = new T[N];
124 T *AKs = new T[N];
125 T *temp = new T[N];
126
127 T alpha, beta, omega;
128 T r_rstr, prev;
129 T nrm_r = nrm_b;
130
131 sparse::SpmvCsr<T>(val, cind, rptr, x, r, N);
132 blas1::Axpby<T>(1, b, -1, r, N);
133 blas1::Copy<T>(r, rstr, N);
134 blas1::Copy<T>(r, p, N);
135 r_rstr = blas1::Dot<T>(r, rstr, N);
136 for(i=0; i<max_iter; i++) {
137 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, p, Kp, N);
138 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, Kp, Kp, N);
139 sparse::SpmvCsr<T>(val, cind, rptr, Kp, AKp, N);
140 alpha = r_rstr / blas1::Dot<T>(AKp, rstr, N);
141 blas1::Axpyz(-alpha, AKp, r, s, N);
142 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, s, Ks, N);
143 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, Ks, Ks, N);
144 sparse::SpmvCsr<T>(val, cind, rptr, Ks, AKs, N);
145 omega = blas1::Dot<T>(AKs, s, N) / blas1::Dot<T>(AKs, AKs, N);
146 blas1::Axpy<T>(alpha, Kp, x, N);
147 blas1::Axpy<T>(omega, Ks, x, N);
148 blas1::Axpyz<T>(-omega, AKs, s, r, N);
149 nrm_r = blas1::Nrm2<T>(r, N);
150 printf("%d %e\n", i+1, nrm_r/nrm_b);
151 if(nrm_r < epsilon * nrm_b) {
152 printf("# iter %d\n", i+1);
153 printf("# res %e\n", nrm_r/nrm_b);
154 flag = 1;
155 break;
156 }
157 prev = r_rstr;
158 r_rstr = blas1::Dot<T>(r, rstr, N);
159 beta = alpha / omega * r_rstr / prev;
160 blas1::Axpyz<T>(-omega, AKp, p, temp, N);
161 blas1::Axpyz<T>(beta, temp, r, p, N);
162 }
163 if(!flag) {
164 printf("# iter %d (max)\n", i);
165 printf("# res %e\n", nrm_r/nrm_b);
166 }
167 delete[] r;
168 delete[] rstr;
169 delete[] p;
170 delete[] Kp;
171 delete[] AKp;
172 delete[] s;
173 delete[] Ks;
174 delete[] AKs;
175 delete[] temp;
176}
198template <typename T, int bnl, int bnw>
200 T *val, int *cind, int *rptr,
201 T *blval, int *blcind, int *blrptr,
202 T *buval, int *bucind, int *burptr,
203 T *b, T *x, T nrm_b,
204 int max_iter, int N, T epsilon)
205{
206 int i;
207 int flag = 0;
208 T *r = new T[N];
209 T *rstr = new T[N];
210 T *p = new T[N];
211 T *Kp = new T[N];
212 T *AKp = new T[N];
213 T *s = new T[N];
214 T *Ks = new T[N];
215 T *AKs = new T[N];
216 T *temp = new T[N];
217
218 T alpha, beta, omega;
219 T r_rstr, prev;
220 T nrm_r = nrm_b;
221
222 sparse::SpmvCsr<T>(val, cind, rptr, x, r, N);
223 blas1::Axpby<T>(1, b, -1, r, N);
224 blas1::Copy<T>(r, rstr, N);
225 blas1::Copy<T>(r, p, N);
226 r_rstr = blas1::Dot<T>(r, rstr, N);
227 for(i=0; i<max_iter; i++) {
228 sparse::SptrsvBcsr_l<T, bnl, bnw>(blval, blcind, blrptr, p, Kp, N);
229 sparse::SptrsvBcsr_u<T, bnl, bnw>(buval, bucind, burptr, Kp, Kp, N);
230 sparse::SpmvCsr<T>(val, cind, rptr, Kp, AKp, N);
231 alpha = r_rstr / blas1::Dot(AKp, rstr, N);
232 blas1::Axpyz(-alpha, AKp, r, s, N);
233 sparse::SptrsvBcsr_l<T, bnl, bnw>(blval, blcind, blrptr, s, Ks, N);
234 sparse::SptrsvBcsr_u<T, bnl, bnw>(buval, bucind, burptr, Ks, Ks, N);
235 sparse::SpmvCsr<T>(val, cind, rptr, Ks, AKs, N);
236 omega = blas1::Dot<T>(AKs, s, N) / blas1::Dot<T>(AKs, AKs, N);
237 blas1::Axpy<T>(alpha, Kp, x, N);
238 blas1::Axpy<T>(omega, Ks, x, N);
239 blas1::Axpyz<T>(-omega, AKs, s, r, N);
240 nrm_r = blas1::Nrm2<T>(r, N);
241 printf("%d %e\n", i+1, nrm_r/nrm_b);
242 if(nrm_r < epsilon * nrm_b) {
243 printf("# iter %d\n", i+1);
244 printf("# res %e\n", nrm_r/nrm_b);
245 flag = 1;
246 break;
247 }
248 prev = r_rstr;
249 r_rstr = blas1::Dot<T>(r, rstr, N);
250 beta = alpha / omega * r_rstr / prev;
251 blas1::Axpyz<T>(-omega, AKp, p, temp, N);
252 blas1::Axpyz<T>(beta, temp, r, p, N);
253 }
254 if(!flag) {
255 printf("# iter %d (max)\n", i);
256 printf("# res %e\n", nrm_r/nrm_b);
257 }
258 delete[] r;
259 delete[] rstr;
260 delete[] p;
261 delete[] Kp;
262 delete[] AKp;
263 delete[] s;
264 delete[] Ks;
265 delete[] AKs;
266 delete[] temp;
267}
268
269//
270//void Bicgstab_IR(
271// double *val, int *fval, int *cind, int *rptr,
272// double *b, double *x, double nrm_b,
273// int max_iter, int N, double epsilon);
274//void Bicgstab_IR(
275// double *val, short *fval, int *cind, int *rptr,
276// double *b, double *x, double nrm_b,
277// int max_iter, int N, double epsilon);
278//
279//void IluBicgstab(
280// float *val, int *cind, int *rptr,
281// float *lval, int *lcind, int *lrptr,
282// float *uval, int *ucind, int *urptr,
283// float *b, float *x, double nrm_b,
284// int max_iter, int N, double epsilon);
285//void IluBicgstab_IR(
286// double *val, float *fval, int *cind, int *rptr,
287// float *lval, int *lcind, int *lrptr,
288// float *uval, int *ucind, int *urptr,
289// double *b, double *x, double nrm_b,
290// int max_iter, int N, double epsilon);
291//void IluBicgstab_IR(
292// double *val, int *fval, int *cind, int *rptr,
293// int *lval, int *lcind, int *lrptr,
294// int *uval, int *ucind, int *urptr,
295// double *b, double *x, double nrm_b,
296// int max_iter, int N, double epsilon);
297
298} // namespace solver
299
300} // namespace senk
301
302#endif
T Dot(T *x, T *y, int N)
Compute the dot product of x and y.
Definition: senk_blas1.hpp:93
void Axpyz(T a, T *x, T *y, T *z, int N)
Compute z = a * x + y.
Definition: senk_blas1.hpp:80
void IlubBicgstab(T *val, int *cind, int *rptr, T *blval, int *blcind, int *blrptr, T *buval, int *bucind, int *burptr, T *b, T *x, T nrm_b, int max_iter, int N, T epsilon)
ILUB preconditioned BiCGStab solver.
void IluBicgstab(T *val, int *cind, int *rptr, T *lval, int *lcind, int *lrptr, T *uval, int *ucind, int *urptr, T *b, T *x, T nrm_b, int max_iter, int N, T epsilon)
ILU preconditioned BiCGStab solver.
void Bicgstab(T *val, int *cind, int *rptr, T *b, T *x, T nrm_b, int max_iter, int N, T epsilon)
Non-preconditioned BiCGStab solver.
The top-level namespace of SenK.
Level1 BLAS-style functions are written.
Functions related to sparse matrices are written.