SenK
SenK is a C++ library for high-performance linear solvers.
senk_gcr.hpp
Go to the documentation of this file.
1
7#ifndef SENK_GCR_HPP
8#define SENK_GCR_HPP
9
10#include "senk_sparse.hpp"
11#include "senk_blas1.hpp"
12
13namespace senk {
14
15namespace solver {
30template <typename T>
31void Gcrm(
32 T *val, int *cind, int *rptr,
33 T *b, T *x, T nrm_b,
34 int outer, int m, int N, T epsilon)
35{
36 T *r = new T[N];
37 T *p = new T[N*(m+1)];
38 T *Ap = new T[N*(m+1)];
39 T *Ar = new T[N];
40 T *dot_Ap = new T[m+1];
41 T alpha, beta, nrm_r;
42
43 for(int i=0; i<outer; i++) {
44 sparse::SpmvCsr<T>(val, cind, rptr, x, r, N);
45 blas1::Axpby<T>(1, b, -1, r, N);
46 blas1::Copy<T>(r, &p[0], N);
47 nrm_r = blas1::Nrm2<T>(r, N);
48 printf("%d %e\n", i*m, nrm_r/nrm_b);
49 if(nrm_r < nrm_b * epsilon) break;
50 sparse::SpmvCsr<T>(val, cind, rptr, &p[0], &Ap[0], N);
51 for(int j=0; j<m; j++) {
52 dot_Ap[j] = blas1::Dot<T>(&Ap[j*N], &Ap[j*N], N);
53 alpha = blas1::Dot<T>(&Ap[j*N], r, N) / dot_Ap[j];
54 blas1::Axpy<T>(alpha, &p[j*N], x, N);
55 blas1::Axpy<T>(-alpha, &Ap[j*N], r, N);
56 sparse::SpmvCsr<T>(val, cind, rptr, r, Ar, N);
57 beta = -blas1::Dot<T>(&Ap[0], Ar, N) / dot_Ap[0];
58 blas1::Axpyz<T>(beta, &p[0], r, &p[(j+1)*N], N);
59 blas1::Axpyz<T>(beta, &Ap[0], Ar, &Ap[(j+1)*N], N);
60 for(int k=1; k<=j; k++) {
61 beta = -blas1::Dot<T>(&Ap[k*N], Ar, N) / dot_Ap[k];
62 blas1::Axpy<T>(beta, &p[k*N], &p[(j+1)*N], N);
63 blas1::Axpy<T>(beta, &Ap[k*N], &Ap[(j+1)*N], N);
64 }
65 }
66 }
67 delete[] r;
68 delete[] p;
69 delete[] Ap;
70 delete[] Ar;
71 delete[] dot_Ap;
72}
93template <typename T>
95 T *val, int *cind, int *rptr,
96 T *lval, int *lcind, int *lrptr,
97 T *uval, int *ucind, int *urptr,
98 T *b, T *x, T nrm_b,
99 int outer, int m, int N, T epsilon)
100{
101 T *r = new T[N];
102 T *Kr = new T[N];
103 T *p = new T[N*(m+1)];
104 T *Ap = new T[N*(m+1)];
105 T *AKr = new T[N];
106 T *dot_Ap = new T[m+1];
107 T alpha, beta, nrm_r;
108
109 for(int i=0; i<outer; i++) {
110 sparse::SpmvCsr<T>(val, cind, rptr, x, r, N);
111 blas1::Axpby<T>(1, b, -1, r, N);
112 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, r, &p[0], N);
113 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, &p[0], &p[0], N);
114 nrm_r = blas1::Nrm2<T>(r, N);
115 printf("%d %e\n", i*m, nrm_r/nrm_b);
116 if(nrm_r < nrm_b * epsilon) break;
117 sparse::SpmvCsr<T>(val, cind, rptr, &p[0], &Ap[0], N);
118 for(int j=0; j<m; j++) {
119 dot_Ap[j] = blas1::Dot<T>(&Ap[j*N], &Ap[j*N], N);
120 alpha = blas1::Dot<T>(&Ap[j*N], r, N) / dot_Ap[j];
121 blas1::Axpy<T>(alpha, &p[j*N], x, N);
122 blas1::Axpy<T>(-alpha, &Ap[j*N], r, N);
123 sparse::SptrsvCsr_l<T>(lval, lcind, lrptr, r, Kr, N);
124 sparse::SptrsvCsr_u<T>(uval, ucind, urptr, Kr, Kr, N);
125 sparse::SpmvCsr<T>(val, cind, rptr, Kr, AKr, N);
126 beta = -blas1::Dot<T>(&Ap[0], AKr, N) / dot_Ap[0];
127 blas1::Axpyz<T>(beta, &p[0], Kr, &p[(j+1)*N], N);
128 blas1::Axpyz<T>(beta, &Ap[0], AKr, &Ap[(j+1)*N], N);
129 for(int k=1; k<=j; k++) {
130 beta = -blas1::Dot<T>(&Ap[k*N], AKr, N) / dot_Ap[k];
131 blas1::Axpy<T>(beta, &p[k*N], &p[(j+1)*N], N);
132 blas1::Axpy<T>(beta, &Ap[k*N], &Ap[(j+1)*N], N);
133 }
134 }
135 }
136 delete[] r;
137 delete[] Kr;
138 delete[] p;
139 delete[] Ap;
140 delete[] AKr;
141 delete[] dot_Ap;
142}
163template <typename T, int bnl, int bnw>
165 T *val, int *cind, int *rptr,
166 T *blval, int *blcind, int *blrptr,
167 T *buval, int *bucind, int *burptr,
168 T *b, T *x, T nrm_b,
169 int outer, int m, int N, T epsilon)
170{
171 T *r = new T[N];
172 T *Kr = new T[N];
173 T *p = new T[N*(m+1)];
174 T *Ap = new T[N*(m+1)];
175 T *AKr = new T[N];
176 T *dot_Ap = new T[m+1];
177 T alpha, beta, nrm_r;
178
179 for(int i=0; i<outer; i++) {
180 sparse::SpmvCsr<T>(val, cind, rptr, x, r, N);
181 blas1::Axpby<T>(1, b, -1, r, N);
182 sparse::SptrsvBcsr_l<T, bnl, bnw>(blval, blcind, blrptr, r, &p[0], N);
183 sparse::SptrsvBcsr_u<T, bnl, bnw>(buval, bucind, burptr, &p[0], &p[0], N);
184 nrm_r = blas1::Nrm2<T>(r, N);
185 printf("%d %e\n", i*m, nrm_r/nrm_b);
186 if(nrm_r < nrm_b * epsilon) break;
187 sparse::SpmvCsr<T>(val, cind, rptr, &p[0], &Ap[0], N);
188 for(int j=0; j<m; j++) {
189 dot_Ap[j] = blas1::Dot<T>(&Ap[j*N], &Ap[j*N], N);
190 alpha = blas1::Dot<T>(&Ap[j*N], r, N) / dot_Ap[j];
191 blas1::Axpy<T>(alpha, &p[j*N], x, N);
192 blas1::Axpy<T>(-alpha, &Ap[j*N], r, N);
193 sparse::SptrsvBcsr_l<T, bnl, bnw>(blval, blcind, blrptr, r, Kr, N);
194 sparse::SptrsvBcsr_u<T, bnl, bnw>(buval, bucind, burptr, Kr, Kr, N);
195 sparse::SpmvCsr<T>(val, cind, rptr, Kr, AKr, N);
196 beta = -blas1::Dot<T>(&Ap[0], AKr, N) / dot_Ap[0];
197 blas1::Axpyz<T>(beta, &p[0], Kr, &p[(j+1)*N], N);
198 blas1::Axpyz<T>(beta, &Ap[0], AKr, &Ap[(j+1)*N], N);
199 for(int k=1; k<=j; k++) {
200 beta = -blas1::Dot<T>(&Ap[k*N], AKr, N) / dot_Ap[k];
201 blas1::Axpy<T>(beta, &p[k*N], &p[(j+1)*N], N);
202 blas1::Axpy<T>(beta, &Ap[k*N], &Ap[(j+1)*N], N);
203 }
204 }
205 }
206 delete[] r;
207 delete[] Kr;
208 delete[] p;
209 delete[] Ap;
210 delete[] AKr;
211 delete[] dot_Ap;
212}
213
214} // namespace solver
215
216} // namespace senk
217
218#endif
void Gcrm(T *val, int *cind, int *rptr, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The Non-preconditioned GCR(m) solver.
Definition: senk_gcr.hpp:31
void IluGcrm(T *val, int *cind, int *rptr, T *lval, int *lcind, int *lrptr, T *uval, int *ucind, int *urptr, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILU preconditioned GCR(m) solver.
Definition: senk_gcr.hpp:94
void IlubGcrm(T *val, int *cind, int *rptr, T *blval, int *blcind, int *blrptr, T *buval, int *bucind, int *burptr, T *b, T *x, T nrm_b, int outer, int m, int N, T epsilon)
The ILUB preconditioned GCR(m) solver.
Definition: senk_gcr.hpp:164
The top-level namespace of SenK.
Level1 BLAS-style functions are written.
Functions related to sparse matrices are written.