SenK
SenK is a C++ library for high-performance linear solvers.
senk_sparse.hpp
Go to the documentation of this file.
1
7#ifndef SENK_SPARSE_HPP
8#define SENK_SPARSE_HPP
9
10namespace senk {
14namespace sparse {
25template <typename T> inline
26void SpmvCsr(T *val, int *cind, int *rptr, T *x, T *y, int N) {
27 #pragma omp parallel for
28 for(int i=0; i<N; i++) {
29 T temp = 0;
30 for(int j=rptr[i]; j<rptr[i+1]; j++) {
31 temp += val[j] * x[cind[j]];
32 }
33 y[i] = temp;
34 }
35}
47template <typename T> inline
48void SpmvCsr(T *val, int *cind, int *rptr, T *diag, T *x, T *y, int N) {
49 #pragma omp parallel for
50 for(int i=0; i<N; i++) {
51 T temp = x[i] * diag[i];
52 for(int j=rptr[i]; j<rptr[i+1]; j++) {
53 temp += val[j] * x[cind[j]];
54 }
55 y[i] = temp;
56 }
57}
70template <typename T, int bnl, int bnw> inline
71void SpmvBcsr(T *bval, int *bcind, int *brptr, T *x, T *y, int N) {
72 int b_size = bnl * bnw;
73 #pragma omp parallel for
74 for(int i=0; i<N; i+=bnl) {
75 int bidx = i / bnl;
76 #pragma omp simd simdlen(bnl)
77 for(int j=0; j<bnl; j++) {
78 y[i+j] = 0;
79 }
80 for(int j=brptr[bidx]; j<brptr[bidx+1]; j++) {
81 int x_ind = bcind[j]*bnw;
82 for(int l=0; l<bnw; l++) {
83 int off = j*b_size+l*bnl;
84 #pragma omp simd simdlen(bnl)
85 for(int k=0; k<bnl; k++) {
86 y[i+k] += bval[off+k] * x[x_ind+l];
87 }
88 }
89 }
90 }
91}
103template <typename T> inline
104void SpmvSell(T *val, int *cind, int *wid, int len, T *x, T *y, int N)
105{
106 int block = (N+len-1)/len;
107 #pragma omp parallel for
108 for(int i=0; i<block; i++) {
109 int start = wid[i] * len;
110 int temp = (i==len-1 && N%len!=0) ? N % len : len;
111 for(int k=0; k<temp; k++) {
112 y[i*len+k] = val[start+k] * x[cind[start+k]];
113 }
114 for(int j=1; j<wid[i+1]-wid[i]; j++) {
115 int off = start+j*len;
116 for(int k=0; k<temp; k++) {
117 y[i*len+k] += val[off+k] * x[cind[off+k]];
118 }
119 }
120 }
121}
132template <typename T> inline
133void SptrsvCsr_l(T *val, int *cind, int *rptr, T *x, T *y, int N)
134{
135 // L is assumed to be unit lower triangular.
136 for(int i=0; i<N; i++) {
137 T temp = x[i];
138 for(int j=rptr[i]; j<rptr[i+1]; j++) {
139 temp -= val[j] * y[cind[j]];
140 }
141 y[i] = temp;
142 }
143}
154template <typename T> inline
155void SptrsvCsr_u(T *val, int *cind, int *rptr, T *x, T *y, int N)
156{
157 // U is assumed to be general upper triangular.
158 // Diagonal has been inverted.
159 for(int i=N-1; i>=0; i--) {
160 T temp = x[i];
161 int j;
162 for(j=rptr[i+1]-1; j>=rptr[i]+1; j--) {
163 temp -= val[j] * y[cind[j]];
164 }
165 y[i] = temp * val[j];
166 }
167}
180template <typename T> inline
182 T *val, int *cind, int *rptr, T *x, T *y,
183 int N, int *cptr, int cnum)
184{
185 // L is assumed to be unit lower triangular.
186 #pragma omp parallel
187 {
188 for(int k=0; k<cnum; k++) {
189 int start = cptr[k];
190 int end = cptr[k+1];
191 #pragma omp for
192 for(int i=start; i<end; i++) {
193 T temp = x[i];
194 for(int j=rptr[i]; j<rptr[i+1]; j++) {
195 temp -= val[j] * y[cind[j]];
196 }
197 y[i] = temp;
198 }
199 }
200 }
201}
214template <typename T> inline
215void SptrsvCsr_u(T *val, int *cind, int *rptr, T *x, T *y,
216 int N, int *cptr, int cnum)
217{
218 // U is assumed to be general upper triangular.
219 // Diagonal has been inverted.
220 #pragma omp parallel
221 {
222 for(int k=cnum-1; k>=0; k--) {
223 int start = cptr[k];
224 int end = cptr[k+1];
225 #pragma omp for
226 for(int i=end-1; i>=start; i--) {
227 T temp = x[i];
228 int j;
229 for(j=rptr[i+1]-1; j>=rptr[i]+1; j--) {
230 temp -= val[j] * y[cind[j]];
231 }
232 y[i] = temp * val[j];
233 }
234 }
235 }
236}
250template <typename T> inline
252 T *val, int *cind, int *rptr, T *x, T *y,
253 int N, int *cptr, int cnum, int bsize)
254{
255 // L is assumed to be unit lower triangular.
256 #pragma omp parallel
257 {
258 for(int k=0; k<cnum; k++) {
259 int start = cptr[k];
260 int end = cptr[k+1];
261 #pragma omp for
262 for(int i=start; i<end; i++) {
263 int base = i*bsize;
264 for(int l=0; l<bsize; l++) {
265 int idx = base+l;
266 T temp = x[idx];
267 for(int j=rptr[idx]; j<rptr[idx+1]; j++) {
268 temp -= val[j] * y[cind[j]];
269 }
270 y[idx] = temp;
271 }
272 }
273 }
274 }
275}
289template <typename T> inline
290void SptrsvCsr_u(T *val, int *cind, int *rptr, T *x, T *y,
291 int N, int *cptr, int cnum, int bsize)
292{
293 // U is assumed to be general upper triangular.
294 // Diagonal has been inverted.
295 #pragma omp parallel
296 {
297 for(int k=cnum-1; k>=0; k--) {
298 int start = cptr[k];
299 int end = cptr[k+1];
300 #pragma omp for
301 for(int i=end-1; i>=start; i--) {
302 int base = i*bsize;
303 for(int l=bsize-1; l>=0; l--) {
304 int idx = base+l;
305 T temp = x[idx];
306 int j;
307 for(j=rptr[idx+1]-1; j>=rptr[idx]+1; j--) {
308 temp -= val[j] * y[cind[j]];
309 }
310 y[idx] = temp * val[j];
311 }
312 }
313 }
314 }
315}
328template <typename T> inline
329void SptrsvCsr_l(T *val, int *cind, int *rptr, T *x, T *y,
330 int N, int bnum)
331{
332 // L is assumed to be unit lower triangular.
333 int bsize = N / bnum;
334 #pragma omp parallel for num_threads(bnum)
335 for(int k=0; k<bnum; k++) {
336 int start = k*bsize;
337 int end = (k+1)*bsize;
338 for(int i=start; i<end; i++) {
339 T temp = x[i];
340 for(int j=rptr[i]; j<rptr[i+1]; j++) {
341 temp -= val[j] * y[cind[j]];
342 }
343 y[i] = temp;
344 }
345 }
346}
359template <typename T> inline
360void SptrsvCsr_u(T *val, int *cind, int *rptr, T *x, T *y,
361 int N, int bnum)
362{
363 // U is assumed to be general upper triangular.
364 // Diagonal has been inverted.
365 int bsize = N / bnum;
366 #pragma omp parallel for num_threads(bnum)
367 for(int k=0; k<bnum; k++) {
368 int start = k*bsize;
369 int end = (k+1)*bsize;
370 for(int i=end-1; i>=start; i--) {
371 T temp = x[i];
372 int j;
373 for(j=rptr[i+1]-1; j>=rptr[i]+1; j--) {
374 temp -= val[j] * y[cind[j]];
375 }
376 y[i] = temp * val[j];
377 }
378 }
379}
392template <typename T, int bnl, int bnw> inline
394 T *bval, int *bcind, int *brptr,
395 T *x, T *y, int N)
396{
397 // L is assumed to be unit lower triangular.
398 int b_size = bnl * bnw;
399 for(int i=0; i<N; i+=bnl) {
400 int bidx = i / bnl;
401 #pragma omp simd simdlen(bnl)
402 for(int j=0; j<bnl; j++) {
403 y[i+j] = x[i+j];
404 }
405 for(int j=brptr[bidx]; j<brptr[bidx+1]; j++) {
406 int x_ind = bcind[j]*bnw;
407 for(int l=0; l<bnw; l++) {
408 int off = j*b_size+l*bnl;
409 #pragma omp simd simdlen(bnl)
410 for(int k=0; k<bnl; k++) {
411 y[i+k] -= bval[off+k] * y[x_ind+l];
412 }
413 }
414 }
415 }
416}
429template <typename T, int bnl, int bnw> inline
431 T *bval, int *bcind, int *brptr,
432 T *x, T *y, int N)
433{
434 int b_size = bnl * bnw;
435 int b_rem = bnl / bnw;
436 for(int i=N-bnl; i>=0; i-=bnl) {
437 int bidx = i / bnl;
438 #pragma omp simd simdlen(bnl)
439 for(int j=0; j<bnl; j++) {
440 y[i+j] = x[i+j];
441 }
442 for(int j=brptr[bidx+1]-1; j>=brptr[bidx]+b_rem; j--) {
443 int x_ind = bcind[j]*bnw;
444 for(int l=0; l<bnw; l++) {
445 int off = j*b_size+l*bnl;
446 #pragma omp simd simdlen(bnl)
447 for(int k=0; k<bnl; k++) {
448 y[i+k] -= bval[off+k] * y[x_ind+l];
449 }
450 }
451 }
452 int pos = brptr[bidx]+b_rem-1;
453 for(int k=b_rem-1; k>=0; k--) {
454 for(int j=bnw-1; j>=0; j--) {
455 int off = pos*b_size+j*bnl;
456 int idx = k*bnw+j;
457 y[i+idx] *= bval[off+idx];
458 for(int l=k*bnw+j-1; l>=0; l--) {
459 y[i+l] -= bval[off+l] * y[i+idx];
460 }
461 }
462 pos--;
463 }
464 }
465}
481template <typename T, int bnl, int bnw> inline
483 T *bval, int *bcind, int *brptr, T *x, T *y,
484 int N, int *cptr, int cnum, int bsize)
485{
486 // L is assumed to be unit lower triangular.
487 int b_size = bnl * bnw;
488 #pragma omp parallel
489 {
490 for(int k=0; k<cnum; k++) {
491 int start = cptr[k];
492 int end = cptr[k+1];
493 #pragma omp for
494 for(int i=start; i<end; i++) {
495 int base = i*bsize;
496 for(int l=0; l<bsize; l+=bnl) {
497 int idx = base+l;
498 int bidx = idx / bnl;
499 #pragma omp simd simdlen(bnl)
500 for(int j=0; j<bnl; j++) {
501 y[idx+j] = x[idx+j];
502 }
503 for(int j=brptr[bidx]; j<brptr[bidx+1]; j++) {
504 int x_ind = bcind[j]*bnw;
505 for(int m=0; m<bnw; m++) {
506 int off = j*b_size+m*bnl;
507 #pragma omp simd simdlen(bnl)
508 for(int n=0; n<bnl; n++) {
509 y[idx+n] -= bval[off+n] * y[x_ind+m];
510 }
511 }
512 }
513 }
514 }
515 }
516 }
517}
533template <typename T, int bnl, int bnw> inline
535 T *bval, int *bcind, int *brptr, T *x, T *y,
536 int N, int *cptr, int cnum, int bsize)
537{
538 int b_size = bnl * bnw;
539 int b_rem = bnl / bnw;
540 #pragma omp parallel
541 {
542 for(int k=cnum-1; k>=0; k--) {
543 int start = cptr[k];
544 int end = cptr[k+1];
545 #pragma omp for
546 for(int i=end-1; i>=start; i--) {
547 int base = i*bsize;
548 for(int l=bsize-bnl; l>=0; l-=bnl) {
549 int idx = base+l;
550 int bidx = idx / bnl;
551 #pragma omp simd simdlen(bnl)
552 for(int j=0; j<bnl; j++) {
553 y[idx+j] = x[idx+j];
554 }
555 for(int j=brptr[bidx+1]-1; j>=brptr[bidx]+b_rem; j--) {
556 int x_ind = bcind[j]*bnw;
557 for(int n=0; n<bnw; n++) {
558 int off = j*b_size+n*bnl;
559 #pragma omp simd simdlen(bnl)
560 for(int m=0; m<bnl; m++) {
561 y[idx+m] -= bval[off+m] * y[x_ind+n];
562 }
563 }
564 }
565 int pos = brptr[bidx]+b_rem-1;
566 for(int m=b_rem-1; m>=0; m--) {
567 for(int j=bnw-1; j>=0; j--) {
568 int off = pos*b_size+j*bnl;
569 int ind = m*bnw+j;
570 y[idx+ind] *= bval[off+ind];
571 for(int n=m*bnw+j-1; n>=0; n--) {
572 y[idx+n] -= bval[off+n] * y[idx+ind];
573 }
574 }
575 pos--;
576 }
577 }
578 }
579 }
580 }
581}
582// ---- experimental ---- //
583/*
584void SpmmCscCsc(
585 double *l_val, int *l_rind, int *l_cptr,
586 double *r_val, int *r_rind, int *r_cptr,
587 double **val, int **rind, int **cptr,
588 int L, int M, int R); // -> L x R matrix
589
590// ---- integer ---- //
591
592template <int bit>
593void SpmvCsr(
594 int *val, int *cind, int *rptr,
595 int *x, int *y, int N)
596{
597 #pragma omp parallel for
598 for(int i=0; i<N; i++) {
599 long temp = 0;
600 for(int j=rptr[i]; j<rptr[i+1]; j++) {
601 temp += (long)val[j] * (long)x[cind[j]];
602 }
603 y[i] = (int)(temp >> bit);
604 }
605}
606
607template <int bit>
608void SpmvCsr(
609 short *val, int *cind, int *rptr,
610 int *x, int *y, int N)
611{
612 #pragma omp parallel for
613 for(int i=0; i<N; i++) {
614 long temp = 0;
615 for(int j=rptr[i]; j<rptr[i+1]; j++) {
616 temp += (long)val[j] * (long)x[cind[j]];
617 }
618 y[i] = (int)(temp >> bit);
619 }
620}
621
622template <int bit>
623void SptrsvCsr(
624 int *val, int *cind, int *rptr,
625 int *x, int *y, Triangle type, int N)
626{
627 switch (type) {
628 case Upper:
629 for(int i=N-1; i>=0; i--) {
630 long temp = (long)x[i] << bit;
631 int j;
632 for(j=rptr[i+1]-1; j>=rptr[i]+1; j--) {
633 temp -= (long)val[j] * (long)y[cind[j]];
634 }
635 y[i] = (int)((temp >> bit) * (long)val[j] >> bit);
636 }
637 break;
638 case Lower:
639 for(int i=0; i<N; i++) {
640 long temp = (long)x[i] << bit;
641 int j;
642 for(j=rptr[i]; j<rptr[i+1]-1; j++) {
643 temp -= (long)val[j] * (long)y[cind[j]];
644 }
645 y[i] = (int)((temp >> bit) * (long)val[j] >> bit);
646 }
647 break;
648 case UnitLower:
649 for(int i=0; i<N; i++) {
650 long temp = (long)x[i] << bit;
651 int j;
652 for(j=rptr[i]; j<rptr[i+1]; j++) {
653 temp -= (long)val[j] * (long)y[cind[j]];
654 }
655 y[i] = (int)(temp >> bit);
656 }
657 break;
658 default:
659 std::cerr << "SptrsvCsr: type is not valit." << std::endl;
660 std::exit(1);
661 }
662}
663*/
664}
665
666}
667
668#endif
void SpmvCsr(T *val, int *cind, int *rptr, T *x, T *y, int N)
Perform SpMV using the CSR format.
Definition: senk_sparse.hpp:26
void SptrsvBcsr_u(T *bval, int *bcind, int *brptr, T *x, T *y, int N)
Perform the sparse upper triangular solve for a matrix stored in the CSR format.
void SpmvBcsr(T *bval, int *bcind, int *brptr, T *x, T *y, int N)
Perform SpMV using the BCSR format.
Definition: senk_sparse.hpp:71
void SpmvSell(T *val, int *cind, int *wid, int len, T *x, T *y, int N)
Perform SpMV using the sliced-ELLPACK (SELL-c) format.
void SptrsvCsr_l(T *val, int *cind, int *rptr, T *x, T *y, int N)
Perform the sparse lower triangular solve on a matrix stored in the CSR format.
void SptrsvBcsr_l(T *bval, int *bcind, int *brptr, T *x, T *y, int N)
Perform the sparse lower triangular solve for a matrix stored in the BCSR format.
void SptrsvCsr_u(T *val, int *cind, int *rptr, T *x, T *y, int N)
Perform the sparse upper triangular solve on a matrix stored in the CSR format.
The top-level namespace of SenK.