33 for(
int i=0; i<N; i++) {
34 for(
int j=rptr[i]+off; j<rptr[i+1]; j++) {
35 if((*val)[j] == 0) { off++;
continue; }
36 (*val)[j-off] = (*val)[j];
37 (*cind)[j-off] = (*cind)[j];
41 *val = utils::SafeRealloc<T>(*val, rptr[N]);
42 *cind = utils::SafeRealloc<int>(*cind, rptr[N]);
46bool CheckStructure(T *val,
int *cind,
int *rptr,
int N)
48 for(
int i=0; i<N; i++) {
50 for(
int j=rptr[i]; j<rptr[i+1]; j++) {
51 if(cind[j] == i) { flag =
true;
break; }
53 if(!flag) {
return false; }
60 T *val,
int *cind,
int *rptr,
61 T **cval,
int **crind,
int **ccptr,
66 *cval = utils::SafeMalloc<T>(nnz);
67 *crind = utils::SafeMalloc<int>(nnz);
68 *ccptr = utils::SafeMalloc<int>(M+1);
69 int *num = utils::SafeCalloc<int>(M);
70 for(
int i=0; i<N; i++) {
71 for(
int j=rptr[i]; j<rptr[i+1]; j++) { num[cind[j]]++; }
74 for(
int i=0; i<M; i++) {
75 (*ccptr)[i+1] = (*ccptr)[i] + num[i];
78 for(
int i=0; i<N; i++) {
79 for(
int j=rptr[i]; j<rptr[i+1]; j++) {
80 int off = (*ccptr)[cind[j]];
81 int pos = num[cind[j]];
82 (*crind)[off+pos] = i;
83 (*cval)[off+pos] = val[j];
92 T *val,
int *cind,
int *rptr,
93 T **s_val,
int **s_cind,
int **s_len,
96 int num_slice = (N+size-1)/size;
97 *s_len = utils::SafeMalloc<int>(num_slice+1);
105 for(
int i=0; i<num_slice; i++) {
107 if(i==num_slice-1 && N%size!=0) temp_slice = N % size;
109 for(
int j=0; j<temp_slice; j++) {
111 if(row_max < rptr[row_id+1] - rptr[row_id]) {
112 row_max = rptr[row_id+1] - rptr[row_id];
115 (*s_len)[i+1] = (*s_len)[i] + row_max;
116 nnz += row_max * temp_slice;
118 *s_val = utils::SafeMalloc<T>(nnz);
119 *s_cind = utils::SafeMalloc<int>(nnz);
120 for(
int i=0; i<num_slice; i++) {
122 if(i==num_slice-1 && N%size!=0) temp_slice = N % size;
123 for(
int j=0; j<temp_slice; j++) {
126 for(
int k=rptr[row_id]; k<rptr[row_id+1]; k++) {
127 (*s_val)[(*s_len)[i]*size+row_count*temp_slice+j] = val[k];
128 (*s_cind)[(*s_len)[i]*size+row_count*temp_slice+j] = cind[k];
131 if(row_count < (*s_len)[i+1] - (*s_len)[i]) {
132 int temp = (*s_len)[i+1] - (*s_len)[i] - row_count;
133 for(
int k=0; k<temp; k++) {
134 (*s_val)[(*s_len)[i]*size+row_count*temp_slice+j] = 0;
135 (*s_cind)[(*s_len)[i]*size+row_count*temp_slice+j] = 0;
146 T *val,
int *cind,
int *rptr,
147 T **bval,
int **bcind,
int **brptr,
148 int N,
int bnl,
int bnw)
150 if(N % bnl || N % bnw) {
151 printf(
"Error: Csr2Bcsr\n");
154 *brptr = utils::SafeMalloc<int>(N/bnl+1);
157 int *ptr = utils::SafeMalloc<int>(bnl);
159 for(
int i=0; i<N; i+=bnl) {
161 for(
int j=0; j<bnl; j++) { ptr[j] = rptr[i+j]; }
165 for(
int j=0; j<bnl; j++) {
166 if(ptr[j] != -1 && cind[ptr[j]] < min)
171 for(
int j=0; j<bnl; j++) {
172 if(ptr[j] == -1)
continue;
173 while(cind[ptr[j]]/bnw == min/bnw) {
175 if(ptr[j] >= rptr[i+j+1]) {ptr[j] = -1;
break;}
180 (*brptr)[i/bnl+1] = cnt;
182 *bcind = utils::SafeMalloc<int>(cnt);
183 *bval = utils::SafeCalloc<T>(cnt*bnl*bnw);
186 for(
int i=0; i<N; i+=bnl) {
188 for(
int j=0; j<bnl; j++) { ptr[j] = rptr[i+j]; }
191 for(
int j=0; j<bnl; j++) {
192 if(ptr[j] != -1 && cind[ptr[j]] < min)
196 for(
int j=0; j<bnl; j++) {
197 if(ptr[j] == -1)
continue;
198 while(cind[ptr[j]]/bnw == min/bnw) {
199 int off = cind[ptr[j]] % bnw;
200 (*bval)[cnt*bnl*bnw+off*bnl+j] = val[ptr[j]];
203 if(ptr[j] >= rptr[i+j+1]) {ptr[j] = -1;
break;}
206 (*bcind)[cnt] = min/bnw;
215 T *bval,
int *bcind,
int *brptr,
216 T **val,
int **cind,
int **rptr,
217 int N,
int bnl,
int bnw)
219 int bsize = bnl * bnw;
220 int num_block = brptr[N/bnl];
222 *val = senk::utils::SafeMalloc<T>(num_block*bnl*bnw);
223 *cind = senk::utils::SafeMalloc<int>(num_block*bnl*bnw);
224 *rptr = senk::utils::SafeMalloc<int>(N+1);
227 for(
int i=0; i<N; i++) {
230 for(
int bj=brptr[bid]; bj<brptr[bid+1]; bj++) {
231 for(
int j=0; j<bnw; j++) {
232 (*cind)[count] = (bcind[bj])*bnw+j;
233 (*val)[count] = bval[bj*bsize+j*bnl+id];
237 (*rptr)[i+1] = count;
244void Padding(T **val,
int **cind,
int **rptr,
int size,
int *N)
246 int remain = (N[0] % size == 0) ? 0 : size - N[0] % size;
247 int NNZ = rptr[0][N[0]];
248 *val = utils::SafeRealloc<T>(*val, NNZ+remain);
249 *cind = utils::SafeRealloc<int>(*cind, NNZ+remain);
250 *rptr = utils::SafeRealloc<int>(*rptr, N[0]+1+remain);
251 for(
int i=0; i<remain; i++) val[0][NNZ+i] = 1;
252 for(
int i=0; i<remain; i++) cind[0][NNZ+i] = N[0]+i;
253 for(
int i=0; i<remain; i++) rptr[0][N[0]+1+i] = rptr[0][N[0]+i]+1;
259 T *val,
int *cind,
int *rptr,
260 T **lval,
int **lcind,
int **lrptr,
261 T **uval,
int **ucind,
int **urptr,
262 T **diag,
int N,
const char *key,
bool invDiag)
267 int *l_ptr, *u_ptr, *d_ptr;
268 T *lv_ptr, *uv_ptr, *dv_ptr;
269 int *lc_ptr, *uc_ptr, *dc_ptr;
270 int *lr_ptr, *ur_ptr;
271 if(std::strcmp(key,
"LD-U") == 0) {
272 l_ptr=&lNNZ; u_ptr=&uNNZ; d_ptr=&lNNZ;
273 }
else if(std::strcmp(key,
"L-DU") == 0) {
274 l_ptr=&lNNZ; u_ptr=&uNNZ; d_ptr=&uNNZ;
275 }
else if(std::strcmp(key,
"L-D-U") == 0) {
276 l_ptr=&lNNZ; u_ptr=&uNNZ; d_ptr=&dNNZ;
277 }
else if(std::strcmp(key,
"LU-D") == 0) {
278 l_ptr=&lNNZ; u_ptr=&lNNZ; d_ptr=&dNNZ;
279 }
else { printf(
"Split: Keyword is not valid."); exit(1); }
280 for(
int i=0; i<N; i++) {
281 for(
int j=rptr[i]; j<rptr[i+1]; j++) {
282 if(cind[j] < i) { (*l_ptr)++; }
283 else if(cind[j] > i) { (*u_ptr)++; }
287 *lval = utils::SafeMalloc<T>(lNNZ);
288 *lcind = utils::SafeMalloc<int>(lNNZ);
289 *lrptr = utils::SafeMalloc<int>(N+1);
291 *uval = utils::SafeMalloc<T>(uNNZ);
292 *ucind = utils::SafeMalloc<int>(uNNZ);
293 *urptr = utils::SafeMalloc<int>(N+1);
295 if(dNNZ == N) { *diag = utils::SafeMalloc<T>(dNNZ); }
296 lNNZ = 0; uNNZ = 0; dNNZ = 0;
297 if(std::strcmp(key,
"LD-U") == 0) {
298 lv_ptr = *lval; uv_ptr = *uval; dv_ptr = *lval;
299 lc_ptr = *lcind; uc_ptr = *ucind; dc_ptr = *lcind;
300 lr_ptr = *lrptr; ur_ptr = *urptr;
301 }
else if(std::strcmp(key,
"L-DU") == 0) {
302 lv_ptr = *lval; uv_ptr = *uval; dv_ptr = *uval;
303 lc_ptr = *lcind; uc_ptr = *ucind; dc_ptr = *ucind;
304 lr_ptr = *lrptr; ur_ptr = *urptr;
305 }
else if(std::strcmp(key,
"L-D-U") == 0) {
306 lv_ptr = *lval; uv_ptr = *uval; dv_ptr = *diag;
307 lc_ptr = *lcind; uc_ptr = *ucind; dc_ptr =
nullptr;
308 lr_ptr = *lrptr; ur_ptr = *urptr;
309 }
else if(std::strcmp(key,
"LU-D") == 0) {
310 lv_ptr = *lval; uv_ptr = *lval; dv_ptr = *diag;
311 lc_ptr = *lcind; uc_ptr = *lcind; dc_ptr =
nullptr;
312 lr_ptr = *lrptr; ur_ptr = *lrptr;
316 for(
int i=0; i<N; i++) {
317 for(
int j=rptr[i]; j<rptr[i+1]; j++) {
319 lv_ptr[*l_ptr] = val[j]; lc_ptr[*l_ptr] = cind[j];
321 }
else if(cind[j] > i) {
322 uv_ptr[*u_ptr] = val[j]; uc_ptr[*u_ptr] = cind[j];
325 dv_ptr[*d_ptr] = (invDiag) ? 1/val[j] : val[j];
326 if(dc_ptr) { dc_ptr[*d_ptr] = cind[j]; }
330 lr_ptr[i+1] = *l_ptr;
331 ur_ptr[i+1] = *u_ptr;
337 T *tval,
int *tcind,
int *trptr,
338 T **val,
int **cind,
int **rptr,
339 int N,
const char *key)
341 T *lval, *uval, *tval2;
342 int *lcind, *ucind, *tcind2;
343 int *lrptr, *urptr, *trptr2;
344 int nnz = trptr[N]*2 - N;
345 Csr2Csc<T>(tval, tcind, trptr, &tval2, &tcind2, &trptr2, N, N);
346 if(std::strcmp(key,
"L") == 0) {
347 lval = tval; lcind = tcind; lrptr = trptr;
348 uval = tval2; ucind = tcind2; urptr = trptr2;
349 }
else if(std::strcmp(key,
"U") == 0) {
350 lval = tval2; lcind = tcind2; lrptr = trptr2;
351 uval = tval; ucind = tcind; urptr = trptr;
352 }
else { printf(
"Expand: Keyword is not valid."); exit(1); }
353 *val = utils::SafeMalloc<T>(nnz);
354 *cind = utils::SafeMalloc<int>(nnz);
355 *rptr = utils::SafeMalloc<int>(N+1);
356 int cnt = 0; (*rptr)[0] = cnt;
357 for(
int i=0; i<N; i++) {
358 for(
int j=lrptr[i]; j<lrptr[i+1]; j++) {
359 (*val)[cnt] = lval[j]; (*cind)[cnt] = lcind[j];
362 for(
int j=urptr[i]+1; j<urptr[i+1]; j++) {
363 (*val)[cnt] = uval[j]; (*cind)[cnt] = ucind[j];
375 T *tval,
int *tcind,
int *trptr,
376 T **val,
int **cind,
int **rptr,
379 *val = utils::SafeMalloc<T>(trptr[N]);
380 *cind = utils::SafeMalloc<int>(trptr[N]);
381 *rptr = utils::SafeMalloc<int>(N+1);
382 (*rptr)[0] = trptr[0];
383 for(
int i=0; i<N; i++) {
384 (*rptr)[i+1] = trptr[i+1];
385 for(
int j=trptr[i]; j<trptr[i+1]; j++) {
386 (*val)[j] = tval[j]; (*cind)[j] = tcind[j];
392void GetDiag(T *val,
int *cind,
int *rptr, T **diag,
int N)
394 *diag = utils::SafeMalloc<T>(N);
395 #pragma omp parallel for
396 for(
int i=0; i<N; i++) {
397 for(
int j=rptr[i]; j<rptr[i+1]; j++) {
407void Scaling(T *val,
int *cind,
int *rptr, T *ld, T *rd,
int N)
409 #pragma omp parallel for
410 for(
int i=0; i<N; i++) {
412 for(
int j=rptr[i]; j<rptr[i+1]; j++) {
413 val[j] = left * val[j] * rd[cind[j]];
419void Ilu0(T *val,
int *cind,
int *rptr,
int N)
421 for(
int i=1; i<N; i++) {
422 for(
int k=rptr[i]; k<rptr[i+1]; k++) {
423 if(cind[k] >= i)
break;
424 for(
int l=rptr[cind[k]]; l<rptr[cind[k]+1]; l++) {
425 if(cind[l] == cind[k]) {
427 printf(
"Error: Ilu0, 0 pivot\n");
430 val[k] = val[k] / val[l];
434 int pos = rptr[cind[k]];
435 for(
int j=k+1; j<rptr[i+1]; j++) {
436 for(
int l=pos; l<rptr[cind[k]+1]; l++) {
437 if(cind[l] < cind[j])
continue;
439 if(cind[l] == cind[j]) {
440 val[j] -= val[k] * val[l];
451void Ilup(T **val,
int **cind,
int **rptr,
int N,
int p)
455 sparse::SpVec<T, int> temp;
456 sparse::SpVec<T, int> temp2;
457 int *zeros = utils::SafeCalloc<int>(N);
462 int first_len = (*rptr)[1] - (*rptr)[0];
463 T *new_val = utils::SafeMalloc<T>(first_len);
464 int *new_cind = utils::SafeMalloc<int>(first_len);
465 int *new_lev = utils::SafeCalloc<int>(first_len);
466 int *new_rptr = utils::SafeMalloc<int>(N+1);
467 int new_len = first_len;
469 memcpy(new_val, *val,
sizeof(T)*first_len);
470 memcpy(new_cind, *cind,
sizeof(
int)*first_len);
472 new_rptr[1] = first_len;
475 int now_len = (*rptr)[i+1] - (*rptr)[i];
477 temp.Append(&(*val)[(*rptr)[i]], zeros, &(*cind)[(*rptr)[i]], now_len);
479 while(count < temp.GetLen() && temp.GetIdx(count) < i) {
481 if(temp.GetLev(k_ptr) > p) { count++;
continue; }
482 int k = temp.GetIdx(k_ptr);
484 for(j=0; j<count; j++) {
485 temp2.Append(temp.GetVal(j), temp.GetLev(j), temp.GetIdx(j));
487 for(j=new_rptr[k]; j<new_rptr[k+1]; j++) {
488 if(new_cind[j] == k) {
489 pivot = temp.GetVal(k_ptr) / new_val[j];
490 pivot_lev = temp.GetLev(k_ptr);
496 temp2.Append(pivot, 0, k);
497 while(k_ptr<temp.GetLen() || j<new_rptr[k+1]) {
500 if(k_ptr < temp.GetLen()) t_col1 = temp.GetIdx(k_ptr);
501 if(j < new_rptr[k+1]) t_col2 = new_cind[j];
503 T t_val = temp.GetVal(k_ptr);
504 int t_lev = temp.GetLev(k_ptr);
505 int t_col = temp.GetIdx(k_ptr);
506 temp2.Append(t_val, t_lev, t_col);
508 }
else if(t_col1==t_col2) {
509 T t_val = temp.GetVal(k_ptr) - pivot*new_val[j];
510 int t_lev = temp.GetLev(k_ptr);
511 int t_col = temp.GetIdx(k_ptr);
512 if(t_lev > pivot_lev+new_lev[j]+1) {
513 t_lev = pivot_lev+new_lev[j]+1;
515 temp2.Append(t_val, t_lev, t_col);
519 T t_val = -pivot*new_val[j];
520 int t_lev = pivot_lev+new_lev[j]+1;
521 int t_col = new_cind[j];
523 temp2.Append(t_val, t_lev, t_col);
529 for(j=0; j<temp2.GetLen(); j++) {
530 temp.Append(temp2.GetVal(j), temp2.GetLev(j), temp2.GetIdx(j));
533 new_val = utils::SafeRealloc<T>(new_val, new_len+temp.GetLen());
534 new_cind = utils::SafeRealloc<int>(new_cind, new_len+temp.GetLen());
535 new_lev = utils::SafeRealloc<int>(new_lev, new_len+temp.GetLen());
536 for(j=0; j<temp.GetLen(); j++) {
537 if(temp.GetLev(j) <= p) {
538 new_val[new_len] = temp.GetVal(j);
539 new_cind[new_len] = temp.GetIdx(j);
540 new_lev[new_len] = temp.GetLev(j);
544 new_rptr[i+1] = new_len;
546 (*val) = utils::SafeRealloc<T>(new_val, new_len);
547 (*cind) = utils::SafeRealloc<int>(new_cind, new_len);
548 (*rptr) = utils::SafeRealloc<int>(new_rptr, N+1);
552void AllocLevelZero(T **val,
int **cind,
int **rptr,
int N,
int p)
556 sparse::SpVec<T, int> temp;
557 sparse::SpVec<T, int> temp2;
558 int *zeros = utils::SafeCalloc<int>(N);
563 int first_len = (*rptr)[1] - (*rptr)[0];
564 T *new_val = utils::SafeMalloc<T>(first_len);
565 int *new_cind = utils::SafeMalloc<int>(first_len);
566 int *new_lev = utils::SafeCalloc<int>(first_len);
567 int *new_rptr = utils::SafeMalloc<int>(N+1);
568 int new_len = first_len;
570 memcpy(new_val, *val,
sizeof(T)*first_len);
571 memcpy(new_cind, *cind,
sizeof(
int)*first_len);
573 new_rptr[1] = first_len;
576 int now_len = (*rptr)[i+1] - (*rptr)[i];
578 temp.Append(&(*val)[(*rptr)[i]], zeros, &(*cind)[(*rptr)[i]], now_len);
580 while(count < temp.GetLen() && temp.GetIdx(count) < i) {
582 if(temp.GetLev(k_ptr) > p) { count++;
continue; }
583 int k = temp.GetIdx(k_ptr);
585 for(j=0; j<count; j++) {
586 temp2.Append(temp.GetVal(j), temp.GetLev(j), temp.GetIdx(j));
588 for(j=new_rptr[k]; j<new_rptr[k+1]; j++) {
589 if(new_cind[j] == k) {
590 pivot = temp.GetVal(k_ptr);
591 pivot_lev = temp.GetLev(k_ptr);
597 temp2.Append(pivot, 0, k);
598 while(k_ptr<temp.GetLen() || j<new_rptr[k+1]) {
601 if(k_ptr < temp.GetLen()) t_col1 = temp.GetIdx(k_ptr);
602 if(j < new_rptr[k+1]) t_col2 = new_cind[j];
604 T t_val = temp.GetVal(k_ptr);
605 int t_lev = temp.GetLev(k_ptr);
606 int t_col = temp.GetIdx(k_ptr);
607 temp2.Append(t_val, t_lev, t_col);
609 }
else if(t_col1==t_col2) {
610 T t_val = temp.GetVal(k_ptr);
611 int t_lev = temp.GetLev(k_ptr);
612 int t_col = temp.GetIdx(k_ptr);
613 if(t_lev > pivot_lev+new_lev[j]+1) {
614 t_lev = pivot_lev+new_lev[j]+1;
616 temp2.Append(t_val, t_lev, t_col);
621 int t_lev = pivot_lev+new_lev[j]+1;
622 int t_col = new_cind[j];
624 temp2.Append(t_val, t_lev, t_col);
630 for(j=0; j<temp2.GetLen(); j++) {
631 temp.Append(temp2.GetVal(j), temp2.GetLev(j), temp2.GetIdx(j));
634 new_val = utils::SafeRealloc<T>(new_val, new_len+temp.GetLen());
635 new_cind = utils::SafeRealloc<int>(new_cind, new_len+temp.GetLen());
636 new_lev = utils::SafeRealloc<int>(new_lev, new_len+temp.GetLen());
637 for(j=0; j<temp.GetLen(); j++) {
638 if(temp.GetLev(j) <= p) {
639 new_val[new_len] = temp.GetVal(j);
640 new_cind[new_len] = temp.GetIdx(j);
641 new_lev[new_len] = temp.GetLev(j);
645 new_rptr[i+1] = new_len;
647 (*val) = utils::SafeRealloc<T>(new_val, new_len);
648 (*cind) = utils::SafeRealloc<int>(new_cind, new_len);
649 (*rptr) = utils::SafeRealloc<int>(new_rptr, N+1);
653void AllocBlockZero(T **val,
int **cind,
int **rptr,
int N,
int bnl,
int bnw)
658 Csr2Bcsr<T>(*val, *cind, *rptr, &bval, &bcind, &brptr, N, bnl, bnw);
662 Bcsr2Csr<T>(bval, bcind, brptr, val, cind, rptr, N, bnl, bnw);
669void RemoveOffDiagonal(T **val,
int **cind,
int **rptr,
int N,
int bnum)
671 int bsize = N / bnum;
673 for(
int i=0; i<N; i++) {
674 int start = (*rptr)[i]+off;
675 int min = i / bsize * bsize;
676 int max = min + bsize;
677 for(
int j=start; j<(*rptr)[i+1]; j++) {
678 if((*cind)[j] < min || max <= (*cind)[j]) {
682 (*val)[j-off] = (*val)[j];
683 (*cind)[j-off] = (*cind)[j];
687 *val = utils::SafeRealloc<double>(*val, (*rptr)[N]);
688 *cind = utils::SafeRealloc<int>(*cind, (*rptr)[N]);
692void Reordering(T *val,
int *cind,
int *rptr,
int *RP,
int N)
694 #pragma omp parallel for
695 for(
int i=0; i<N; i++) {
696 for(
int j=rptr[i]; j<rptr[i+1]; j++) {
697 cind[j] = RP[cind[j]];
699 helper::QuickSort<int, double>(cind, val, rptr[i], rptr[i+1]-1);
704void Reordering(T *val,
int *cind,
int *rptr,
int *LP,
int *RP,
int N)
707 double *tval = utils::SafeMalloc<double>(NNZ);
708 int *tcind = utils::SafeMalloc<int>(NNZ);
709 int *trptr = utils::SafeMalloc<int>(N+1);
711 for(
int i=0; i<N; i++) {
713 trptr[i+1] = trptr[i] + (rptr[
id+1] - rptr[id]);
715 #pragma omp parallel for
716 for(
int i=0; i<N; i++) {
718 for(
int j=0; j<rptr[
id+1]-rptr[id]; j++) {
719 tval[trptr[i]+j] = val[rptr[id]+j];
720 tcind[trptr[i]+j] = RP[cind[rptr[id]+j]];
722 helper::QuickSort<int, double>(tcind, tval, trptr[i], trptr[i+1]-1);
724 utils::Copy<double>(tval, val, NNZ);
725 utils::Copy<int>(tcind, cind, NNZ);
726 utils::Copy<int>(trptr, rptr, N+1);
void RemoveZeros(T **val, int **cind, int *rptr, int N)
Remove zero elements form an input matrix.
The top-level namespace of SenK.
Classes used in SenK are written.
Utility functions are defined.