SenK
SenK is a C++ library for high-performance linear solvers.
senk_matrix.hpp
Go to the documentation of this file.
1
7#ifndef SENK_MATRIX_HPP
8#define SENK_MATRIX_HPP
9
10#include <cstdio>
11#include <cstring>
12
13#include "senk_utils.hpp"
14#include "senk_class.hpp"
15
16namespace senk {
20namespace matrix {
29template <typename T>
30void RemoveZeros(T **val, int **cind, int *rptr, int N)
31{
32 int off = 0;
33 for(int i=0; i<N; i++) {
34 for(int j=rptr[i]+off; j<rptr[i+1]; j++) {
35 if((*val)[j] == 0) { off++; continue; }
36 (*val)[j-off] = (*val)[j];
37 (*cind)[j-off] = (*cind)[j];
38 }
39 rptr[i+1] -= off;
40 }
41 *val = utils::SafeRealloc<T>(*val, rptr[N]);
42 *cind = utils::SafeRealloc<int>(*cind, rptr[N]);
43}
44
45template <typename T>
46bool CheckStructure(T *val, int *cind, int *rptr, int N)
47{
48 for(int i=0; i<N; i++) {
49 bool flag = false;
50 for(int j=rptr[i]; j<rptr[i+1]; j++) {
51 if(cind[j] == i) { flag = true; break; }
52 }
53 if(!flag) { return false; }
54 }
55 return true;
56}
57
58template <typename T>
59void Csr2Csc(
60 T *val, int *cind, int *rptr,
61 T **cval, int **crind, int **ccptr,
62 int N, int M)
63{
64 // The size of the input matrix is N * M
65 int nnz = rptr[N];
66 *cval = utils::SafeMalloc<T>(nnz);
67 *crind = utils::SafeMalloc<int>(nnz);
68 *ccptr = utils::SafeMalloc<int>(M+1);
69 int *num = utils::SafeCalloc<int>(M);
70 for(int i=0; i<N; i++) {
71 for(int j=rptr[i]; j<rptr[i+1]; j++) { num[cind[j]]++; }
72 }
73 (*ccptr)[0] = 0;
74 for(int i=0; i<M; i++) {
75 (*ccptr)[i+1] = (*ccptr)[i] + num[i];
76 num[i] = 0;
77 }
78 for(int i=0; i<N; i++) {
79 for(int j=rptr[i]; j<rptr[i+1]; j++) {
80 int off = (*ccptr)[cind[j]];
81 int pos = num[cind[j]];
82 (*crind)[off+pos] = i;
83 (*cval)[off+pos] = val[j];
84 num[cind[j]]++;
85 }
86 }
87 free(num);
88}
89
90template <typename T>
91int Csr2Sell(
92 T *val, int *cind, int *rptr,
93 T **s_val, int **s_cind, int **s_len,
94 int size, int N)
95{
96 int num_slice = (N+size-1)/size;
97 *s_len = utils::SafeMalloc<int>(num_slice+1);
98 int temp_slice;
99 int row_id;
100 int row_max;
101 int row_count;
102 int nnz = 0;
103
104 (*s_len)[0] = 0;
105 for(int i=0; i<num_slice; i++) {
106 temp_slice = size;
107 if(i==num_slice-1 && N%size!=0) temp_slice = N % size;
108 row_max = 0;
109 for(int j=0; j<temp_slice; j++) {
110 row_id = i*size+j;
111 if(row_max < rptr[row_id+1] - rptr[row_id]) {
112 row_max = rptr[row_id+1] - rptr[row_id];
113 }
114 }
115 (*s_len)[i+1] = (*s_len)[i] + row_max;
116 nnz += row_max * temp_slice;
117 }
118 *s_val = utils::SafeMalloc<T>(nnz);
119 *s_cind = utils::SafeMalloc<int>(nnz);
120 for(int i=0; i<num_slice; i++) {
121 temp_slice = size;
122 if(i==num_slice-1 && N%size!=0) temp_slice = N % size;
123 for(int j=0; j<temp_slice; j++) {
124 row_id = i*size+j;
125 row_count = 0;
126 for(int k=rptr[row_id]; k<rptr[row_id+1]; k++) {
127 (*s_val)[(*s_len)[i]*size+row_count*temp_slice+j] = val[k];
128 (*s_cind)[(*s_len)[i]*size+row_count*temp_slice+j] = cind[k];
129 row_count++;
130 }
131 if(row_count < (*s_len)[i+1] - (*s_len)[i]) {
132 int temp = (*s_len)[i+1] - (*s_len)[i] - row_count;
133 for(int k=0; k<temp; k++) {
134 (*s_val)[(*s_len)[i]*size+row_count*temp_slice+j] = 0;
135 (*s_cind)[(*s_len)[i]*size+row_count*temp_slice+j] = 0;
136 row_count++;
137 }
138 }
139 }
140 }
141 return nnz;
142}
143
144template <typename T>
145void Csr2Bcsr(
146 T *val, int *cind, int *rptr,
147 T **bval, int **bcind, int **brptr,
148 int N, int bnl, int bnw)
149{
150 if(N % bnl || N % bnw) {
151 printf("Error: Csr2Bcsr\n");
152 exit(EXIT_FAILURE);
153 }
154 *brptr = utils::SafeMalloc<int>(N/bnl+1);
155 int cnt = 0;
156 (*brptr)[0] = 0;
157 int *ptr = utils::SafeMalloc<int>(bnl);
158// Count the number of block
159 for(int i=0; i<N; i+=bnl) {
160 // Initialize "ptr"
161 for(int j=0; j<bnl; j++) { ptr[j] = rptr[i+j]; }
162 while(true) {
163 // Find minimum col value
164 int min = N;
165 for(int j=0; j<bnl; j++) {
166 if(ptr[j] != -1 && cind[ptr[j]] < min)
167 min = cind[ptr[j]];
168 }
169 if(min == N) break;
170 // Increment ptr[j] whose col-idx is in min block
171 for(int j=0; j<bnl; j++) {
172 if(ptr[j] == -1) continue;
173 while(cind[ptr[j]]/bnw == min/bnw) {
174 ptr[j]++;
175 if(ptr[j] >= rptr[i+j+1]) {ptr[j] = -1; break;}
176 }
177 }
178 cnt++;
179 }
180 (*brptr)[i/bnl+1] = cnt;
181 }
182 *bcind = utils::SafeMalloc<int>(cnt);
183 *bval = utils::SafeCalloc<T>(cnt*bnl*bnw);
184// Assign val to bval
185 cnt = 0;
186 for(int i=0; i<N; i+=bnl) {
187 // Initialize "ptr"
188 for(int j=0; j<bnl; j++) { ptr[j] = rptr[i+j]; }
189 while(true) {
190 int min = N;
191 for(int j=0; j<bnl; j++) {
192 if(ptr[j] != -1 && cind[ptr[j]] < min)
193 min = cind[ptr[j]];
194 }
195 if(min == N) break;
196 for(int j=0; j<bnl; j++) {
197 if(ptr[j] == -1) continue;
198 while(cind[ptr[j]]/bnw == min/bnw) {
199 int off = cind[ptr[j]] % bnw;
200 (*bval)[cnt*bnl*bnw+off*bnl+j] = val[ptr[j]];
201 //printf("%e\n", (*bval)[cnt*bnl*bnw+off*bnl+j]);
202 ptr[j]++;
203 if(ptr[j] >= rptr[i+j+1]) {ptr[j] = -1; break;}
204 }
205 }
206 (*bcind)[cnt] = min/bnw;
207 cnt++;
208 }
209 }
210 free(ptr);
211}
212
213template <typename T>
214int Bcsr2Csr(
215 T *bval, int *bcind, int *brptr,
216 T **val, int **cind, int **rptr,
217 int N, int bnl, int bnw)
218{
219 int bsize = bnl * bnw;
220 int num_block = brptr[N/bnl];
221 int nnz;
222 *val = senk::utils::SafeMalloc<T>(num_block*bnl*bnw);
223 *cind = senk::utils::SafeMalloc<int>(num_block*bnl*bnw);
224 *rptr = senk::utils::SafeMalloc<int>(N+1);
225 (*rptr)[0] = 0;
226 int count = 0;
227 for(int i=0; i<N; i++) {
228 int bid = i / bnl;
229 int id = i % bnl; // 0 to bnl-1
230 for(int bj=brptr[bid]; bj<brptr[bid+1]; bj++) {
231 for(int j=0; j<bnw; j++) {
232 (*cind)[count] = (bcind[bj])*bnw+j;
233 (*val)[count] = bval[bj*bsize+j*bnl+id];
234 count++;
235 }
236 }
237 (*rptr)[i+1] = count;
238 }
239 nnz = (*rptr)[N];
240 return nnz;
241}
242
243template <typename T>
244void Padding(T **val, int **cind, int **rptr, int size, int *N)
245{
246 int remain = (N[0] % size == 0) ? 0 : size - N[0] % size;
247 int NNZ = rptr[0][N[0]];
248 *val = utils::SafeRealloc<T>(*val, NNZ+remain);
249 *cind = utils::SafeRealloc<int>(*cind, NNZ+remain);
250 *rptr = utils::SafeRealloc<int>(*rptr, N[0]+1+remain);
251 for(int i=0; i<remain; i++) val[0][NNZ+i] = 1;
252 for(int i=0; i<remain; i++) cind[0][NNZ+i] = N[0]+i;
253 for(int i=0; i<remain; i++) rptr[0][N[0]+1+i] = rptr[0][N[0]+i]+1;
254 N[0] += remain;
255}
256
257template <typename T>
258void Split(
259 T *val, int *cind, int *rptr,
260 T **lval, int **lcind, int **lrptr,
261 T **uval, int **ucind, int **urptr,
262 T **diag, int N, const char *key, bool invDiag)
263{
264 int lNNZ = 0;
265 int uNNZ = 0;
266 int dNNZ = 0;
267 int *l_ptr, *u_ptr, *d_ptr;
268 T *lv_ptr, *uv_ptr, *dv_ptr;
269 int *lc_ptr, *uc_ptr, *dc_ptr;
270 int *lr_ptr, *ur_ptr;
271 if(std::strcmp(key, "LD-U") == 0) {
272 l_ptr=&lNNZ; u_ptr=&uNNZ; d_ptr=&lNNZ;
273 }else if(std::strcmp(key, "L-DU") == 0) {
274 l_ptr=&lNNZ; u_ptr=&uNNZ; d_ptr=&uNNZ;
275 }else if(std::strcmp(key, "L-D-U") == 0) {
276 l_ptr=&lNNZ; u_ptr=&uNNZ; d_ptr=&dNNZ;
277 }else if(std::strcmp(key, "LU-D") == 0) {
278 l_ptr=&lNNZ; u_ptr=&lNNZ; d_ptr=&dNNZ;
279 }else { printf("Split: Keyword is not valid."); exit(1); }
280 for(int i=0; i<N; i++) {
281 for(int j=rptr[i]; j<rptr[i+1]; j++) {
282 if(cind[j] < i) { (*l_ptr)++; }
283 else if(cind[j] > i) { (*u_ptr)++; }
284 else { (*d_ptr)++; }
285 }
286 }
287 *lval = utils::SafeMalloc<T>(lNNZ);
288 *lcind = utils::SafeMalloc<int>(lNNZ);
289 *lrptr = utils::SafeMalloc<int>(N+1);
290 if(uNNZ != 0) {
291 *uval = utils::SafeMalloc<T>(uNNZ);
292 *ucind = utils::SafeMalloc<int>(uNNZ);
293 *urptr = utils::SafeMalloc<int>(N+1);
294 }
295 if(dNNZ == N) { *diag = utils::SafeMalloc<T>(dNNZ); }
296 lNNZ = 0; uNNZ = 0; dNNZ = 0;
297 if(std::strcmp(key, "LD-U") == 0) {
298 lv_ptr = *lval; uv_ptr = *uval; dv_ptr = *lval;
299 lc_ptr = *lcind; uc_ptr = *ucind; dc_ptr = *lcind;
300 lr_ptr = *lrptr; ur_ptr = *urptr;
301 }else if(std::strcmp(key, "L-DU") == 0) {
302 lv_ptr = *lval; uv_ptr = *uval; dv_ptr = *uval;
303 lc_ptr = *lcind; uc_ptr = *ucind; dc_ptr = *ucind;
304 lr_ptr = *lrptr; ur_ptr = *urptr;
305 }else if(std::strcmp(key, "L-D-U") == 0) {
306 lv_ptr = *lval; uv_ptr = *uval; dv_ptr = *diag;
307 lc_ptr = *lcind; uc_ptr = *ucind; dc_ptr = nullptr;
308 lr_ptr = *lrptr; ur_ptr = *urptr;
309 }else if(std::strcmp(key, "LU-D") == 0) {
310 lv_ptr = *lval; uv_ptr = *lval; dv_ptr = *diag;
311 lc_ptr = *lcind; uc_ptr = *lcind; dc_ptr = nullptr;
312 lr_ptr = *lrptr; ur_ptr = *lrptr;
313 }else { return; }
314 lr_ptr[0] = 0;
315 ur_ptr[0] = 0;
316 for(int i=0; i<N; i++) {
317 for(int j=rptr[i]; j<rptr[i+1]; j++) {
318 if(cind[j] < i) {
319 lv_ptr[*l_ptr] = val[j]; lc_ptr[*l_ptr] = cind[j];
320 (*l_ptr)++;
321 }else if(cind[j] > i) {
322 uv_ptr[*u_ptr] = val[j]; uc_ptr[*u_ptr] = cind[j];
323 (*u_ptr)++;
324 }else {
325 dv_ptr[*d_ptr] = (invDiag) ? 1/val[j] : val[j];
326 if(dc_ptr) { dc_ptr[*d_ptr] = cind[j]; }
327 (*d_ptr)++;
328 }
329 }
330 lr_ptr[i+1] = *l_ptr;
331 ur_ptr[i+1] = *u_ptr;
332 }
333}
334
335template <typename T>
336void Expand(
337 T *tval, int *tcind, int *trptr,
338 T **val, int **cind, int **rptr,
339 int N, const char *key)
340{
341 T *lval, *uval, *tval2;
342 int *lcind, *ucind, *tcind2;
343 int *lrptr, *urptr, *trptr2;
344 int nnz = trptr[N]*2 - N;
345 Csr2Csc<T>(tval, tcind, trptr, &tval2, &tcind2, &trptr2, N, N);
346 if(std::strcmp(key, "L") == 0) {
347 lval = tval; lcind = tcind; lrptr = trptr;
348 uval = tval2; ucind = tcind2; urptr = trptr2;
349 }else if(std::strcmp(key, "U") == 0) {
350 lval = tval2; lcind = tcind2; lrptr = trptr2;
351 uval = tval; ucind = tcind; urptr = trptr;
352 }else { printf("Expand: Keyword is not valid."); exit(1); }
353 *val = utils::SafeMalloc<T>(nnz);
354 *cind = utils::SafeMalloc<int>(nnz);
355 *rptr = utils::SafeMalloc<int>(N+1);
356 int cnt = 0; (*rptr)[0] = cnt;
357 for(int i=0; i<N; i++) {
358 for(int j=lrptr[i]; j<lrptr[i+1]; j++) {
359 (*val)[cnt] = lval[j]; (*cind)[cnt] = lcind[j];
360 cnt++;
361 }
362 for(int j=urptr[i]+1; j<urptr[i+1]; j++) {
363 (*val)[cnt] = uval[j]; (*cind)[cnt] = ucind[j];
364 cnt++;
365 }
366 (*rptr)[i+1] = cnt;
367 }
368 free(tval2);
369 free(tcind2);
370 free(trptr2);
371}
372
373template <typename T>
374void Duplicate(
375 T *tval, int *tcind, int *trptr,
376 T **val, int **cind, int **rptr,
377 int N)
378{
379 *val = utils::SafeMalloc<T>(trptr[N]);
380 *cind = utils::SafeMalloc<int>(trptr[N]);
381 *rptr = utils::SafeMalloc<int>(N+1);
382 (*rptr)[0] = trptr[0];
383 for(int i=0; i<N; i++) {
384 (*rptr)[i+1] = trptr[i+1];
385 for(int j=trptr[i]; j<trptr[i+1]; j++) {
386 (*val)[j] = tval[j]; (*cind)[j] = tcind[j];
387 }
388 }
389}
390
391template <typename T>
392void GetDiag(T *val, int *cind, int *rptr, T **diag, int N)
393{
394 *diag = utils::SafeMalloc<T>(N);
395 #pragma omp parallel for
396 for(int i=0; i<N; i++) {
397 for(int j=rptr[i]; j<rptr[i+1]; j++) {
398 if(cind[j] == i) {
399 (*diag)[i] = val[j];
400 break;
401 }
402 }
403 }
404}
405
406template <typename T>
407void Scaling(T *val, int *cind, int *rptr, T *ld, T *rd, int N)
408{
409 #pragma omp parallel for
410 for(int i=0; i<N; i++) {
411 T left = ld[i];
412 for(int j=rptr[i]; j<rptr[i+1]; j++) {
413 val[j] = left * val[j] * rd[cind[j]];
414 }
415 }
416}
417
418template <typename T>
419void Ilu0(T *val, int *cind, int *rptr, int N)
420{
421 for(int i=1; i<N; i++) {
422 for(int k=rptr[i]; k<rptr[i+1]; k++) {
423 if(cind[k] >= i) break;
424 for(int l=rptr[cind[k]]; l<rptr[cind[k]+1]; l++) {
425 if(cind[l] == cind[k]) {
426 if(val[l] == 0) {
427 printf("Error: Ilu0, 0 pivot\n");
428 exit(1);
429 }
430 val[k] = val[k] / val[l];
431 break;
432 }
433 }
434 int pos = rptr[cind[k]];
435 for(int j=k+1; j<rptr[i+1]; j++) {
436 for(int l=pos; l<rptr[cind[k]+1]; l++) {
437 if(cind[l] < cind[j]) continue;
438 pos = l;
439 if(cind[l] == cind[j]) {
440 val[j] -= val[k] * val[l];
441 pos++;
442 }
443 break;
444 }
445 }
446 }
447 }
448}
449
450template <typename T>
451void Ilup(T **val, int **cind, int **rptr, int N, int p)
452{
453 int i, j;
454 //int NNZ = (*rptr)[N];
455 sparse::SpVec<T, int> temp;
456 sparse::SpVec<T, int> temp2;
457 int *zeros = utils::SafeCalloc<int>(N);
458
459 T pivot = 0;
460 int pivot_lev = 0;
461
462 int first_len = (*rptr)[1] - (*rptr)[0];
463 T *new_val = utils::SafeMalloc<T>(first_len);
464 int *new_cind = utils::SafeMalloc<int>(first_len);
465 int *new_lev = utils::SafeCalloc<int>(first_len);
466 int *new_rptr = utils::SafeMalloc<int>(N+1);
467 int new_len = first_len;
468 //一行目
469 memcpy(new_val, *val, sizeof(T)*first_len);
470 memcpy(new_cind, *cind, sizeof(int)*first_len);
471 new_rptr[0] = 0;
472 new_rptr[1] = first_len;
473
474 for(i=1; i<N; i++) {
475 int now_len = (*rptr)[i+1] - (*rptr)[i];
476 temp.Clear();
477 temp.Append(&(*val)[(*rptr)[i]], zeros, &(*cind)[(*rptr)[i]], now_len);
478 int count = 0;
479 while(count < temp.GetLen() && temp.GetIdx(count) < i) {
480 int k_ptr = count;
481 if(temp.GetLev(k_ptr) > p) { count++; continue; }
482 int k = temp.GetIdx(k_ptr);
483 temp2.Clear();
484 for(j=0; j<count; j++) {
485 temp2.Append(temp.GetVal(j), temp.GetLev(j), temp.GetIdx(j));
486 }
487 for(j=new_rptr[k]; j<new_rptr[k+1]; j++) {
488 if(new_cind[j] == k) {
489 pivot = temp.GetVal(k_ptr) / new_val[j];
490 pivot_lev = temp.GetLev(k_ptr);
491 break;
492 }
493 }
494 j++;
495 k_ptr++;
496 temp2.Append(pivot, 0, k);
497 while(k_ptr<temp.GetLen() || j<new_rptr[k+1]) {
498 int t_col1 = N+1;
499 int t_col2 = N+1;
500 if(k_ptr < temp.GetLen()) t_col1 = temp.GetIdx(k_ptr);
501 if(j < new_rptr[k+1]) t_col2 = new_cind[j];
502 if(t_col1<t_col2) {
503 T t_val = temp.GetVal(k_ptr);
504 int t_lev = temp.GetLev(k_ptr);
505 int t_col = temp.GetIdx(k_ptr);
506 temp2.Append(t_val, t_lev, t_col);
507 k_ptr++;
508 }else if(t_col1==t_col2) {
509 T t_val = temp.GetVal(k_ptr) - pivot*new_val[j];
510 int t_lev = temp.GetLev(k_ptr);
511 int t_col = temp.GetIdx(k_ptr);
512 if(t_lev > pivot_lev+new_lev[j]+1) {
513 t_lev = pivot_lev+new_lev[j]+1;
514 }
515 temp2.Append(t_val, t_lev, t_col);
516 k_ptr++;
517 j++;
518 }else { // (k>j) fill-in
519 T t_val = -pivot*new_val[j];
520 int t_lev = pivot_lev+new_lev[j]+1;
521 int t_col = new_cind[j];
522 //printf("lev %d\n", t_lev);
523 temp2.Append(t_val, t_lev, t_col);
524 j++;
525 }
526 }
527 count++;
528 temp.Clear();
529 for(j=0; j<temp2.GetLen(); j++) {
530 temp.Append(temp2.GetVal(j), temp2.GetLev(j), temp2.GetIdx(j));
531 }
532 }
533 new_val = utils::SafeRealloc<T>(new_val, new_len+temp.GetLen());
534 new_cind = utils::SafeRealloc<int>(new_cind, new_len+temp.GetLen());
535 new_lev = utils::SafeRealloc<int>(new_lev, new_len+temp.GetLen());
536 for(j=0; j<temp.GetLen(); j++) {
537 if(temp.GetLev(j) <= p) {
538 new_val[new_len] = temp.GetVal(j);
539 new_cind[new_len] = temp.GetIdx(j);
540 new_lev[new_len] = temp.GetLev(j);
541 new_len++;
542 }
543 }
544 new_rptr[i+1] = new_len;
545 }
546 (*val) = utils::SafeRealloc<T>(new_val, new_len);
547 (*cind) = utils::SafeRealloc<int>(new_cind, new_len);
548 (*rptr) = utils::SafeRealloc<int>(new_rptr, N+1);
549}
550
551template <typename T>
552void AllocLevelZero(T **val, int **cind, int **rptr, int N, int p)
553{
554 int i, j;
555 //int NNZ = (*rptr)[N];
556 sparse::SpVec<T, int> temp;
557 sparse::SpVec<T, int> temp2;
558 int *zeros = utils::SafeCalloc<int>(N);
559
560 T pivot = 0;
561 int pivot_lev = 0;
562
563 int first_len = (*rptr)[1] - (*rptr)[0];
564 T *new_val = utils::SafeMalloc<T>(first_len);
565 int *new_cind = utils::SafeMalloc<int>(first_len);
566 int *new_lev = utils::SafeCalloc<int>(first_len);
567 int *new_rptr = utils::SafeMalloc<int>(N+1);
568 int new_len = first_len;
569 //一行目
570 memcpy(new_val, *val, sizeof(T)*first_len);
571 memcpy(new_cind, *cind, sizeof(int)*first_len);
572 new_rptr[0] = 0;
573 new_rptr[1] = first_len;
574
575 for(i=1; i<N; i++) {
576 int now_len = (*rptr)[i+1] - (*rptr)[i];
577 temp.Clear();
578 temp.Append(&(*val)[(*rptr)[i]], zeros, &(*cind)[(*rptr)[i]], now_len);
579 int count = 0;
580 while(count < temp.GetLen() && temp.GetIdx(count) < i) {
581 int k_ptr = count;
582 if(temp.GetLev(k_ptr) > p) { count++; continue; }
583 int k = temp.GetIdx(k_ptr);
584 temp2.Clear();
585 for(j=0; j<count; j++) {
586 temp2.Append(temp.GetVal(j), temp.GetLev(j), temp.GetIdx(j));
587 }
588 for(j=new_rptr[k]; j<new_rptr[k+1]; j++) {
589 if(new_cind[j] == k) {
590 pivot = temp.GetVal(k_ptr);// / new_val[j];
591 pivot_lev = temp.GetLev(k_ptr);
592 break;
593 }
594 }
595 j++;
596 k_ptr++;
597 temp2.Append(pivot, 0, k);
598 while(k_ptr<temp.GetLen() || j<new_rptr[k+1]) {
599 int t_col1 = N+1;
600 int t_col2 = N+1;
601 if(k_ptr < temp.GetLen()) t_col1 = temp.GetIdx(k_ptr);
602 if(j < new_rptr[k+1]) t_col2 = new_cind[j];
603 if(t_col1<t_col2) {
604 T t_val = temp.GetVal(k_ptr);
605 int t_lev = temp.GetLev(k_ptr);
606 int t_col = temp.GetIdx(k_ptr);
607 temp2.Append(t_val, t_lev, t_col);
608 k_ptr++;
609 }else if(t_col1==t_col2) {
610 T t_val = temp.GetVal(k_ptr);
611 int t_lev = temp.GetLev(k_ptr);
612 int t_col = temp.GetIdx(k_ptr);
613 if(t_lev > pivot_lev+new_lev[j]+1) {
614 t_lev = pivot_lev+new_lev[j]+1;
615 }
616 temp2.Append(t_val, t_lev, t_col);
617 k_ptr++;
618 j++;
619 }else { // (k>j) fill-in
620 T t_val = 0;
621 int t_lev = pivot_lev+new_lev[j]+1;
622 int t_col = new_cind[j];
623 //printf("lev %d\n", t_lev);
624 temp2.Append(t_val, t_lev, t_col);
625 j++;
626 }
627 }
628 count++;
629 temp.Clear();
630 for(j=0; j<temp2.GetLen(); j++) {
631 temp.Append(temp2.GetVal(j), temp2.GetLev(j), temp2.GetIdx(j));
632 }
633 }
634 new_val = utils::SafeRealloc<T>(new_val, new_len+temp.GetLen());
635 new_cind = utils::SafeRealloc<int>(new_cind, new_len+temp.GetLen());
636 new_lev = utils::SafeRealloc<int>(new_lev, new_len+temp.GetLen());
637 for(j=0; j<temp.GetLen(); j++) {
638 if(temp.GetLev(j) <= p) {
639 new_val[new_len] = temp.GetVal(j);
640 new_cind[new_len] = temp.GetIdx(j);
641 new_lev[new_len] = temp.GetLev(j);
642 new_len++;
643 }
644 }
645 new_rptr[i+1] = new_len;
646 }
647 (*val) = utils::SafeRealloc<T>(new_val, new_len);
648 (*cind) = utils::SafeRealloc<int>(new_cind, new_len);
649 (*rptr) = utils::SafeRealloc<int>(new_rptr, N+1);
650}
651
652template <typename T>
653void AllocBlockZero(T **val, int **cind, int **rptr, int N, int bnl, int bnw)
654{
655 T *bval;
656 int *bcind;
657 int *brptr;
658 Csr2Bcsr<T>(*val, *cind, *rptr, &bval, &bcind, &brptr, N, bnl, bnw);
659 free(*val);
660 free(*cind);
661 free(*rptr);
662 Bcsr2Csr<T>(bval, bcind, brptr, val, cind, rptr, N, bnl, bnw);
663 free(bval);
664 free(bcind);
665 free(brptr);
666}
667
668template <typename T>
669void RemoveOffDiagonal(T **val, int **cind, int **rptr, int N, int bnum)
670{
671 int bsize = N / bnum;
672 int off = 0;
673 for(int i=0; i<N; i++) {
674 int start = (*rptr)[i]+off;
675 int min = i / bsize * bsize;
676 int max = min + bsize;
677 for(int j=start; j<(*rptr)[i+1]; j++) {
678 if((*cind)[j] < min || max <= (*cind)[j]) {
679 off++;
680 continue;
681 }
682 (*val)[j-off] = (*val)[j];
683 (*cind)[j-off] = (*cind)[j];
684 }
685 (*rptr)[i+1] -= off;
686 }
687 *val = utils::SafeRealloc<double>(*val, (*rptr)[N]);
688 *cind = utils::SafeRealloc<int>(*cind, (*rptr)[N]);
689}
690
691template <typename T>
692void Reordering(T *val, int *cind, int *rptr, int *RP, int N)
693{
694 #pragma omp parallel for
695 for(int i=0; i<N; i++) {
696 for(int j=rptr[i]; j<rptr[i+1]; j++) {
697 cind[j] = RP[cind[j]];
698 }
699 helper::QuickSort<int, double>(cind, val, rptr[i], rptr[i+1]-1);
700 }
701}
702
703template <typename T>
704void Reordering(T *val, int *cind, int *rptr, int *LP, int *RP, int N)
705{
706 int NNZ = rptr[N];
707 double *tval = utils::SafeMalloc<double>(NNZ);
708 int *tcind = utils::SafeMalloc<int>(NNZ);
709 int *trptr = utils::SafeMalloc<int>(N+1);
710 trptr[0] = 0;
711 for(int i=0; i<N; i++) {
712 int id = LP[i];
713 trptr[i+1] = trptr[i] + (rptr[id+1] - rptr[id]);
714 }
715 #pragma omp parallel for
716 for(int i=0; i<N; i++) {
717 int id = LP[i];
718 for(int j=0; j<rptr[id+1]-rptr[id]; j++) {
719 tval[trptr[i]+j] = val[rptr[id]+j];
720 tcind[trptr[i]+j] = RP[cind[rptr[id]+j]];
721 }
722 helper::QuickSort<int, double>(tcind, tval, trptr[i], trptr[i+1]-1);
723 }
724 utils::Copy<double>(tval, val, NNZ);
725 utils::Copy<int>(tcind, cind, NNZ);
726 utils::Copy<int>(trptr, rptr, N+1);
727 free(tval);
728 free(tcind);
729 free(trptr);
730}
731
732/*
733int Csr2PaddedCsr(
734 double **val, int **cind, int **rptr,
735 int size, int N)
736{
737 int nnz = 0;
738 int *t_rptr = new int[N+1];
739 t_rptr[0] = 0;
740 for(int i=0; i<N; i++) {
741 int num = rptr[0][i+1] - rptr[0][i];
742 nnz += (num+size-1)/size * size;
743 t_rptr[i+1] = nnz;
744 }
745 double *t_val = new double[nnz];
746 int *t_cind = new int[nnz];
747 for(int i=0; i<N; i++) {
748 int num = rptr[0][i+1] - rptr[0][i];
749 int cnt = 0;
750 for(int j=t_rptr[i]; j<t_rptr[i+1]; j++) {
751 if(cnt < num) {
752 t_val[j] = val[0][rptr[0][i]+cnt];
753 t_cind[j] = cind[0][rptr[0][i]+cnt];
754 }else {
755 t_val[j] = 0;
756 t_cind[j] = t_cind[j-1];
757 }
758 cnt++;
759 }
760 }
761 *val = (double*)realloc(*val, sizeof(double)*nnz);
762 *cind = (int*)realloc(*cind, sizeof(int)*nnz);
763 for(int i=0; i<nnz; i++) {val[0][i] = t_val[i];}
764 for(int i=0; i<nnz; i++) {cind[0][i] = t_cind[i];}
765 for(int i=0; i<N+1; i++) {rptr[0][i] = t_rptr[i];}
766
767 return nnz;
768}
769*/
770} // namespace matrix
771
772} // namespace senk
773
774#endif
void RemoveZeros(T **val, int **cind, int *rptr, int N)
Remove zero elements form an input matrix.
Definition: senk_matrix.hpp:30
The top-level namespace of SenK.
Classes used in SenK are written.
Utility functions are defined.