Amesos2 - Direct Sparse Solver Interfaces Version of the Day
klu2_tsolve.hpp
1/* ========================================================================== */
2/* === KLU_tsolve =========================================================== */
3/* ========================================================================== */
4// @HEADER
5// ***********************************************************************
6//
7// KLU2: A Direct Linear Solver package
8// Copyright 2011 Sandia Corporation
9//
10// Under terms of Contract DE-AC04-94AL85000, with Sandia Corporation, the
11// U.S. Government retains certain rights in this software.
12//
13// This library is free software; you can redistribute it and/or modify
14// it under the terms of the GNU Lesser General Public License as
15// published by the Free Software Foundation; either version 2.1 of the
16// License, or (at your option) any later version.
17//
18// This library is distributed in the hope that it will be useful, but
19// WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21// Lesser General Public License for more details.
22//
23// You should have received a copy of the GNU Lesser General Public
24// License along with this library; if not, write to the Free Software
25// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26// USA
27// Questions? Contact Mike A. Heroux (maherou@sandia.gov)
28//
29// KLU2 is derived work from KLU, licensed under LGPL, and copyrighted by
30// University of Florida. The Authors of KLU are Timothy A. Davis and
31// Eka Palamadai. See Doc/KLU_README.txt for the licensing and copyright
32// information for KLU.
33//
34// ***********************************************************************
35// @HEADER
36
37/* Solve A'x=b using the symbolic and numeric objects from KLU_analyze
38 * (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
39 * performed. Uses Numeric->Xwork as workspace (undefined on input and output),
40 * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
41 * Numeric->Iwork).
42 */
43
44#ifndef KLU2_TSOLVE_HPP
45#define KLU2_TSOLVE_HPP
46
47#include "klu2_internal.h"
48#include "klu2.hpp"
49
50template <typename Entry, typename Int>
51Int KLU_tsolve
52(
53 /* inputs, not modified */
54 KLU_symbolic<Entry, Int> *Symbolic,
55 KLU_numeric<Entry, Int> *Numeric,
56 Int d, /* leading dimension of B */
57 Int nrhs, /* number of right-hand-sides */
58
59 /* right-hand-side on input, overwritten with solution to Ax=b on output */
60 Entry B [ ], /* size n*nrhs, in column-oriented form, with
61 * leading dimension d. */
62#ifdef COMPLEX
63 Int conj_solve, /* TRUE for conjugate transpose solve, FALSE for
64 * array transpose solve. Used for the complex
65 * case only. */
66#endif
67 /* --------------- */
68 KLU_common<Entry, Int> *Common
69)
70{
71 Entry x [4], offik, s ;
72 double rs, *Rs ;
73 Entry *Offx, *X, *Bz, *Udiag ;
74 Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
75 Unit **LUbx ;
76 Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
77
78 /* ---------------------------------------------------------------------- */
79 /* check inputs */
80 /* ---------------------------------------------------------------------- */
81
82 if (Common == NULL)
83 {
84 return (FALSE) ;
85 }
86 if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
87 B == NULL)
88 {
89 Common->status = KLU_INVALID ;
90 return (FALSE) ;
91 }
92 Common->status = KLU_OK ;
93
94 /* ---------------------------------------------------------------------- */
95 /* get the contents of the Symbolic object */
96 /* ---------------------------------------------------------------------- */
97
98 Bz = (Entry *) B ;
99 n = Symbolic->n ;
100 nblocks = Symbolic->nblocks ;
101 Q = Symbolic->Q ;
102 R = Symbolic->R ;
103
104 /* ---------------------------------------------------------------------- */
105 /* get the contents of the Numeric object */
106 /* ---------------------------------------------------------------------- */
107
108 ASSERT (nblocks == Numeric->nblocks) ;
109 Pnum = Numeric->Pnum ;
110 Offp = Numeric->Offp ;
111 Offi = Numeric->Offi ;
112 Offx = (Entry *) Numeric->Offx ;
113
114 Lip = Numeric->Lip ;
115 Llen = Numeric->Llen ;
116 Uip = Numeric->Uip ;
117 Ulen = Numeric->Ulen ;
118 LUbx = (Unit **) Numeric->LUbx ;
119 Udiag = (Entry *) Numeric->Udiag ;
120
121 Rs = Numeric->Rs ;
122 X = (Entry *) Numeric->Xwork ;
123 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
124
125 /* ---------------------------------------------------------------------- */
126 /* solve in chunks of 4 columns at a time */
127 /* ---------------------------------------------------------------------- */
128
129 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
130 {
131
132 /* ------------------------------------------------------------------ */
133 /* get the size of the current chunk */
134 /* ------------------------------------------------------------------ */
135
136 nr = MIN (nrhs - chunk, 4) ;
137
138 /* ------------------------------------------------------------------ */
139 /* permute the right hand side, X = Q'*B */
140 /* ------------------------------------------------------------------ */
141
142 switch (nr)
143 {
144
145 case 1:
146
147 for (k = 0 ; k < n ; k++)
148 {
149 X [k] = Bz [Q [k]] ;
150 }
151 break ;
152
153 case 2:
154
155 for (k = 0 ; k < n ; k++)
156 {
157 i = Q [k] ;
158 X [2*k ] = Bz [i ] ;
159 X [2*k + 1] = Bz [i + d ] ;
160 }
161 break ;
162
163 case 3:
164
165 for (k = 0 ; k < n ; k++)
166 {
167 i = Q [k] ;
168 X [3*k ] = Bz [i ] ;
169 X [3*k + 1] = Bz [i + d ] ;
170 X [3*k + 2] = Bz [i + d*2] ;
171 }
172 break ;
173
174 case 4:
175
176 for (k = 0 ; k < n ; k++)
177 {
178 i = Q [k] ;
179 X [4*k ] = Bz [i ] ;
180 X [4*k + 1] = Bz [i + d ] ;
181 X [4*k + 2] = Bz [i + d*2] ;
182 X [4*k + 3] = Bz [i + d*3] ;
183 }
184 break ;
185
186 }
187
188 /* ------------------------------------------------------------------ */
189 /* solve X = (L*U + Off)'\X */
190 /* ------------------------------------------------------------------ */
191
192 for (block = 0 ; block < nblocks ; block++)
193 {
194
195 /* -------------------------------------------------------------- */
196 /* the block of size nk is from rows/columns k1 to k2-1 */
197 /* -------------------------------------------------------------- */
198
199 k1 = R [block] ;
200 k2 = R [block+1] ;
201 nk = k2 - k1 ;
202 PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
203
204 /* -------------------------------------------------------------- */
205 /* block back-substitution for the off-diagonal-block entries */
206 /* -------------------------------------------------------------- */
207
208 if (block > 0)
209 {
210 switch (nr)
211 {
212
213 case 1:
214
215 for (k = k1 ; k < k2 ; k++)
216 {
217 pend = Offp [k+1] ;
218 for (p = Offp [k] ; p < pend ; p++)
219 {
220#ifdef COMPLEX
221 if (conj_solve)
222 {
223 MULT_SUB_CONJ (X [k], X [Offi [p]],
224 Offx [p]) ;
225 }
226 else
227#endif
228 {
229 MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
230 }
231 }
232 }
233 break ;
234
235 case 2:
236
237 for (k = k1 ; k < k2 ; k++)
238 {
239 pend = Offp [k+1] ;
240 x [0] = X [2*k ] ;
241 x [1] = X [2*k + 1] ;
242 for (p = Offp [k] ; p < pend ; p++)
243 {
244 i = Offi [p] ;
245#ifdef COMPLEX
246 if (conj_solve)
247 {
248 CONJ (offik, Offx [p]) ;
249 }
250 else
251#endif
252 {
253 offik = Offx [p] ;
254 }
255 MULT_SUB (x [0], offik, X [2*i]) ;
256 MULT_SUB (x [1], offik, X [2*i + 1]) ;
257 }
258 X [2*k ] = x [0] ;
259 X [2*k + 1] = x [1] ;
260 }
261 break ;
262
263 case 3:
264
265 for (k = k1 ; k < k2 ; k++)
266 {
267 pend = Offp [k+1] ;
268 x [0] = X [3*k ] ;
269 x [1] = X [3*k + 1] ;
270 x [2] = X [3*k + 2] ;
271 for (p = Offp [k] ; p < pend ; p++)
272 {
273 i = Offi [p] ;
274#ifdef COMPLEX
275 if (conj_solve)
276 {
277 CONJ (offik, Offx [p]) ;
278 }
279 else
280#endif
281 {
282 offik = Offx [p] ;
283 }
284 MULT_SUB (x [0], offik, X [3*i]) ;
285 MULT_SUB (x [1], offik, X [3*i + 1]) ;
286 MULT_SUB (x [2], offik, X [3*i + 2]) ;
287 }
288 X [3*k ] = x [0] ;
289 X [3*k + 1] = x [1] ;
290 X [3*k + 2] = x [2] ;
291 }
292 break ;
293
294 case 4:
295
296 for (k = k1 ; k < k2 ; k++)
297 {
298 pend = Offp [k+1] ;
299 x [0] = X [4*k ] ;
300 x [1] = X [4*k + 1] ;
301 x [2] = X [4*k + 2] ;
302 x [3] = X [4*k + 3] ;
303 for (p = Offp [k] ; p < pend ; p++)
304 {
305 i = Offi [p] ;
306#ifdef COMPLEX
307 if (conj_solve)
308 {
309 CONJ(offik, Offx [p]) ;
310 }
311 else
312#endif
313 {
314 offik = Offx [p] ;
315 }
316 MULT_SUB (x [0], offik, X [4*i]) ;
317 MULT_SUB (x [1], offik, X [4*i + 1]) ;
318 MULT_SUB (x [2], offik, X [4*i + 2]) ;
319 MULT_SUB (x [3], offik, X [4*i + 3]) ;
320 }
321 X [4*k ] = x [0] ;
322 X [4*k + 1] = x [1] ;
323 X [4*k + 2] = x [2] ;
324 X [4*k + 3] = x [3] ;
325 }
326 break ;
327 }
328 }
329
330 /* -------------------------------------------------------------- */
331 /* solve the block system */
332 /* -------------------------------------------------------------- */
333
334 if (nk == 1)
335 {
336#ifdef COMPLEX
337 if (conj_solve)
338 {
339 CONJ (s, Udiag [k1]) ;
340 }
341 else
342#endif
343 {
344 s = Udiag [k1] ;
345 }
346 switch (nr)
347 {
348
349 case 1:
350 DIV (X [k1], X [k1], s) ;
351 break ;
352
353 case 2:
354 DIV (X [2*k1], X [2*k1], s) ;
355 DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
356 break ;
357
358 case 3:
359 DIV (X [3*k1], X [3*k1], s) ;
360 DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
361 DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
362 break ;
363
364 case 4:
365 DIV (X [4*k1], X [4*k1], s) ;
366 DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
367 DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
368 DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
369 break ;
370
371 }
372 }
373 else
374 {
375 KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
376 Udiag + k1, nr,
377#ifdef COMPLEX
378 conj_solve,
379#endif
380 X + nr*k1) ;
381 KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
382#ifdef COMPLEX
383 conj_solve,
384#endif
385 X + nr*k1) ;
386 }
387 }
388
389 /* ------------------------------------------------------------------ */
390 /* scale and permute the result, Bz = P'(R\X) */
391 /* ------------------------------------------------------------------ */
392
393 if (Rs == NULL)
394 {
395
396 /* no scaling */
397 switch (nr)
398 {
399
400 case 1:
401
402 for (k = 0 ; k < n ; k++)
403 {
404 Bz [Pnum [k]] = X [k] ;
405 }
406 break ;
407
408 case 2:
409
410 for (k = 0 ; k < n ; k++)
411 {
412 i = Pnum [k] ;
413 Bz [i ] = X [2*k ] ;
414 Bz [i + d ] = X [2*k + 1] ;
415 }
416 break ;
417
418 case 3:
419
420 for (k = 0 ; k < n ; k++)
421 {
422 i = Pnum [k] ;
423 Bz [i ] = X [3*k ] ;
424 Bz [i + d ] = X [3*k + 1] ;
425 Bz [i + d*2] = X [3*k + 2] ;
426 }
427 break ;
428
429 case 4:
430
431 for (k = 0 ; k < n ; k++)
432 {
433 i = Pnum [k] ;
434 Bz [i ] = X [4*k ] ;
435 Bz [i + d ] = X [4*k + 1] ;
436 Bz [i + d*2] = X [4*k + 2] ;
437 Bz [i + d*3] = X [4*k + 3] ;
438 }
439 break ;
440 }
441
442 }
443 else
444 {
445
446 switch (nr)
447 {
448
449 case 1:
450
451 for (k = 0 ; k < n ; k++)
452 {
453 SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ;
454 }
455 break ;
456
457 case 2:
458
459 for (k = 0 ; k < n ; k++)
460 {
461 i = Pnum [k] ;
462 rs = Rs [k] ;
463 SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ;
464 SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ;
465 }
466 break ;
467
468 case 3:
469
470 for (k = 0 ; k < n ; k++)
471 {
472 i = Pnum [k] ;
473 rs = Rs [k] ;
474 SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ;
475 SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ;
476 SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ;
477 }
478 break ;
479
480 case 4:
481
482 for (k = 0 ; k < n ; k++)
483 {
484 i = Pnum [k] ;
485 rs = Rs [k] ;
486 SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ;
487 SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ;
488 SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ;
489 SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ;
490 }
491 break ;
492 }
493 }
494
495 /* ------------------------------------------------------------------ */
496 /* go to the next chunk of B */
497 /* ------------------------------------------------------------------ */
498
499 Bz += d*4 ;
500 }
501 return (TRUE) ;
502}
503
504
505// Require Xout and B to have equal number of entries, pre-allocated
506template <typename Entry, typename Int>
507Int KLU_tsolve2
508(
509 /* inputs, not modified */
510 KLU_symbolic<Entry, Int> *Symbolic,
511 KLU_numeric<Entry, Int> *Numeric,
512 Int d, /* leading dimension of B */
513 Int nrhs, /* number of right-hand-sides */
514
515 /* right-hand-side on input, overwritten with solution to Ax=b on output */
516 Entry B [ ], /* size n*1, in column-oriented form, with
517 * leading dimension d. */
518 Entry Xout [ ], /* size n*1, in column-oriented form, with
519 * leading dimension d. */
520#ifdef COMPLEX
521 Int conj_solve, /* TRUE for conjugate transpose solve, FALSE for
522 * array transpose solve. Used for the complex
523 * case only. */
524#endif
525 /* --------------- */
526 KLU_common<Entry, Int> *Common
527)
528{
529 Entry x [4], offik, s ;
530 double rs, *Rs ;
531 Entry *Offx, *X, *Bz, *Udiag ;
532 Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
533 Unit **LUbx ;
534 Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
535
536 /* ---------------------------------------------------------------------- */
537 /* check inputs */
538 /* ---------------------------------------------------------------------- */
539
540 if (Common == NULL)
541 {
542 return (FALSE) ;
543 }
544 if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
545 B == NULL || Xout == NULL)
546 {
547 Common->status = KLU_INVALID ;
548 return (FALSE) ;
549 }
550 Common->status = KLU_OK ;
551
552 /* ---------------------------------------------------------------------- */
553 /* get the contents of the Symbolic object */
554 /* ---------------------------------------------------------------------- */
555
556 Bz = (Entry *) B ;
557 n = Symbolic->n ;
558 nblocks = Symbolic->nblocks ;
559 Q = Symbolic->Q ;
560 R = Symbolic->R ;
561
562 /* ---------------------------------------------------------------------- */
563 /* get the contents of the Numeric object */
564 /* ---------------------------------------------------------------------- */
565
566 ASSERT (nblocks == Numeric->nblocks) ;
567 Pnum = Numeric->Pnum ;
568 Offp = Numeric->Offp ;
569 Offi = Numeric->Offi ;
570 Offx = (Entry *) Numeric->Offx ;
571
572 Lip = Numeric->Lip ;
573 Llen = Numeric->Llen ;
574 Uip = Numeric->Uip ;
575 Ulen = Numeric->Ulen ;
576 LUbx = (Unit **) Numeric->LUbx ;
577 Udiag = (Entry *) Numeric->Udiag ;
578
579 Rs = Numeric->Rs ;
580 X = (Entry *) Numeric->Xwork ;
581 ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
582
583 /* ---------------------------------------------------------------------- */
584 /* solve in chunks of 4 columns at a time */
585 /* ---------------------------------------------------------------------- */
586
587 for (chunk = 0 ; chunk < nrhs ; chunk += 4)
588 {
589
590 /* ------------------------------------------------------------------ */
591 /* get the size of the current chunk */
592 /* ------------------------------------------------------------------ */
593
594 nr = MIN (nrhs - chunk, 4) ;
595
596 /* ------------------------------------------------------------------ */
597 /* permute the right hand side, X = Q'*B */
598 /* ------------------------------------------------------------------ */
599
600 switch (nr)
601 {
602
603 case 1:
604
605 for (k = 0 ; k < n ; k++)
606 {
607 X [k] = Bz [Q [k]] ;
608 }
609 break ;
610
611 case 2:
612
613 for (k = 0 ; k < n ; k++)
614 {
615 i = Q [k] ;
616 X [2*k ] = Bz [i ] ;
617 X [2*k + 1] = Bz [i + d ] ;
618 }
619 break ;
620
621 case 3:
622
623 for (k = 0 ; k < n ; k++)
624 {
625 i = Q [k] ;
626 X [3*k ] = Bz [i ] ;
627 X [3*k + 1] = Bz [i + d ] ;
628 X [3*k + 2] = Bz [i + d*2] ;
629 }
630 break ;
631
632 case 4:
633
634 for (k = 0 ; k < n ; k++)
635 {
636 i = Q [k] ;
637 X [4*k ] = Bz [i ] ;
638 X [4*k + 1] = Bz [i + d ] ;
639 X [4*k + 2] = Bz [i + d*2] ;
640 X [4*k + 3] = Bz [i + d*3] ;
641 }
642 break ;
643
644 }
645
646 /* ------------------------------------------------------------------ */
647 /* solve X = (L*U + Off)'\X */
648 /* ------------------------------------------------------------------ */
649
650 for (block = 0 ; block < nblocks ; block++)
651 {
652
653 /* -------------------------------------------------------------- */
654 /* the block of size nk is from rows/columns k1 to k2-1 */
655 /* -------------------------------------------------------------- */
656
657 k1 = R [block] ;
658 k2 = R [block+1] ;
659 nk = k2 - k1 ;
660 PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
661
662 /* -------------------------------------------------------------- */
663 /* block back-substitution for the off-diagonal-block entries */
664 /* -------------------------------------------------------------- */
665
666 if (block > 0)
667 {
668 switch (nr)
669 {
670
671 case 1:
672
673 for (k = k1 ; k < k2 ; k++)
674 {
675 pend = Offp [k+1] ;
676 for (p = Offp [k] ; p < pend ; p++)
677 {
678#ifdef COMPLEX
679 if (conj_solve)
680 {
681 MULT_SUB_CONJ (X [k], X [Offi [p]],
682 Offx [p]) ;
683 }
684 else
685#endif
686 {
687 MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
688 }
689 }
690 }
691 break ;
692
693 case 2:
694
695 for (k = k1 ; k < k2 ; k++)
696 {
697 pend = Offp [k+1] ;
698 x [0] = X [2*k ] ;
699 x [1] = X [2*k + 1] ;
700 for (p = Offp [k] ; p < pend ; p++)
701 {
702 i = Offi [p] ;
703#ifdef COMPLEX
704 if (conj_solve)
705 {
706 CONJ (offik, Offx [p]) ;
707 }
708 else
709#endif
710 {
711 offik = Offx [p] ;
712 }
713 MULT_SUB (x [0], offik, X [2*i]) ;
714 MULT_SUB (x [1], offik, X [2*i + 1]) ;
715 }
716 X [2*k ] = x [0] ;
717 X [2*k + 1] = x [1] ;
718 }
719 break ;
720
721 case 3:
722
723 for (k = k1 ; k < k2 ; k++)
724 {
725 pend = Offp [k+1] ;
726 x [0] = X [3*k ] ;
727 x [1] = X [3*k + 1] ;
728 x [2] = X [3*k + 2] ;
729 for (p = Offp [k] ; p < pend ; p++)
730 {
731 i = Offi [p] ;
732#ifdef COMPLEX
733 if (conj_solve)
734 {
735 CONJ (offik, Offx [p]) ;
736 }
737 else
738#endif
739 {
740 offik = Offx [p] ;
741 }
742 MULT_SUB (x [0], offik, X [3*i]) ;
743 MULT_SUB (x [1], offik, X [3*i + 1]) ;
744 MULT_SUB (x [2], offik, X [3*i + 2]) ;
745 }
746 X [3*k ] = x [0] ;
747 X [3*k + 1] = x [1] ;
748 X [3*k + 2] = x [2] ;
749 }
750 break ;
751
752 case 4:
753
754 for (k = k1 ; k < k2 ; k++)
755 {
756 pend = Offp [k+1] ;
757 x [0] = X [4*k ] ;
758 x [1] = X [4*k + 1] ;
759 x [2] = X [4*k + 2] ;
760 x [3] = X [4*k + 3] ;
761 for (p = Offp [k] ; p < pend ; p++)
762 {
763 i = Offi [p] ;
764#ifdef COMPLEX
765 if (conj_solve)
766 {
767 CONJ(offik, Offx [p]) ;
768 }
769 else
770#endif
771 {
772 offik = Offx [p] ;
773 }
774 MULT_SUB (x [0], offik, X [4*i]) ;
775 MULT_SUB (x [1], offik, X [4*i + 1]) ;
776 MULT_SUB (x [2], offik, X [4*i + 2]) ;
777 MULT_SUB (x [3], offik, X [4*i + 3]) ;
778 }
779 X [4*k ] = x [0] ;
780 X [4*k + 1] = x [1] ;
781 X [4*k + 2] = x [2] ;
782 X [4*k + 3] = x [3] ;
783 }
784 break ;
785 }
786 }
787
788 /* -------------------------------------------------------------- */
789 /* solve the block system */
790 /* -------------------------------------------------------------- */
791
792 if (nk == 1)
793 {
794#ifdef COMPLEX
795 if (conj_solve)
796 {
797 CONJ (s, Udiag [k1]) ;
798 }
799 else
800#endif
801 {
802 s = Udiag [k1] ;
803 }
804 switch (nr)
805 {
806
807 case 1:
808 DIV (X [k1], X [k1], s) ;
809 break ;
810
811 case 2:
812 DIV (X [2*k1], X [2*k1], s) ;
813 DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
814 break ;
815
816 case 3:
817 DIV (X [3*k1], X [3*k1], s) ;
818 DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
819 DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
820 break ;
821
822 case 4:
823 DIV (X [4*k1], X [4*k1], s) ;
824 DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
825 DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
826 DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
827 break ;
828
829 }
830 }
831 else
832 {
833 KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
834 Udiag + k1, nr,
835#ifdef COMPLEX
836 conj_solve,
837#endif
838 X + nr*k1) ;
839 KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
840#ifdef COMPLEX
841 conj_solve,
842#endif
843 X + nr*k1) ;
844 }
845 }
846
847 /* ------------------------------------------------------------------ */
848 /* scale and permute the result, Bz = P'(R\X) */
849 /* store result in Xout */
850 /* ------------------------------------------------------------------ */
851
852 if (Rs == NULL)
853 {
854
855 /* no scaling */
856 switch (nr)
857 {
858
859 case 1:
860
861 for (k = 0 ; k < n ; k++)
862 {
863 Xout [Pnum [k]] = X [k] ;
864 }
865 break ;
866
867 case 2:
868
869 for (k = 0 ; k < n ; k++)
870 {
871 i = Pnum [k] ;
872 Xout [i ] = X [2*k ] ;
873 Xout [i + d ] = X [2*k + 1] ;
874 }
875 break ;
876
877 case 3:
878
879 for (k = 0 ; k < n ; k++)
880 {
881 i = Pnum [k] ;
882 Xout [i ] = X [3*k ] ;
883 Xout [i + d ] = X [3*k + 1] ;
884 Xout [i + d*2] = X [3*k + 2] ;
885 }
886 break ;
887
888 case 4:
889
890 for (k = 0 ; k < n ; k++)
891 {
892 i = Pnum [k] ;
893 Xout [i ] = X [4*k ] ;
894 Xout [i + d ] = X [4*k + 1] ;
895 Xout [i + d*2] = X [4*k + 2] ;
896 Xout [i + d*3] = X [4*k + 3] ;
897 }
898 break ;
899 }
900
901 }
902 else
903 {
904
905 switch (nr)
906 {
907
908 case 1:
909
910 for (k = 0 ; k < n ; k++)
911 {
912 SCALE_DIV_ASSIGN (Xout [Pnum [k]], X [k], Rs [k]) ;
913 }
914 break ;
915
916 case 2:
917
918 for (k = 0 ; k < n ; k++)
919 {
920 i = Pnum [k] ;
921 rs = Rs [k] ;
922 SCALE_DIV_ASSIGN (Xout [i], X [2*k], rs) ;
923 SCALE_DIV_ASSIGN (Xout [i + d], X [2*k + 1], rs) ;
924 }
925 break ;
926
927 case 3:
928
929 for (k = 0 ; k < n ; k++)
930 {
931 i = Pnum [k] ;
932 rs = Rs [k] ;
933 SCALE_DIV_ASSIGN (Xout [i], X [3*k], rs) ;
934 SCALE_DIV_ASSIGN (Xout [i + d], X [3*k + 1], rs) ;
935 SCALE_DIV_ASSIGN (Xout [i + d*2], X [3*k + 2], rs) ;
936 }
937 break ;
938
939 case 4:
940
941 for (k = 0 ; k < n ; k++)
942 {
943 i = Pnum [k] ;
944 rs = Rs [k] ;
945 SCALE_DIV_ASSIGN (Xout [i], X [4*k], rs) ;
946 SCALE_DIV_ASSIGN (Xout [i + d], X [4*k + 1], rs) ;
947 SCALE_DIV_ASSIGN (Xout [i + d*2], X [4*k + 2], rs) ;
948 SCALE_DIV_ASSIGN (Xout [i + d*3], X [4*k + 3], rs) ;
949 }
950 break ;
951 }
952 }
953
954 /* ------------------------------------------------------------------ */
955 /* go to the next chunk of B */
956 /* ------------------------------------------------------------------ */
957
958 Xout += d*4 ;
959 }
960 return (TRUE) ;
961}
962
963#endif