root/src/spooles.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. ssolve_creategraph
  2. ssolve_permuteA
  3. ssolve_postfactor
  4. ssolve_permuteB
  5. ssolve_permuteout
  6. factor
  7. fsolve
  8. factor_MT
  9. fsolve_MT
  10. spooles_factor
  11. spooles_solve
  12. spooles_cleanup
  13. spooles_factor_rad
  14. spooles_solve_rad
  15. spooles_cleanup_rad
  16. spooles

   1 
   2 /*     CalculiX - A 3-dimensional finite element program                   */
   3 /*              Copyright (C) 1998-2015 Guido Dhondt                          */
   4 
   5 /*     This program is free software; you can redistribute it and/or     */
   6 /*     modify it under the terms of the GNU General Public License as    */
   7 /*     published by the Free Software Foundation(version 2);    */
   8 /*                    */
   9 
  10 /*     This program is distributed in the hope that it will be useful,   */
  11 /*     but WITHOUT ANY WARRANTY; without even the implied warranty of    */
  12 /*     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the      */
  13 /*     GNU General Public License for more details.                      */
  14 
  15 /*     You should have received a copy of the GNU General Public License */
  16 /*     along with this program; if not, write to the Free Software       */
  17 /*     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.         */
  18 /*
  19  * The implementation is derived from the SPOOLES sample described in
  20  * AllInOne.ps
  21  *  created -- 98jun04, cca
  22  *
  23  * Converted to something that resembles C and
  24  * support for multithreaded solving added.
  25  * (C) 2003 Manfred Spraul
  26  */
  27 
  28 /* spooles_factor and spooles_solve occur twice in this routine: once
  29    with their plane names and once with _rad appended to the name. This is
  30    necessary since the factorized stiffness matrices (plain names) and the
  31    factorized radiation matrices (_rad appended) are kept at the same time
  32    in the program */
  33 
  34 #ifdef SPOOLES
  35 
  36 #include <stdio.h>
  37 #include <math.h>
  38 #include <stdlib.h>
  39 #include <unistd.h>
  40 #include "CalculiX.h"
  41 #include "spooles.h"
  42 
  43 #if USE_MT
  44 int num_cpus = -1;
  45 #endif
  46 
  47 #define TUNE_MAXZEROS           1000
  48 #define TUNE_MAXDOMAINSIZE      800
  49 #define TUNE_MAXSIZE            64
  50 
  51 #define RNDSEED         7892713
  52 #define MAGIC_DTOL              0.0
  53 #define MAGIC_TAU               100.0
  54 
  55 /*
  56  * Substeps for solving A X = B:
  57  *
  58  *  (1) form Graph object
  59  *  (2) order matrix and form front tree
  60  *  (3) get the permutation, permute the matrix and 
  61  *      front tree and get the symbolic factorization
  62  *  (4) compute the numeric factorization
  63  *  (5) read in right hand side entries
  64  *  (6) compute the solution
  65  *
  66  * The ssolve_main functions free the input matrices internally
  67  */
  68 
  69 static void ssolve_creategraph(Graph ** graph, ETree ** frontETree,
  70                          InpMtx * mtxA, int size, FILE * msgFile)
  71 {
  72         IVL *adjIVL;
  73         int nedges;
  74 
  75         *graph = Graph_new();
  76         adjIVL = InpMtx_fullAdjacency(mtxA);
  77         nedges = IVL_tsize(adjIVL);
  78         Graph_init2(*graph, 0, size, 0, nedges, size, nedges, adjIVL,
  79                     NULL, NULL);
  80         if (DEBUG_LVL > 1) {
  81                 fprintf(msgFile, "\n\n graph of the input matrix");
  82                 Graph_writeForHumanEye(*graph, msgFile);
  83                 fflush(msgFile);
  84         }
  85         /* (2) order the graph using multiple minimum degree */
  86 
  87         /*maxdomainsize=neqns/100; */
  88         /*if (maxdomainsize==0) maxdomainsize=1; */
  89         /* *frontETree = orderViaMMD(*graph, RNDSEED, DEBUG_LVL, msgFile) ; */
  90         /* *frontETree = orderViaND(*graph,maxdomainsize,RNDSEED,DEBUG_LVL,msgFile); */
  91         /* *frontETree = orderViaMS(*graph,maxdomainsize,RNDSEED,DEBUG_LVL,msgFile); */
  92 
  93         *frontETree =
  94             orderViaBestOfNDandMS(*graph, TUNE_MAXDOMAINSIZE,
  95                                   TUNE_MAXZEROS, TUNE_MAXSIZE, RNDSEED,
  96                                   DEBUG_LVL, msgFile);
  97         if (DEBUG_LVL > 1) {
  98                 fprintf(msgFile, "\n\n front tree from ordering");
  99                 ETree_writeForHumanEye(*frontETree, msgFile);
 100                 fflush(msgFile);
 101         }
 102 }
 103 
 104 static void ssolve_permuteA(IV ** oldToNewIV, IV ** newToOldIV,
 105                          IVL ** symbfacIVL, ETree * frontETree,
 106                          InpMtx * mtxA, FILE * msgFile, int *symmetryflagi4)
 107 {
 108         int *oldToNew;
 109 
 110         *oldToNewIV = ETree_oldToNewVtxPerm(frontETree);
 111         oldToNew = IV_entries(*oldToNewIV);
 112         *newToOldIV = ETree_newToOldVtxPerm(frontETree);
 113         ETree_permuteVertices(frontETree, *oldToNewIV);
 114         InpMtx_permute(mtxA, oldToNew, oldToNew);
 115         if(*symmetryflagi4!=2) InpMtx_mapToUpperTriangle(mtxA);
 116         InpMtx_changeCoordType(mtxA, INPMTX_BY_CHEVRONS);
 117         InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS);
 118         *symbfacIVL = SymbFac_initFromInpMtx(frontETree, mtxA);
 119         if (DEBUG_LVL > 1) {
 120                 fprintf(msgFile, "\n\n old-to-new permutation vector");
 121                 IV_writeForHumanEye(*oldToNewIV, msgFile);
 122                 fprintf(msgFile, "\n\n new-to-old permutation vector");
 123                 IV_writeForHumanEye(*newToOldIV, msgFile);
 124                 fprintf(msgFile, "\n\n front tree after permutation");
 125                 ETree_writeForHumanEye(frontETree, msgFile);
 126                 fprintf(msgFile, "\n\n input matrix after permutation");
 127                 InpMtx_writeForHumanEye(mtxA, msgFile);
 128                 fprintf(msgFile, "\n\n symbolic factorization");
 129                 IVL_writeForHumanEye(*symbfacIVL, msgFile);
 130                 fflush(msgFile);
 131         }
 132 }
 133 
 134 static void ssolve_postfactor(FrontMtx *frontmtx, FILE *msgFile)
 135 {
 136         FrontMtx_postProcess(frontmtx, DEBUG_LVL, msgFile);
 137         if (DEBUG_LVL > 1) {
 138                 fprintf(msgFile, "\n\n factor matrix after post-processing");
 139                 FrontMtx_writeForHumanEye(frontmtx, msgFile);
 140                 fflush(msgFile);
 141         }
 142 }
 143 
 144 static void ssolve_permuteB(DenseMtx *mtxB, IV *oldToNewIV, FILE* msgFile)
 145 {
 146         DenseMtx_permuteRows(mtxB, oldToNewIV);
 147         if (DEBUG_LVL > 1) {
 148                 fprintf(msgFile,
 149                         "\n\n right hand side matrix in new ordering");
 150                 DenseMtx_writeForHumanEye(mtxB, msgFile);
 151                 fflush(msgFile);
 152         }
 153 }
 154 
 155 static void ssolve_permuteout(DenseMtx *mtxX, IV *newToOldIV, FILE *msgFile)
 156 {
 157         DenseMtx_permuteRows(mtxX, newToOldIV);
 158         if (DEBUG_LVL > 1) {
 159                 fprintf(msgFile, "\n\n solution matrix in original ordering");
 160                 DenseMtx_writeForHumanEye(mtxX, msgFile);
 161                 fflush(msgFile);
 162         }
 163 }
 164 
 165  void factor(struct factorinfo *pfi, InpMtx *mtxA, int size, FILE *msgFile,
 166              int *symmetryflagi4)
 167 {
 168         Graph *graph;
 169         IVL *symbfacIVL;
 170         Chv *rootchv;
 171 
 172         /* Initialize pfi: */
 173         pfi->size = size;
 174         pfi->msgFile = msgFile;
 175         pfi->solvemap = NULL;
 176         DVfill(10, pfi->cpus, 0.0);
 177 
 178         /*
 179          * STEP 1 : find a low-fill ordering
 180          * (1) create the Graph object
 181          */
 182         ssolve_creategraph(&graph, &pfi->frontETree, mtxA, size, pfi->msgFile);
 183 
 184         /*
 185          * STEP 2: get the permutation, permute the matrix and 
 186          *      front tree and get the symbolic factorization
 187          */
 188         ssolve_permuteA(&pfi->oldToNewIV, &pfi->newToOldIV, &symbfacIVL, pfi->frontETree,
 189                      mtxA, pfi->msgFile, symmetryflagi4);
 190 
 191         /*
 192          * STEP 3: initialize the front matrix object
 193          */
 194         {
 195                 pfi->frontmtx = FrontMtx_new();
 196                 pfi->mtxmanager = SubMtxManager_new();
 197                 SubMtxManager_init(pfi->mtxmanager, NO_LOCK, 0);
 198                 FrontMtx_init(pfi->frontmtx, pfi->frontETree, symbfacIVL, SPOOLES_REAL,
 199                               *symmetryflagi4, FRONTMTX_DENSE_FRONTS,
 200                               SPOOLES_PIVOTING, NO_LOCK, 0, NULL,
 201                               pfi->mtxmanager, DEBUG_LVL, pfi->msgFile);
 202         }
 203 
 204         /* 
 205          * STEP 4: compute the numeric factorization
 206          */
 207         {
 208                 ChvManager *chvmanager;
 209                 int stats[20];
 210                 int error;
 211 
 212                 chvmanager = ChvManager_new();
 213                 ChvManager_init(chvmanager, NO_LOCK, 1);
 214                 IVfill(20, stats, 0);
 215                 rootchv = FrontMtx_factorInpMtx(pfi->frontmtx, mtxA, MAGIC_TAU, MAGIC_DTOL,
 216                                                 chvmanager, &error, pfi->cpus,
 217                                                 stats, DEBUG_LVL, pfi->msgFile);
 218                 ChvManager_free(chvmanager);
 219                 if (DEBUG_LVL > 1) {
 220                         fprintf(msgFile, "\n\n factor matrix");
 221                         FrontMtx_writeForHumanEye(pfi->frontmtx, pfi->msgFile);
 222                         fflush(msgFile);
 223                 }
 224                 if (rootchv != NULL) {
 225                         fprintf(pfi->msgFile, "\n\n matrix found to be singular\n");
 226                         exit(-1);
 227                 }
 228                 if (error >= 0) {
 229                         fprintf(pfi->msgFile, "\n\nerror encountered at front %d",
 230                                 error);
 231                         exit(-1);
 232                 }
 233         }
 234         /*
 235          * STEP 5: post-process the factorization
 236          */
 237         ssolve_postfactor(pfi->frontmtx, pfi->msgFile);
 238 
 239         /* cleanup: */
 240         IVL_free(symbfacIVL);
 241         InpMtx_free(mtxA);
 242         Graph_free(graph);
 243 }
 244 
 245 DenseMtx *fsolve(struct factorinfo *pfi, DenseMtx *mtxB)
 246 {
 247         DenseMtx *mtxX;
 248         /*
 249          * STEP 6: permute the right hand side into the new ordering
 250          */
 251         {
 252                 DenseMtx_permuteRows(mtxB, pfi->oldToNewIV);
 253                 if (DEBUG_LVL > 1) {
 254                         fprintf(pfi->msgFile,
 255                                 "\n\n right hand side matrix in new ordering");
 256                         DenseMtx_writeForHumanEye(mtxB, pfi->msgFile);
 257                         fflush(pfi->msgFile);
 258                 }
 259         }
 260         /*
 261          * STEP 7: solve the linear system
 262          */
 263         {
 264                 mtxX = DenseMtx_new();
 265                 DenseMtx_init(mtxX, SPOOLES_REAL, 0, 0, pfi->size, 1, 1, pfi->size);
 266                 DenseMtx_zero(mtxX);
 267                 FrontMtx_solve(pfi->frontmtx, mtxX, mtxB, pfi->mtxmanager, pfi->cpus,
 268                                DEBUG_LVL, pfi->msgFile);
 269                 if (DEBUG_LVL > 1) {
 270                         fprintf(pfi->msgFile, "\n\n solution matrix in new ordering");
 271                         DenseMtx_writeForHumanEye(mtxX, pfi->msgFile);
 272                         fflush(pfi->msgFile);
 273                 }
 274         }
 275         /*
 276          * STEP 8:  permute the solution into the original ordering
 277          */
 278         ssolve_permuteout(mtxX, pfi->newToOldIV, pfi->msgFile);
 279 
 280         /* cleanup: */
 281         DenseMtx_free(mtxB);
 282 
 283         return mtxX;
 284 }
 285 
 286 #ifdef USE_MT 
 287 void factor_MT(struct factorinfo *pfi, InpMtx *mtxA, int size, FILE *msgFile, int *symmetryflagi4)
 288 {
 289         Graph *graph;
 290         IV *ownersIV;
 291         IVL *symbfacIVL;
 292         Chv *rootchv;
 293 
 294         /* Initialize pfi: */
 295         pfi->size = size;
 296         pfi->msgFile = msgFile;
 297         DVfill(10, pfi->cpus, 0.0);
 298 
 299         /*
 300          * STEP 1 : find a low-fill ordering
 301          * (1) create the Graph object
 302          */
 303         ssolve_creategraph(&graph, &pfi->frontETree, mtxA, size, msgFile);
 304 
 305         /*
 306          * STEP 2: get the permutation, permute the matrix and 
 307          *      front tree and get the symbolic factorization
 308          */
 309         ssolve_permuteA(&pfi->oldToNewIV, &pfi->newToOldIV, &symbfacIVL, pfi->frontETree,
 310                      mtxA, msgFile, symmetryflagi4);
 311 
 312         /*
 313          * STEP 3: Prepare distribution to multiple threads/cpus
 314          */
 315         {
 316                 DV *cumopsDV;
 317                 int nfront;
 318 
 319                 nfront = ETree_nfront(pfi->frontETree);
 320 
 321                 pfi->nthread = num_cpus;
 322                 if (pfi->nthread > nfront)
 323                         pfi->nthread = nfront;
 324 
 325                 cumopsDV = DV_new();
 326                 DV_init(cumopsDV, pfi->nthread, NULL);
 327                 ownersIV = ETree_ddMap(pfi->frontETree, SPOOLES_REAL, *symmetryflagi4,
 328                                        cumopsDV, 1. / (2. * pfi->nthread));
 329                 if (DEBUG_LVL > 1) {
 330                         fprintf(msgFile,
 331                                 "\n\n map from fronts to threads");
 332                         IV_writeForHumanEye(ownersIV, msgFile);
 333                         fprintf(msgFile,
 334                                 "\n\n factor operations for each front");
 335                         DV_writeForHumanEye(cumopsDV, msgFile);
 336                         fflush(msgFile);
 337                 } else {
 338                         fprintf(msgFile, "\n\n Using %d threads\n",
 339                                 pfi->nthread);
 340                 }
 341                 DV_free(cumopsDV);
 342         }
 343 
 344         /*
 345          * STEP 4: initialize the front matrix object
 346          */
 347         {
 348                 pfi->frontmtx = FrontMtx_new();
 349                 pfi->mtxmanager = SubMtxManager_new();
 350                 SubMtxManager_init(pfi->mtxmanager, LOCK_IN_PROCESS, 0);
 351                 FrontMtx_init(pfi->frontmtx, pfi->frontETree, symbfacIVL, SPOOLES_REAL,
 352                               *symmetryflagi4, FRONTMTX_DENSE_FRONTS,
 353                               SPOOLES_PIVOTING, LOCK_IN_PROCESS, 0, NULL,
 354                               pfi->mtxmanager, DEBUG_LVL, pfi->msgFile);
 355         }
 356 
 357         /*
 358          * STEP 5: compute the numeric factorization in parallel
 359          */
 360         {
 361                 ChvManager *chvmanager;
 362                 int stats[20];
 363                 int error;
 364 
 365                 chvmanager = ChvManager_new();
 366                 ChvManager_init(chvmanager, LOCK_IN_PROCESS, 1);
 367                 IVfill(20, stats, 0);
 368                 rootchv = FrontMtx_MT_factorInpMtx(pfi->frontmtx, mtxA, MAGIC_TAU, MAGIC_DTOL,
 369                                                    chvmanager, ownersIV, 0,
 370                                                    &error, pfi->cpus, stats, DEBUG_LVL,
 371                                                    pfi->msgFile);
 372                 ChvManager_free(chvmanager);
 373                 if (DEBUG_LVL > 1) {
 374                         fprintf(msgFile, "\n\n factor matrix");
 375                         FrontMtx_writeForHumanEye(pfi->frontmtx, pfi->msgFile);
 376                         fflush(pfi->msgFile);
 377                 }
 378                 if (rootchv != NULL) {
 379                         fprintf(pfi->msgFile, "\n\n matrix found to be singular\n");
 380                         exit(-1);
 381                 }
 382                 if (error >= 0) {
 383                         fprintf(pfi->msgFile, "\n\n fatal error at front %d", error);
 384                         exit(-1);
 385                 }
 386         }
 387 
 388         /*
 389          * STEP 6: post-process the factorization
 390          */
 391         ssolve_postfactor(pfi->frontmtx, pfi->msgFile);
 392 
 393         /*
 394          * STEP 7: get the solve map object for the parallel solve
 395          */
 396         {
 397                 pfi->solvemap = SolveMap_new();
 398                 SolveMap_ddMap(pfi->solvemap, *symmetryflagi4,
 399                                FrontMtx_upperBlockIVL(pfi->frontmtx),
 400                                FrontMtx_lowerBlockIVL(pfi->frontmtx), pfi->nthread, ownersIV,
 401                                FrontMtx_frontTree(pfi->frontmtx), RNDSEED, DEBUG_LVL,
 402                                pfi->msgFile);
 403         }
 404 
 405         /* cleanup: */
 406         InpMtx_free(mtxA);
 407         IVL_free(symbfacIVL);
 408         Graph_free(graph);
 409         IV_free(ownersIV);
 410 }
 411 
 412 DenseMtx *fsolve_MT(struct factorinfo *pfi, DenseMtx *mtxB)
 413 {
 414         DenseMtx *mtxX;
 415         /*
 416          * STEP 8: permute the right hand side into the new ordering
 417          */
 418         ssolve_permuteB(mtxB, pfi->oldToNewIV, pfi->msgFile);
 419 
 420 
 421         /*
 422          * STEP 9: solve the linear system in parallel
 423          */
 424         {
 425                 mtxX = DenseMtx_new();
 426                 DenseMtx_init(mtxX, SPOOLES_REAL, 0, 0, pfi->size, 1, 1, pfi->size);
 427                 DenseMtx_zero(mtxX);
 428                 FrontMtx_MT_solve(pfi->frontmtx, mtxX, mtxB, pfi->mtxmanager,
 429                                         pfi->solvemap, pfi->cpus, DEBUG_LVL,
 430                                         pfi->msgFile);
 431                 if (DEBUG_LVL > 1) {
 432                         fprintf(pfi->msgFile, "\n\n solution matrix in new ordering");
 433                         DenseMtx_writeForHumanEye(mtxX, pfi->msgFile);
 434                         fflush(pfi->msgFile);
 435                 }
 436         }
 437 
 438         /*
 439          * STEP 10: permute the solution into the original ordering
 440          */
 441         ssolve_permuteout(mtxX, pfi->newToOldIV, pfi->msgFile);
 442 
 443         /* Cleanup */
 444         DenseMtx_free(mtxB);
 445 
 446         return mtxX;
 447 }
 448 
 449 #endif
 450 
 451 /** 
 452  * factor a system of the form (au - sigma * aub)
 453  * 
 454 */
 455 
 456 FILE *msgFile;
 457 struct factorinfo pfi;
 458 
 459 void spooles_factor(double *ad, double *au,  double *adb, double *aub, 
 460              double *sigma,ITG *icol, ITG *irow, ITG *neq, ITG *nzs, 
 461              ITG *symmetryflag, ITG *inputformat, ITG *nzs3)
 462 {
 463         int size = *neq;
 464         int symmetryflagi4=*symmetryflag;
 465         InpMtx *mtxA;
 466 
 467         if(symmetryflagi4==0){
 468             printf(" Factoring the system of equations using the symmetric spooles solver\n");
 469         }else if(symmetryflagi4==2){
 470             printf(" Factoring the system of equations using the unsymmetric spooles solver\n");
 471         }
 472 
 473 /*      if(*neq==0) return;*/
 474  
 475         if ((msgFile = fopen("spooles.out", "a")) == NULL) {
 476                 fprintf(stderr, "\n fatal error in spooles.c"
 477                         "\n unable to open file spooles.out\n");
 478         }
 479 
 480         /*
 481          * Create the InpMtx object from the CalculiX matrix
 482          *      representation
 483          */
 484 
 485         {
 486             int col, ipoint, ipo;
 487             int nent,i,j;
 488             
 489             mtxA = InpMtx_new();
 490             
 491             if((*inputformat==0)||(*inputformat==3)){
 492                 nent = *nzs + *neq;     /* estimated # of nonzero entries */
 493             }else if(*inputformat==1){
 494                 nent=2**nzs+*neq;
 495             }else if(*inputformat==2){
 496                 nent=0;
 497                 for(i=0;i<*neq;i++){
 498                     for(j=0;j<*neq;j++){
 499                         if(fabs(ad[i*(int)*nzs+j])>1.e-20) nent++;
 500                     }
 501                 }
 502             }
 503             
 504             InpMtx_init(mtxA, INPMTX_BY_ROWS, SPOOLES_REAL, nent, size);
 505             
 506             /* inputformat:
 507                0: sparse lower triangular matrix in ad (diagonal)
 508                   and au (lower triangle)
 509                1: sparse lower + upper triangular matrix in ad (diagonal)
 510                   and au (first lower triangle, then upper triangle; lower
 511                   and upper triangle have nonzero's at symmetric positions)
 512                2: full matrix in field ad
 513                3: sparse upper triangular matrix in ad (diagonal)
 514                   and au (upper triangle)  */
 515             
 516             if(*inputformat==0){
 517                 ipoint = 0;
 518                 
 519                 if(*sigma==0.){
 520                     for (col = 0; col < size; col++) {
 521 //                      printf("row=%d,col=%d,value=%e\n",col,col,ad[col]);
 522                         InpMtx_inputRealEntry(mtxA, col, col, ad[col]);
 523                         for (ipo = ipoint; ipo < ipoint + icol[col]; ipo++) {
 524                             int row = irow[ipo] - 1;
 525 //                      printf("row=%d,col=%d,value=%e\n",col,row,au[ipo]);
 526                             InpMtx_inputRealEntry(mtxA, col, row,
 527                                                   au[ipo]);
 528                         }
 529                         ipoint = ipoint + icol[col];
 530                     }
 531                 }
 532                 else{
 533                     for (col = 0; col < size; col++) {
 534                         InpMtx_inputRealEntry(mtxA, col, col, ad[col]-*sigma*adb[col]);
 535                         for (ipo = ipoint; ipo < ipoint + icol[col]; ipo++) {
 536                             int row = irow[ipo] - 1;
 537                             InpMtx_inputRealEntry(mtxA, col, row,
 538                                                   au[ipo]-*sigma*aub[ipo]);
 539                         }
 540                         ipoint = ipoint + icol[col];
 541                     }
 542                 }
 543             }else if(*inputformat==1){
 544                 ipoint = 0;
 545                 
 546                 if(*sigma==0.){
 547                     for (col = 0; col < size; col++) {
 548 //                      printf("row=%d,col=%d,value=%e\n",col,col,ad[col]);
 549                         InpMtx_inputRealEntry(mtxA, col, col, ad[col]);
 550                         for (ipo = ipoint; ipo < ipoint + icol[col]; ipo++) {
 551                             int row = irow[ipo] - 1;
 552 //                      printf("row=%d,col=%d,value=%e\n",row,col,au[ipo]);
 553                             InpMtx_inputRealEntry(mtxA, row,col,
 554                                                   au[ipo]);
 555 //                      printf("row=%d,col=%d,value=%e\n",col,row,au[ipo+*nzs]);
 556                             InpMtx_inputRealEntry(mtxA, col,row,
 557                                                   au[ipo+(int)*nzs3]);
 558                         }
 559                         ipoint = ipoint + icol[col];
 560                     }
 561                 }
 562                 else{
 563                     for (col = 0; col < size; col++) {
 564                         InpMtx_inputRealEntry(mtxA, col, col, ad[col]-*sigma*adb[col]);
 565                         for (ipo = ipoint; ipo < ipoint + icol[col]; ipo++) {
 566                             int row = irow[ipo] - 1;
 567                             InpMtx_inputRealEntry(mtxA, row,col,
 568                                                   au[ipo]-*sigma*aub[ipo]);
 569                             InpMtx_inputRealEntry(mtxA, col,row,
 570                                                   au[ipo+(int)*nzs3]-*sigma*aub[ipo+(int)*nzs3]);
 571                         }
 572                         ipoint = ipoint + icol[col];
 573                     }
 574                 }
 575             }else if(*inputformat==2){
 576                 for(i=0;i<*neq;i++){
 577                     for(j=0;j<*neq;j++){
 578                         if(fabs(ad[i*(int)*nzs+j])>1.e-20){
 579                             InpMtx_inputRealEntry(mtxA,j,i,
 580                                                   ad[i*(int)*nzs+j]);
 581                         }
 582                     }
 583                 }
 584             }else if(*inputformat==3){
 585                 ipoint = 0;
 586                 
 587                 if(*sigma==0.){
 588                     for (col = 0; col < size; col++) {
 589                         InpMtx_inputRealEntry(mtxA, col, col, ad[col]);
 590                         for (ipo = ipoint; ipo < ipoint + icol[col]; ipo++) {
 591                             int row = irow[ipo] - 1;
 592                             InpMtx_inputRealEntry(mtxA, row, col,
 593                                                   au[ipo]);
 594                         }
 595                         ipoint = ipoint + icol[col];
 596                     }
 597                 }
 598                 else{
 599                     for (col = 0; col < size; col++) {
 600                         InpMtx_inputRealEntry(mtxA, col, col, ad[col]-*sigma*adb[col]);
 601                         for (ipo = ipoint; ipo < ipoint + icol[col]; ipo++) {
 602                             int row = irow[ipo] - 1;
 603                             InpMtx_inputRealEntry(mtxA, row, col, 
 604                                                   au[ipo]-*sigma*aub[ipo]);
 605                         }
 606                         ipoint = ipoint + icol[col];
 607                     }
 608                 }
 609             }             
 610             
 611             InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS);
 612             
 613             if (DEBUG_LVL > 1) {
 614                 fprintf(msgFile, "\n\n input matrix");
 615                 InpMtx_writeForHumanEye(mtxA, msgFile);
 616                 fflush(msgFile);
 617             }
 618         }
 619         
 620         /* solve it! */
 621 
 622 
 623 #ifdef USE_MT
 624         /* Rules for parallel solve:
 625            a. determining the maximum number of cpus:
 626               - if NUMBER_OF_CPUS>0 this is taken as the number of
 627                 cpus in the system
 628               - else it is taken from _SC_NPROCESSORS_CONF, if strictly
 629                 positive
 630               - else 1 cpu is assumed (default)
 631            b. determining the number of cpus to use
 632               - if CCX_NPROC_EQUATION_SOLVER>0 then use
 633                 CCX_NPROC_EQUATION_SOLVER cpus
 634               - else if OMP_NUM_THREADS>0 use OMP_NUM_THREADS cpus
 635               - else use the maximum number of cpus
 636          */
 637         if (num_cpus < 0) {
 638             int sys_cpus;
 639             char *env,*envloc,*envsys;
 640             
 641             num_cpus = 0;
 642             sys_cpus=0;
 643             
 644             /* explicit user declaration prevails */
 645             
 646             envsys=getenv("NUMBER_OF_CPUS");
 647             if(envsys){
 648                 sys_cpus=atoi(envsys);
 649                 if(sys_cpus<0) sys_cpus=0;
 650             }
 651             
 652             /* automatic detection of available number of processors */
 653             
 654             if(sys_cpus==0){
 655                 sys_cpus = getSystemCPUs();
 656                 if(sys_cpus<1) sys_cpus=1;
 657             }
 658             
 659             /* local declaration prevails, if strictly positive */
 660             
 661             envloc = getenv("CCX_NPROC_EQUATION_SOLVER");
 662             if(envloc){
 663                 num_cpus=atoi(envloc);
 664                 if(num_cpus<0){
 665                     num_cpus=0;
 666                 }else if(num_cpus>sys_cpus){
 667                     num_cpus=sys_cpus;
 668                 }
 669             }
 670             
 671             /* else global declaration, if any, applies */
 672             
 673             env = getenv("OMP_NUM_THREADS");
 674             if(num_cpus==0){
 675                 if (env)
 676                     num_cpus = atoi(env);
 677                 if (num_cpus < 1) {
 678                     num_cpus=1;
 679                 }else if(num_cpus>sys_cpus){
 680                     num_cpus=sys_cpus;
 681                 }
 682             }
 683             
 684         }
 685         printf(" Using up to %d cpu(s) for spooles.\n\n", num_cpus);
 686         if (num_cpus > 1) {
 687             /* do not use the multithreaded solver unless
 688              * we have multiple threads - avoid the
 689                  * locking overhead
 690                  */
 691                 factor_MT(&pfi, mtxA, size, msgFile,&symmetryflagi4);
 692         } else {
 693                 factor(&pfi, mtxA, size, msgFile,&symmetryflagi4);
 694         }
 695 #else
 696         printf(" Using 1 cpu for spooles.\n\n");
 697         factor(&pfi, mtxA, size, msgFile,&symmetryflagi4);
 698 #endif
 699 }
 700 
 701 /** 
 702  * solve a system of equations with rhs b
 703  * factorization must have been performed before 
 704  * using spooles_factor
 705  * 
 706 */
 707 
 708 void spooles_solve(double *b, ITG *neq)
 709 {
 710         /* rhs vector B
 711          * Note that there is only one rhs vector, thus
 712          * a bit simpler that the AllInOne example
 713          */
 714         int size = *neq;
 715         DenseMtx *mtxB,*mtxX;
 716 
 717 //      printf(" Solving the system of equations using the symmetric spooles solver\n");
 718 
 719         {
 720                 int i;
 721                 mtxB = DenseMtx_new();
 722                 DenseMtx_init(mtxB, SPOOLES_REAL, 0, 0, size, 1, 1, size);
 723                 DenseMtx_zero(mtxB);
 724                 for (i = 0; i < size; i++) {
 725                         DenseMtx_setRealEntry(mtxB, i, 0, b[i]);
 726                 }
 727                 if (DEBUG_LVL > 1) {
 728                         fprintf(msgFile, "\n\n rhs matrix in original ordering");
 729                         DenseMtx_writeForHumanEye(mtxB, msgFile);
 730                         fflush(msgFile);
 731                 }
 732         }
 733 
 734 #ifdef USE_MT
 735 //      printf(" Using up to %d cpu(s) for spooles.\n\n", num_cpus);
 736         if (num_cpus > 1) {
 737                 /* do not use the multithreaded solver unless
 738                  * we have multiple threads - avoid the
 739                  * locking overhead
 740                  */
 741                 mtxX=fsolve_MT(&pfi, mtxB);
 742         } else {
 743                 mtxX=fsolve(&pfi, mtxB);
 744         }
 745 #else
 746 //      printf(" Using 1 cpu for spooles.\n\n");
 747         mtxX=fsolve(&pfi, mtxB);
 748 #endif
 749 
 750         /* convert the result back to Calculix representation */
 751         {
 752                 int i;
 753                 for (i = 0; i < size; i++) {
 754                         b[i] = DenseMtx_entries(mtxX)[i];
 755                 }
 756         }
 757         /* cleanup */
 758         DenseMtx_free(mtxX);
 759 }
 760 
 761 void spooles_cleanup()
 762 {
 763         
 764         FrontMtx_free(pfi.frontmtx);
 765         IV_free(pfi.newToOldIV);
 766         IV_free(pfi.oldToNewIV);
 767         SubMtxManager_free(pfi.mtxmanager);
 768         if (pfi.solvemap)
 769                 SolveMap_free(pfi.solvemap);
 770         ETree_free(pfi.frontETree);
 771         fclose(msgFile);
 772 }
 773 
 774 
 775 /** 
 776  * factor a system of the form (au - sigma * aub)
 777  * 
 778 */
 779 
 780 FILE *msgFilf;
 781 struct factorinfo pfj;
 782 
 783 void spooles_factor_rad(double *ad, double *au,  double *adb, double *aub, 
 784              double *sigma,ITG *icol, ITG *irow,
 785              ITG *neq, ITG *nzs, ITG *symmetryflag, ITG *inputformat)
 786 {
 787         int symmetryflagi4=*symmetryflag;
 788         int size = *neq;
 789         InpMtx *mtxA;
 790 
 791         printf(" Factoring the system of equations using the unsymmetric spooles solver\n\n");
 792 
 793 /*      if(*neq==0) return;*/
 794  
 795         if ((msgFilf = fopen("spooles.out", "a")) == NULL) {
 796                 fprintf(stderr, "\n fatal error in spooles.c"
 797                         "\n unable to open file spooles.out\n");
 798         }
 799 
 800         /*
 801          * Create the InpMtx object from the Calculix matrix
 802          *      representation
 803          */
 804 
 805         {
 806             int col, ipoint, ipo;
 807             int nent,i,j;
 808             
 809             mtxA = InpMtx_new();
 810             
 811             if((*inputformat==0)||(*inputformat==3)){
 812                 nent = *nzs + *neq;     /* estimated # of nonzero entries */
 813             }else if(*inputformat==1){
 814                 nent=2**nzs+*neq;
 815             }else if(*inputformat==2){
 816                 nent=0;
 817                 for(i=0;i<*neq;i++){
 818                     for(j=0;j<*neq;j++){
 819                         if(fabs(ad[i*(int)*nzs+j])>1.e-20) nent++;
 820                     }
 821                 }
 822             }
 823             
 824             InpMtx_init(mtxA, INPMTX_BY_ROWS, SPOOLES_REAL, nent, size);
 825             
 826             /* inputformat:
 827                0: sparse lower triangular matrix in ad (diagonal)
 828                   and au (lower triangle)
 829                1: sparse lower + upper triangular matrix in ad (diagonal)
 830                   and au (first lower triangle, then upper triangle; lower
 831                   and upper triangle have nonzero's at symmetric positions)
 832                2: full matrix in field ad
 833                3: sparse upper triangular matrix in ad (diagonal)
 834                   and au (upper triangle)  */
 835             
 836             if(*inputformat==0){
 837                 ipoint = 0;
 838                 
 839                 if(*sigma==0.){
 840                     for (col = 0; col < size; col++) {
 841                         InpMtx_inputRealEntry(mtxA, col, col, ad[col]);
 842                         for (ipo = ipoint; ipo < ipoint + icol[col]; ipo++) {
 843                             int row = irow[ipo] - 1;
 844                             InpMtx_inputRealEntry(mtxA, col, row,
 845                                                   au[ipo]);
 846                         }
 847                         ipoint = ipoint + icol[col];
 848                     }
 849                 }
 850                 else{
 851                     for (col = 0; col < size; col++) {
 852                         InpMtx_inputRealEntry(mtxA, col, col, ad[col]-*sigma*adb[col]);
 853                         for (ipo = ipoint; ipo < ipoint + icol[col]; ipo++) {
 854                             int row = irow[ipo] - 1;
 855                             InpMtx_inputRealEntry(mtxA, col, row,
 856                                                   au[ipo]-*sigma*aub[ipo]);
 857                         }
 858                         ipoint = ipoint + icol[col];
 859                     }
 860                 }
 861             }else if(*inputformat==1){
 862                 ipoint = 0;
 863                 
 864                 if(*sigma==0.){
 865                     for (col = 0; col < size; col++) {
 866                         InpMtx_inputRealEntry(mtxA, col, col, ad[col]);
 867                         for (ipo = ipoint; ipo < ipoint + icol[col]; ipo++) {
 868                             int row = irow[ipo] - 1;
 869                             InpMtx_inputRealEntry(mtxA, row,col,
 870                                                   au[ipo]);
 871                             InpMtx_inputRealEntry(mtxA, col,row,
 872                                                   au[ipo+(int)*nzs]);
 873                         }
 874                         ipoint = ipoint + icol[col];
 875                     }
 876                 }
 877                 else{
 878                     for (col = 0; col < size; col++) {
 879                         InpMtx_inputRealEntry(mtxA, col, col, ad[col]-*sigma*adb[col]);
 880                         for (ipo = ipoint; ipo < ipoint + icol[col]; ipo++) {
 881                             int row = irow[ipo] - 1;
 882                             InpMtx_inputRealEntry(mtxA, row,col,
 883                                                   au[ipo]-*sigma*aub[ipo]);
 884                             InpMtx_inputRealEntry(mtxA, col,row,
 885                                                   au[ipo+(int)*nzs]-*sigma*aub[ipo+(int)*nzs]);
 886                         }
 887                         ipoint = ipoint + icol[col];
 888                     }
 889                 }
 890             }else if(*inputformat==2){
 891                 for(i=0;i<*neq;i++){
 892                     for(j=0;j<*neq;j++){
 893                         if(fabs(ad[i*(int)*nzs+j])>1.e-20){
 894                             InpMtx_inputRealEntry(mtxA,j,i,
 895                                                   ad[i*(int)*nzs+j]);
 896                         }
 897                     }
 898                 }
 899             }else if(*inputformat==3){
 900                 ipoint = 0;
 901                 
 902                 if(*sigma==0.){
 903                     for (col = 0; col < size; col++) {
 904                         InpMtx_inputRealEntry(mtxA, col, col, ad[col]);
 905                         for (ipo = ipoint; ipo < ipoint + icol[col]; ipo++) {
 906                             int row = irow[ipo] - 1;
 907                             InpMtx_inputRealEntry(mtxA, row, col,
 908                                                   au[ipo]);
 909                         }
 910                         ipoint = ipoint + icol[col];
 911                     }
 912                 }
 913                 else{
 914                     for (col = 0; col < size; col++) {
 915                         InpMtx_inputRealEntry(mtxA, col, col, ad[col]-*sigma*adb[col]);
 916                         for (ipo = ipoint; ipo < ipoint + icol[col]; ipo++) {
 917                             int row = irow[ipo] - 1;
 918                             InpMtx_inputRealEntry(mtxA, row, col, 
 919                                                   au[ipo]-*sigma*aub[ipo]);
 920                         }
 921                         ipoint = ipoint + icol[col];
 922                     }
 923                 }
 924             }             
 925             
 926             InpMtx_changeStorageMode(mtxA, INPMTX_BY_VECTORS);
 927             
 928             if (DEBUG_LVL > 1) {
 929                 fprintf(msgFilf, "\n\n input matrix");
 930                 InpMtx_writeForHumanEye(mtxA, msgFilf);
 931                 fflush(msgFilf);
 932             }
 933         }
 934 
 935         /* solve it! */
 936 
 937 
 938 #ifdef USE_MT
 939         /* Rules for parallel solve:
 940            a. determining the maximum number of cpus:
 941               - if NUMBER_OF_CPUS>0 this is taken as the number of
 942                 cpus in the system
 943               - else it is taken from _SC_NPROCESSORS_CONF, if strictly
 944                 positive
 945               - else 1 cpu is assumed (default)
 946            b. determining the number of cpus to use
 947               - if CCX_NPROC_EQUATION_SOLVER>0 then use
 948                 CCX_NPROC_EQUATION_SOLVER cpus
 949               - else if OMP_NUM_THREADS>0 use OMP_NUM_THREADS cpus
 950               - else use the maximum number of cpus
 951          */
 952         if (num_cpus < 0) {
 953             int sys_cpus;
 954             char *env,*envloc,*envsys;
 955             
 956             num_cpus = 0;
 957             sys_cpus=0;
 958             
 959             /* explicit user declaration prevails */
 960             
 961             envsys=getenv("NUMBER_OF_CPUS");
 962             if(envsys){
 963                 sys_cpus=atoi(envsys);
 964                 if(sys_cpus<0) sys_cpus=0;
 965             }
 966             
 967             /* automatic detection of available number of processors */
 968             
 969             if(sys_cpus==0){
 970                 sys_cpus = getSystemCPUs();
 971                 if(sys_cpus<1) sys_cpus=1;
 972             }
 973             
 974             /* local declaration prevails, if strictly positive */
 975             
 976             envloc = getenv("CCX_NPROC_EQUATION_SOLVER");
 977             if(envloc){
 978                 num_cpus=atoi(envloc);
 979                 if(num_cpus<0){
 980                     num_cpus=0;
 981                 }else if(num_cpus>sys_cpus){
 982                     num_cpus=sys_cpus;
 983                 }
 984             }
 985             
 986             /* else global declaration, if any, applies */
 987             
 988             env = getenv("OMP_NUM_THREADS");
 989             if(num_cpus==0){
 990                 if (env)
 991                     num_cpus = atoi(env);
 992                 if (num_cpus < 1) {
 993                     num_cpus=1;
 994                 }else if(num_cpus>sys_cpus){
 995                     num_cpus=sys_cpus;
 996                 }
 997             }
 998             
 999         }
1000         printf(" Using up to %d cpu(s) for spooles.\n\n", num_cpus);
1001         if (num_cpus > 1) {
1002                 /* do not use the multithreaded solver unless
1003                  * we have multiple threads - avoid the
1004                  * locking overhead
1005                  */
1006                 factor_MT(&pfj, mtxA, size, msgFilf,&symmetryflagi4);
1007         } else {
1008                 factor(&pfj, mtxA, size, msgFilf,&symmetryflagi4);
1009         }
1010 #else
1011         printf(" Using 1 cpu for spooles.\n\n");
1012         factor(&pfj, mtxA, size, msgFilf,&symmetryflagi4);
1013 #endif
1014 }
1015 
1016 /** 
1017  * solve a system of equations with rhs b
1018  * factorization must have been performed before 
1019  * using spooles_factor
1020  * 
1021 */
1022 
1023 void spooles_solve_rad(double *b, ITG *neq)
1024 {
1025         /* rhs vector B
1026          * Note that there is only one rhs vector, thus
1027          * a bit simpler that the AllInOne example
1028          */
1029         int size = *neq;
1030         DenseMtx *mtxB,*mtxX;
1031 
1032         printf(" solving the system of equations using the unsymmetric spooles solver\n");
1033 
1034         {
1035                 int i;
1036                 mtxB = DenseMtx_new();
1037                 DenseMtx_init(mtxB, SPOOLES_REAL, 0, 0, size, 1, 1, size);
1038                 DenseMtx_zero(mtxB);
1039                 for (i = 0; i < size; i++) {
1040                         DenseMtx_setRealEntry(mtxB, i, 0, b[i]);
1041                 }
1042                 if (DEBUG_LVL > 1) {
1043                         fprintf(msgFilf, "\n\n rhs matrix in original ordering");
1044                         DenseMtx_writeForHumanEye(mtxB, msgFilf);
1045                         fflush(msgFilf);
1046                 }
1047         }
1048 
1049 #ifdef USE_MT
1050         printf(" Using up to %d cpu(s) for spooles.\n\n", num_cpus);
1051         if (num_cpus > 1) {
1052                 /* do not use the multithreaded solver unless
1053                  * we have multiple threads - avoid the
1054                  * locking overhead
1055                  */
1056                 mtxX=fsolve_MT(&pfj, mtxB);
1057         } else {
1058                 mtxX=fsolve(&pfj, mtxB);
1059         }
1060 #else
1061         printf(" Using 1 cpu for spooles.\n\n");
1062         mtxX=fsolve(&pfj, mtxB);
1063 #endif
1064 
1065         /* convert the result back to Calculix representation */
1066         {
1067                 int i;
1068                 for (i = 0; i < size; i++) {
1069                         b[i] = DenseMtx_entries(mtxX)[i];
1070                 }
1071         }
1072         /* cleanup */
1073         DenseMtx_free(mtxX);
1074 }
1075 
1076 void spooles_cleanup_rad()
1077 {
1078         
1079         FrontMtx_free(pfj.frontmtx);
1080         IV_free(pfj.newToOldIV);
1081         IV_free(pfj.oldToNewIV);
1082         SubMtxManager_free(pfj.mtxmanager);
1083         if (pfj.solvemap)
1084                 SolveMap_free(pfj.solvemap);
1085         ETree_free(pfj.frontETree);
1086         fclose(msgFilf);
1087 }
1088 
1089 /** 
1090  * spooles: Main interface between Calculix and spooles:
1091  *
1092  * Perform 3 operations:
1093  * 1) factor
1094  * 2) solve
1095  * 3) cleanup
1096  *   
1097  */
1098 
1099 void spooles(double *ad, double *au, double *adb, double *aub, double *sigma,
1100              double *b, ITG *icol, ITG *irow, ITG *neq, ITG *nzs, 
1101              ITG *symmetryflag, ITG *inputformat, ITG *nzs3)
1102 {
1103 
1104   if(*neq==0) return;
1105 
1106   spooles_factor(ad,au,adb,aub,sigma,icol,irow,neq,nzs,symmetryflag,
1107                  inputformat,nzs3);
1108   
1109   spooles_solve(b,neq);
1110   
1111   spooles_cleanup();
1112 
1113 }
1114 
1115 #endif

/* [<][>][^][v][top][bottom][index][help] */