Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2929_PrioritizedGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.PGE/3.3/go-code/go-levmar/levmar-2.6/Axb_core.c @ 16080

Last change on this file since 16080 was 16080, checked in by hmaislin, 6 years ago

#2929 initial commit of working PGE version

File size: 36.8 KB
Line 
1/////////////////////////////////////////////////////////////////////////////////
2//
3//  Solution of linear systems involved in the Levenberg - Marquardt
4//  minimization algorithm
5//  Copyright (C) 2004  Manolis Lourakis (lourakis at ics forth gr)
6//  Institute of Computer Science, Foundation for Research & Technology - Hellas
7//  Heraklion, Crete, Greece.
8//
9//  This program is free software; you can redistribute it and/or modify
10//  it under the terms of the GNU General Public License as published by
11//  the Free Software Foundation; either version 2 of the License, or
12//  (at your option) any later version.
13//
14//  This program is distributed in the hope that it will be useful,
15//  but WITHOUT ANY WARRANTY; without even the implied warranty of
16//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17//  GNU General Public License for more details.
18//
19/////////////////////////////////////////////////////////////////////////////////
20
21
22/* Solvers for the linear systems Ax=b. Solvers should NOT modify their A & B arguments! */
23
24
25#ifndef LM_REAL // not included by Axb.c
26#error This file should not be compiled directly!
27#endif
28
29
30#ifdef LINSOLVERS_RETAIN_MEMORY
31#define __STATIC__ static
32#else
33#define __STATIC__ // empty
34#endif /* LINSOLVERS_RETAIN_MEMORY */
35
36#ifdef HAVE_LAPACK
37
38/* prototypes of LAPACK routines */
39
40#define GEQRF LM_MK_LAPACK_NAME(geqrf)
41#define ORGQR LM_MK_LAPACK_NAME(orgqr)
42#define TRTRS LM_MK_LAPACK_NAME(trtrs)
43#define POTF2 LM_MK_LAPACK_NAME(potf2)
44#define POTRF LM_MK_LAPACK_NAME(potrf)
45#define POTRS LM_MK_LAPACK_NAME(potrs)
46#define GETRF LM_MK_LAPACK_NAME(getrf)
47#define GETRS LM_MK_LAPACK_NAME(getrs)
48#define GESVD LM_MK_LAPACK_NAME(gesvd)
49#define GESDD LM_MK_LAPACK_NAME(gesdd)
50#define SYTRF LM_MK_LAPACK_NAME(sytrf)
51#define SYTRS LM_MK_LAPACK_NAME(sytrs)
52#define PLASMA_POSV LM_CAT_(PLASMA_, LM_ADD_PREFIX(posv))
53
54#ifdef __cplusplus
55extern "C" {
56#endif
57/* QR decomposition */
58extern int GEQRF(int *m, int *n, LM_REAL *a, int *lda, LM_REAL *tau, LM_REAL *work, int *lwork, int *info);
59extern int ORGQR(int *m, int *n, int *k, LM_REAL *a, int *lda, LM_REAL *tau, LM_REAL *work, int *lwork, int *info);
60
61/* solution of triangular systems */
62extern int TRTRS(char *uplo, char *trans, char *diag, int *n, int *nrhs, LM_REAL *a, int *lda, LM_REAL *b, int *ldb, int *info);
63
64/* Cholesky decomposition and systems solution */
65extern int POTF2(char *uplo, int *n, LM_REAL *a, int *lda, int *info);
66extern int POTRF(char *uplo, int *n, LM_REAL *a, int *lda, int *info); /* block version of dpotf2 */
67extern int POTRS(char *uplo, int *n, int *nrhs, LM_REAL *a, int *lda, LM_REAL *b, int *ldb, int *info);
68
69/* LU decomposition and systems solution */
70extern int GETRF(int *m, int *n, LM_REAL *a, int *lda, int *ipiv, int *info);
71extern int GETRS(char *trans, int *n, int *nrhs, LM_REAL *a, int *lda, int *ipiv, LM_REAL *b, int *ldb, int *info);
72
73/* Singular Value Decomposition (SVD) */
74extern int GESVD(char *jobu, char *jobvt, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu,
75                   LM_REAL *vt, int *ldvt, LM_REAL *work, int *lwork, int *info);
76
77/* lapack 3.0 new SVD routine, faster than xgesvd().
78 * In case that your version of LAPACK does not include them, use the above two older routines
79 */
80extern int GESDD(char *jobz, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, LM_REAL *u, int *ldu, LM_REAL *vt, int *ldvt,
81                   LM_REAL *work, int *lwork, int *iwork, int *info);
82
83/* LDLt/UDUt factorization and systems solution */
84extern int SYTRF(char *uplo, int *n, LM_REAL *a, int *lda, int *ipiv, LM_REAL *work, int *lwork, int *info);
85extern int SYTRS(char *uplo, int *n, int *nrhs, LM_REAL *a, int *lda, int *ipiv, LM_REAL *b, int *ldb, int *info);
86#ifdef __cplusplus
87}
88#endif
89
90/* precision-specific definitions */
91#define AX_EQ_B_QR LM_ADD_PREFIX(Ax_eq_b_QR)
92#define AX_EQ_B_QRLS LM_ADD_PREFIX(Ax_eq_b_QRLS)
93#define AX_EQ_B_CHOL LM_ADD_PREFIX(Ax_eq_b_Chol)
94#define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU)
95#define AX_EQ_B_SVD LM_ADD_PREFIX(Ax_eq_b_SVD)
96#define AX_EQ_B_BK LM_ADD_PREFIX(Ax_eq_b_BK)
97#define AX_EQ_B_PLASMA_CHOL LM_ADD_PREFIX(Ax_eq_b_PLASMA_Chol)
98
99/*
100 * This function returns the solution of Ax = b
101 *
102 * The function is based on QR decomposition with explicit computation of Q:
103 * If A=Q R with Q orthogonal and R upper triangular, the linear system becomes
104 * Q R x = b or R x = Q^T b.
105 * The last equation can be solved directly.
106 *
107 * A is mxm, b is mx1
108 *
109 * The function returns 0 in case of error, 1 if successful
110 *
111 * This function is often called repetitively to solve problems of identical
112 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
113 * retained between calls and free'd-malloc'ed when not of the appropriate size.
114 * A call with NULL as the first argument forces this memory to be released.
115 */
116int AX_EQ_B_QR(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
117{
118__STATIC__ LM_REAL *buf=NULL;
119__STATIC__ int buf_sz=0;
120
121static int nb=0; /* no __STATIC__ decl. here! */
122
123LM_REAL *a, *tau, *r, *work;
124int a_sz, tau_sz, r_sz, tot_sz;
125register int i, j;
126int info, worksz, nrhs=1;
127register LM_REAL sum;
128
129    if(!A)
130#ifdef LINSOLVERS_RETAIN_MEMORY
131    {
132      if(buf) free(buf);
133      buf=NULL;
134      buf_sz=0;
135
136      return 1;
137    }
138#else
139      return 1; /* NOP */
140#endif /* LINSOLVERS_RETAIN_MEMORY */
141   
142    /* calculate required memory size */
143    a_sz=m*m;
144    tau_sz=m;
145    r_sz=m*m; /* only the upper triangular part really needed */
146    if(!nb){
147      LM_REAL tmp;
148
149      worksz=-1; // workspace query; optimal size is returned in tmp
150      GEQRF((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (LM_REAL *)&tmp, (int *)&worksz, (int *)&info);
151      nb=((int)tmp)/m; // optimal worksize is m*nb
152    }
153    worksz=nb*m;
154    tot_sz=a_sz + tau_sz + r_sz + worksz;
155
156#ifdef LINSOLVERS_RETAIN_MEMORY
157    if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
158      if(buf) free(buf); /* free previously allocated memory */
159
160      buf_sz=tot_sz;
161      buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
162      if(!buf){
163        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n");
164        exit(1);
165      }
166    }
167#else
168      buf_sz=tot_sz;
169      buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
170      if(!buf){
171        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QR) "() failed!\n");
172        exit(1);
173      }
174#endif /* LINSOLVERS_RETAIN_MEMORY */
175
176    a=buf;
177    tau=a+a_sz;
178    r=tau+tau_sz;
179    work=r+r_sz;
180
181  /* store A (column major!) into a */
182  for(i=0; i<m; i++)
183    for(j=0; j<m; j++)
184      a[i+j*m]=A[i*m+j];
185
186  /* QR decomposition of A */
187  GEQRF((int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info);
188  /* error treatment */
189  if(info!=0){
190    if(info<0){
191      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GEQRF) " in ", AX_EQ_B_QR) "()\n", -info);
192      exit(1);
193    }
194    else{
195      fprintf(stderr, RCAT(RCAT("Unknown LAPACK error %d for ", GEQRF) " in ", AX_EQ_B_QR) "()\n", info);
196#ifndef LINSOLVERS_RETAIN_MEMORY
197      free(buf);
198#endif
199
200      return 0;
201    }
202  }
203
204  /* R is stored in the upper triangular part of a; copy it in r so that ORGQR() below won't destroy it */
205  memcpy(r, a, r_sz*sizeof(LM_REAL));
206
207  /* compute Q using the elementary reflectors computed by the above decomposition */
208  ORGQR((int *)&m, (int *)&m, (int *)&m, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info);
209  if(info!=0){
210    if(info<0){
211      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", ORGQR) " in ", AX_EQ_B_QR) "()\n", -info);
212      exit(1);
213    }
214    else{
215      fprintf(stderr, RCAT("Unknown LAPACK error (%d) in ", AX_EQ_B_QR) "()\n", info);
216#ifndef LINSOLVERS_RETAIN_MEMORY
217      free(buf);
218#endif
219
220      return 0;
221    }
222  }
223
224  /* Q is now in a; compute Q^T b in x */
225  for(i=0; i<m; i++){
226    for(j=0, sum=0.0; j<m; j++)
227      sum+=a[i*m+j]*B[j];
228    x[i]=sum;
229  }
230
231  /* solve the linear system R x = Q^t b */
232  TRTRS("U", "N", "N", (int *)&m, (int *)&nrhs, r, (int *)&m, x, (int *)&m, &info);
233  /* error treatment */
234  if(info!=0){
235    if(info<0){
236      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_QR) "()\n", -info);
237      exit(1);
238    }
239    else{
240      fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QR) "()\n", info);
241#ifndef LINSOLVERS_RETAIN_MEMORY
242      free(buf);
243#endif
244
245      return 0;
246    }
247  }
248
249#ifndef LINSOLVERS_RETAIN_MEMORY
250  free(buf);
251#endif
252
253  return 1;
254}
255
256/*
257 * This function returns the solution of min_x ||Ax - b||
258 *
259 * || . || is the second order (i.e. L2) norm. This is a least squares technique that
260 * is based on QR decomposition:
261 * If A=Q R with Q orthogonal and R upper triangular, the normal equations become
262 * (A^T A) x = A^T b  or (R^T Q^T Q R) x = A^T b or (R^T R) x = A^T b.
263 * This amounts to solving R^T y = A^T b for y and then R x = y for x
264 * Note that Q does not need to be explicitly computed
265 *
266 * A is mxn, b is mx1
267 *
268 * The function returns 0 in case of error, 1 if successful
269 *
270 * This function is often called repetitively to solve problems of identical
271 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
272 * retained between calls and free'd-malloc'ed when not of the appropriate size.
273 * A call with NULL as the first argument forces this memory to be released.
274 */
275int AX_EQ_B_QRLS(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m, int n)
276{
277__STATIC__ LM_REAL *buf=NULL;
278__STATIC__ int buf_sz=0;
279
280static int nb=0; /* no __STATIC__ decl. here! */
281
282LM_REAL *a, *tau, *r, *work;
283int a_sz, tau_sz, r_sz, tot_sz;
284register int i, j;
285int info, worksz, nrhs=1;
286register LM_REAL sum;
287   
288    if(!A)
289#ifdef LINSOLVERS_RETAIN_MEMORY
290    {
291      if(buf) free(buf);
292      buf=NULL;
293      buf_sz=0;
294
295      return 1;
296    }
297#else
298      return 1; /* NOP */
299#endif /* LINSOLVERS_RETAIN_MEMORY */
300   
301    if(m<n){
302      fprintf(stderr, RCAT("Normal equations require that the number of rows is greater than number of columns in ", AX_EQ_B_QRLS) "() [%d x %d]! -- try transposing\n", m, n);
303      exit(1);
304    }
305     
306    /* calculate required memory size */
307    a_sz=m*n;
308    tau_sz=n;
309    r_sz=n*n;
310    if(!nb){
311      LM_REAL tmp;
312
313      worksz=-1; // workspace query; optimal size is returned in tmp
314      GEQRF((int *)&m, (int *)&m, NULL, (int *)&m, NULL, (LM_REAL *)&tmp, (int *)&worksz, (int *)&info);
315      nb=((int)tmp)/m; // optimal worksize is m*nb
316    }
317    worksz=nb*m;
318    tot_sz=a_sz + tau_sz + r_sz + worksz;
319
320#ifdef LINSOLVERS_RETAIN_MEMORY
321    if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
322      if(buf) free(buf); /* free previously allocated memory */
323
324      buf_sz=tot_sz;
325      buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
326      if(!buf){
327        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n");
328        exit(1);
329      }
330    }
331#else
332      buf_sz=tot_sz;
333      buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
334      if(!buf){
335        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_QRLS) "() failed!\n");
336        exit(1);
337      }
338#endif /* LINSOLVERS_RETAIN_MEMORY */
339
340    a=buf;
341    tau=a+a_sz;
342    r=tau+tau_sz;
343    work=r+r_sz;
344
345  /* store A (column major!) into a */
346  for(i=0; i<m; i++)
347    for(j=0; j<n; j++)
348      a[i+j*m]=A[i*n+j];
349
350  /* compute A^T b in x */
351  for(i=0; i<n; i++){
352    for(j=0, sum=0.0; j<m; j++)
353      sum+=A[j*n+i]*B[j];
354    x[i]=sum;
355  }
356
357  /* QR decomposition of A */
358  GEQRF((int *)&m, (int *)&n, a, (int *)&m, tau, work, (int *)&worksz, (int *)&info);
359  /* error treatment */
360  if(info!=0){
361    if(info<0){
362      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GEQRF) " in ", AX_EQ_B_QRLS) "()\n", -info);
363      exit(1);
364    }
365    else{
366      fprintf(stderr, RCAT(RCAT("Unknown LAPACK error %d for ", GEQRF) " in ", AX_EQ_B_QRLS) "()\n", info);
367#ifndef LINSOLVERS_RETAIN_MEMORY
368      free(buf);
369#endif
370
371      return 0;
372    }
373  }
374
375  /* R is stored in the upper triangular part of a. Note that a is mxn while r nxn */
376  for(j=0; j<n; j++){
377    for(i=0; i<=j; i++)
378      r[i+j*n]=a[i+j*m];
379
380    /* lower part is zero */
381    for(i=j+1; i<n; i++)
382      r[i+j*n]=0.0;
383  }
384
385  /* solve the linear system R^T y = A^t b */
386  TRTRS("U", "T", "N", (int *)&n, (int *)&nrhs, r, (int *)&n, x, (int *)&n, &info);
387  /* error treatment */
388  if(info!=0){
389    if(info<0){
390      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_QRLS) "()\n", -info);
391      exit(1);
392    }
393    else{
394      fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QRLS) "()\n", info);
395#ifndef LINSOLVERS_RETAIN_MEMORY
396      free(buf);
397#endif
398
399      return 0;
400    }
401  }
402
403  /* solve the linear system R x = y */
404  TRTRS("U", "N", "N", (int *)&n, (int *)&nrhs, r, (int *)&n, x, (int *)&n, &info);
405  /* error treatment */
406  if(info!=0){
407    if(info<0){
408      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_QRLS) "()\n", -info);
409      exit(1);
410    }
411    else{
412      fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_QRLS) "()\n", info);
413#ifndef LINSOLVERS_RETAIN_MEMORY
414      free(buf);
415#endif
416
417      return 0;
418    }
419  }
420
421#ifndef LINSOLVERS_RETAIN_MEMORY
422  free(buf);
423#endif
424
425  return 1;
426}
427
428/*
429 * This function returns the solution of Ax=b
430 *
431 * The function assumes that A is symmetric & postive definite and employs
432 * the Cholesky decomposition:
433 * If A=L L^T with L lower triangular, the system to be solved becomes
434 * (L L^T) x = b
435 * This amounts to solving L y = b for y and then L^T x = y for x
436 *
437 * A is mxm, b is mx1
438 *
439 * The function returns 0 in case of error, 1 if successful
440 *
441 * This function is often called repetitively to solve problems of identical
442 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
443 * retained between calls and free'd-malloc'ed when not of the appropriate size.
444 * A call with NULL as the first argument forces this memory to be released.
445 */
446int AX_EQ_B_CHOL(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
447{
448__STATIC__ LM_REAL *buf=NULL;
449__STATIC__ int buf_sz=0;
450
451LM_REAL *a;
452int a_sz, tot_sz;
453int info, nrhs=1;
454   
455    if(!A)
456#ifdef LINSOLVERS_RETAIN_MEMORY
457    {
458      if(buf) free(buf);
459      buf=NULL;
460      buf_sz=0;
461
462      return 1;
463    }
464#else
465      return 1; /* NOP */
466#endif /* LINSOLVERS_RETAIN_MEMORY */
467   
468    /* calculate required memory size */
469    a_sz=m*m;
470    tot_sz=a_sz;
471
472#ifdef LINSOLVERS_RETAIN_MEMORY
473    if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
474      if(buf) free(buf); /* free previously allocated memory */
475
476      buf_sz=tot_sz;
477      buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
478      if(!buf){
479        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n");
480        exit(1);
481      }
482    }
483#else
484      buf_sz=tot_sz;
485      buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
486      if(!buf){
487        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_CHOL) "() failed!\n");
488        exit(1);
489      }
490#endif /* LINSOLVERS_RETAIN_MEMORY */
491
492  a=buf;
493
494  /* store A into a and B into x. A is assumed symmetric,
495   * hence no transposition is needed
496   */
497  memcpy(a, A, a_sz*sizeof(LM_REAL));
498  memcpy(x, B, m*sizeof(LM_REAL));
499
500  /* Cholesky decomposition of A */
501  //POTF2("L", (int *)&m, a, (int *)&m, (int *)&info);
502  POTRF("L", (int *)&m, a, (int *)&m, (int *)&info);
503  /* error treatment */
504  if(info!=0){
505    if(info<0){
506      fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", POTF2) "/", POTRF) " in ",
507                      AX_EQ_B_CHOL) "()\n", -info);
508      exit(1);
509    }
510    else{
511      fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization could not be completed for ", POTF2) "/", POTRF) " in ", AX_EQ_B_CHOL) "()\n", info);
512#ifndef LINSOLVERS_RETAIN_MEMORY
513      free(buf);
514#endif
515
516      return 0;
517    }
518  }
519
520  /* solve using the computed Cholesky in one lapack call */
521  POTRS("L", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info);
522  if(info<0){
523    fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", POTRS) " in ", AX_EQ_B_CHOL) "()\n", -info);
524    exit(1);
525  }
526
527#if 0
528  /* alternative: solve the linear system L y = b ... */
529  TRTRS("L", "N", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info);
530  /* error treatment */
531  if(info!=0){
532    if(info<0){
533      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) " in ", AX_EQ_B_CHOL) "()\n", -info);
534      exit(1);
535    }
536    else{
537      fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_CHOL) "()\n", info);
538#ifndef LINSOLVERS_RETAIN_MEMORY
539      free(buf);
540#endif
541
542      return 0;
543    }
544  }
545
546  /* ... solve the linear system L^T x = y */
547  TRTRS("L", "T", "N", (int *)&m, (int *)&nrhs, a, (int *)&m, x, (int *)&m, &info);
548  /* error treatment */
549  if(info!=0){
550    if(info<0){
551      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRS) "in ", AX_EQ_B_CHOL) "()\n", -info);
552      exit(1);
553    }
554    else{
555      fprintf(stderr, RCAT("LAPACK error: the %d-th diagonal element of A is zero (singular matrix) in ", AX_EQ_B_CHOL) "()\n", info);
556#ifndef LINSOLVERS_RETAIN_MEMORY
557      free(buf);
558#endif
559
560      return 0;
561    }
562  }
563#endif /* 0 */
564
565#ifndef LINSOLVERS_RETAIN_MEMORY
566  free(buf);
567#endif
568
569  return 1;
570}
571
572#ifdef HAVE_PLASMA
573
574/* Linear algebra using PLASMA parallel library for multicore CPUs.
575 * http://icl.cs.utk.edu/plasma/
576 *
577 * WARNING: BLAS multithreading should be disabled, e.g. setenv MKL_NUM_THREADS 1
578 */
579
580#ifndef _LM_PLASMA_MISC_
581/* avoid multiple inclusion of helper code */
582#define _LM_PLASMA_MISC_
583
584#include <plasma.h>
585#include <cblas.h>
586#include <lapacke.h>
587#include <plasma_tmg.h>
588#include <core_blas.h>
589
590/* programmatically determine the number of cores on the current machine */
591#ifdef _WIN32
592#include <windows.h>
593#elif __linux
594#include <unistd.h>
595#endif
596static int getnbcores()
597{
598#ifdef _WIN32
599  SYSTEM_INFO sysinfo;
600  GetSystemInfo(&sysinfo);
601  return sysinfo.dwNumberOfProcessors;
602#elif __linux
603  return sysconf(_SC_NPROCESSORS_ONLN);
604#else // unknown system
605  return 2<<1; // will be halved by right shift below
606#endif
607}
608
609static int PLASMA_ncores=-(getnbcores()>>1); // >0 if PLASMA initialized, <0 otherwise
610
611/* user-specified number of cores */
612void levmar_PLASMA_setnbcores(int cores)
613{
614  PLASMA_ncores=(cores>0)? -cores : ((cores)? cores : -2);
615}
616#endif /* _LM_PLASMA_MISC_ */
617
618/*
619 * This function returns the solution of Ax=b
620 *
621 * The function assumes that A is symmetric & positive definite and employs the
622 * Cholesky decomposition implemented by PLASMA for homogeneous multicore processors.
623 *
624 * A is mxm, b is mx1
625 *
626 * The function returns 0 in case of error, 1 if successfull
627 *
628 * This function is often called repetitively to solve problems of identical
629 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
630 * retained between calls and free'd-malloc'ed when not of the appropriate size.
631 * A call with NULL as the first argument forces this memory to be released.
632 */
633int AX_EQ_B_PLASMA_CHOL(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
634{
635__STATIC__ LM_REAL *buf=NULL;
636__STATIC__ int buf_sz=0;
637
638LM_REAL *a;
639int a_sz, tot_sz;
640int info, nrhs=1;
641
642    if(A==NULL){
643#ifdef LINSOLVERS_RETAIN_MEMORY
644      if(buf) free(buf);
645      buf=NULL;
646      buf_sz=0;
647#endif /* LINSOLVERS_RETAIN_MEMORY */
648
649      PLASMA_Finalize();
650      PLASMA_ncores=-PLASMA_ncores;
651
652      return 1;
653    }
654
655    /* calculate required memory size */
656    a_sz=m*m;
657    tot_sz=a_sz;
658
659#ifdef LINSOLVERS_RETAIN_MEMORY
660    if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
661      if(buf) free(buf); /* free previously allocated memory */
662
663      buf_sz=tot_sz;
664      buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
665      if(!buf){
666        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_PLASMA_CHOL) "() failed!\n");
667        exit(1);
668      }
669    }
670#else
671    buf_sz=tot_sz;
672    buf=(LM_REAL *)malloc(buf_sz*sizeof(LM_REAL));
673    if(!buf){
674      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_PLASMA_CHOL) "() failed!\n");
675      exit(1);
676    }
677#endif /* LINSOLVERS_RETAIN_MEMORY */
678
679    a=buf;
680
681    /* store A into a and B into x; A is assumed to be symmetric,
682     * hence no transposition is needed
683     */
684    memcpy(a, A, a_sz*sizeof(LM_REAL));
685    memcpy(x, B, m*sizeof(LM_REAL));
686
687  /* initialize PLASMA */
688  if(PLASMA_ncores<0){
689    PLASMA_ncores=-PLASMA_ncores;
690    PLASMA_Init(PLASMA_ncores);
691    fprintf(stderr, RCAT("\n", AX_EQ_B_PLASMA_CHOL) "(): PLASMA is running on %d cores.\n\n", PLASMA_ncores);
692  }
693 
694  /* Solve the linear system */
695  info=PLASMA_POSV(PlasmaLower, m, 1, a, m, x, m);
696  /* error treatment */
697  if(info!=0){
698    if(info<0){
699      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", PLASMA_POSV) " in ",
700                      AX_EQ_B_PLASMA_CHOL) "()\n", -info);
701      exit(1);
702    }
703    else{
704      fprintf(stderr, RCAT(RCAT("LAPACK error: the leading minor of order %d is not positive definite,\n"
705                                "the factorization could not be completed for ", PLASMA_POSV) " in ", AX_EQ_B_CHOL) "()\n", info);
706#ifndef LINSOLVERS_RETAIN_MEMORY
707      free(buf);
708#endif
709      return 0;
710    }
711  }
712
713#ifndef LINSOLVERS_RETAIN_MEMORY
714  free(buf);
715#endif
716
717  return 1;
718}
719#endif /* HAVE_PLASMA */
720
721/*
722 * This function returns the solution of Ax = b
723 *
724 * The function employs LU decomposition:
725 * If A=L U with L lower and U upper triangular, then the original system
726 * amounts to solving
727 * L y = b, U x = y
728 *
729 * A is mxm, b is mx1
730 *
731 * The function returns 0 in case of error, 1 if successful
732 *
733 * This function is often called repetitively to solve problems of identical
734 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
735 * retained between calls and free'd-malloc'ed when not of the appropriate size.
736 * A call with NULL as the first argument forces this memory to be released.
737 */
738int AX_EQ_B_LU(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
739{
740__STATIC__ LM_REAL *buf=NULL;
741__STATIC__ int buf_sz=0;
742
743int a_sz, ipiv_sz, tot_sz;
744register int i, j;
745int info, *ipiv, nrhs=1;
746LM_REAL *a;
747   
748    if(!A)
749#ifdef LINSOLVERS_RETAIN_MEMORY
750    {
751      if(buf) free(buf);
752      buf=NULL;
753      buf_sz=0;
754
755      return 1;
756    }
757#else
758      return 1; /* NOP */
759#endif /* LINSOLVERS_RETAIN_MEMORY */
760   
761    /* calculate required memory size */
762    ipiv_sz=m;
763    a_sz=m*m;
764    tot_sz=a_sz*sizeof(LM_REAL) + ipiv_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
765
766#ifdef LINSOLVERS_RETAIN_MEMORY
767    if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
768      if(buf) free(buf); /* free previously allocated memory */
769
770      buf_sz=tot_sz;
771      buf=(LM_REAL *)malloc(buf_sz);
772      if(!buf){
773        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
774        exit(1);
775      }
776    }
777#else
778      buf_sz=tot_sz;
779      buf=(LM_REAL *)malloc(buf_sz);
780      if(!buf){
781        fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
782        exit(1);
783      }
784#endif /* LINSOLVERS_RETAIN_MEMORY */
785
786    a=buf;
787    ipiv=(int *)(a+a_sz);
788
789    /* store A (column major!) into a and B into x */
790    for(i=0; i<m; i++){
791      for(j=0; j<m; j++)
792        a[i+j*m]=A[i*m+j];
793
794      x[i]=B[i];
795    }
796
797  /* LU decomposition for A */
798  GETRF((int *)&m, (int *)&m, a, (int *)&m, ipiv, (int *)&info); 
799  if(info!=0){
800    if(info<0){
801      fprintf(stderr, RCAT(RCAT("argument %d of ", GETRF) " illegal in ", AX_EQ_B_LU) "()\n", -info);
802      exit(1);
803    }
804    else{
805      fprintf(stderr, RCAT(RCAT("singular matrix A for ", GETRF) " in ", AX_EQ_B_LU) "()\n");
806#ifndef LINSOLVERS_RETAIN_MEMORY
807      free(buf);
808#endif
809
810      return 0;
811    }
812  }
813
814  /* solve the system with the computed LU */
815  GETRS("N", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, x, (int *)&m, (int *)&info);
816  if(info!=0){
817    if(info<0){
818      fprintf(stderr, RCAT(RCAT("argument %d of ", GETRS) " illegal in ", AX_EQ_B_LU) "()\n", -info);
819      exit(1);
820    }
821    else{
822      fprintf(stderr, RCAT(RCAT("unknown error for ", GETRS) " in ", AX_EQ_B_LU) "()\n");
823#ifndef LINSOLVERS_RETAIN_MEMORY
824      free(buf);
825#endif
826
827      return 0;
828    }
829  }
830
831#ifndef LINSOLVERS_RETAIN_MEMORY
832  free(buf);
833#endif
834
835  return 1;
836}
837
838/*
839 * This function returns the solution of Ax = b
840 *
841 * The function is based on SVD decomposition:
842 * If A=U D V^T with U, V orthogonal and D diagonal, the linear system becomes
843 * (U D V^T) x = b or x=V D^{-1} U^T b
844 * Note that V D^{-1} U^T is the pseudoinverse A^+
845 *
846 * A is mxm, b is mx1.
847 *
848 * The function returns 0 in case of error, 1 if successful
849 *
850 * This function is often called repetitively to solve problems of identical
851 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
852 * retained between calls and free'd-malloc'ed when not of the appropriate size.
853 * A call with NULL as the first argument forces this memory to be released.
854 */
855int AX_EQ_B_SVD(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
856{
857__STATIC__ LM_REAL *buf=NULL;
858__STATIC__ int buf_sz=0;
859static LM_REAL eps=LM_CNST(-1.0);
860
861register int i, j;
862LM_REAL *a, *u, *s, *vt, *work;
863int a_sz, u_sz, s_sz, vt_sz, tot_sz;
864LM_REAL thresh, one_over_denom;
865register LM_REAL sum;
866int info, rank, worksz, *iwork, iworksz;
867   
868    if(!A)
869#ifdef LINSOLVERS_RETAIN_MEMORY
870    {
871      if(buf) free(buf);
872      buf=NULL;
873      buf_sz=0;
874
875      return 1;
876    }
877#else
878      return 1; /* NOP */
879#endif /* LINSOLVERS_RETAIN_MEMORY */
880   
881  /* calculate required memory size */
882#if 1 /* use optimal size */
883  worksz=-1; // workspace query. Keep in mind that GESDD requires more memory than GESVD
884  /* note that optimal work size is returned in thresh */
885  GESVD("A", "A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m, (LM_REAL *)&thresh, (int *)&worksz, &info);
886  //GESDD("A", (int *)&m, (int *)&m, NULL, (int *)&m, NULL, NULL, (int *)&m, NULL, (int *)&m, (LM_REAL *)&thresh, (int *)&worksz, NULL, &info);
887  worksz=(int)thresh;
888#else /* use minimum size */
889  worksz=5*m; // min worksize for GESVD
890  //worksz=m*(7*m+4); // min worksize for GESDD
891#endif
892  iworksz=8*m;
893  a_sz=m*m;
894  u_sz=m*m; s_sz=m; vt_sz=m*m;
895
896  tot_sz=(a_sz + u_sz + s_sz + vt_sz + worksz)*sizeof(LM_REAL) + iworksz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
897
898#ifdef LINSOLVERS_RETAIN_MEMORY
899  if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
900    if(buf) free(buf); /* free previously allocated memory */
901
902    buf_sz=tot_sz;
903    buf=(LM_REAL *)malloc(buf_sz);
904    if(!buf){
905      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n");
906      exit(1);
907    }
908  }
909#else
910    buf_sz=tot_sz;
911    buf=(LM_REAL *)malloc(buf_sz);
912    if(!buf){
913      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n");
914      exit(1);
915    }
916#endif /* LINSOLVERS_RETAIN_MEMORY */
917
918  a=buf;
919  u=a+a_sz;
920  s=u+u_sz;
921  vt=s+s_sz;
922  work=vt+vt_sz;
923  iwork=(int *)(work+worksz);
924
925  /* store A (column major!) into a */
926  for(i=0; i<m; i++)
927    for(j=0; j<m; j++)
928      a[i+j*m]=A[i*m+j];
929
930  /* SVD decomposition of A */
931  GESVD("A", "A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, &info);
932  //GESDD("A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, iwork, &info);
933
934  /* error treatment */
935  if(info!=0){
936    if(info<0){
937      fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GESVD), "/" GESDD) " in ", AX_EQ_B_SVD) "()\n", -info);
938      exit(1);
939    }
940    else{
941      fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", AX_EQ_B_SVD) "() [info=%d]\n", info);
942#ifndef LINSOLVERS_RETAIN_MEMORY
943      free(buf);
944#endif
945
946      return 0;
947    }
948  }
949
950  if(eps<0.0){
951    LM_REAL aux;
952
953    /* compute machine epsilon */
954    for(eps=LM_CNST(1.0); aux=eps+LM_CNST(1.0), aux-LM_CNST(1.0)>0.0; eps*=LM_CNST(0.5))
955                                          ;
956    eps*=LM_CNST(2.0);
957  }
958
959  /* compute the pseudoinverse in a */
960  for(i=0; i<a_sz; i++) a[i]=0.0; /* initialize to zero */
961  for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){
962    one_over_denom=LM_CNST(1.0)/s[rank];
963
964    for(j=0; j<m; j++)
965      for(i=0; i<m; i++)
966        a[i*m+j]+=vt[rank+i*m]*u[j+rank*m]*one_over_denom;
967  }
968
969  /* compute A^+ b in x */
970  for(i=0; i<m; i++){
971    for(j=0, sum=0.0; j<m; j++)
972      sum+=a[i*m+j]*B[j];
973    x[i]=sum;
974  }
975
976#ifndef LINSOLVERS_RETAIN_MEMORY
977  free(buf);
978#endif
979
980  return 1;
981}
982
983/*
984 * This function returns the solution of Ax = b for a real symmetric matrix A
985 *
986 * The function is based on LDLT factorization with the pivoting
987 * strategy of Bunch and Kaufman:
988 * A is factored as L*D*L^T where L is lower triangular and
989 * D symmetric and block diagonal (aka spectral decomposition,
990 * Banachiewicz factorization, modified Cholesky factorization)
991 *
992 * A is mxm, b is mx1.
993 *
994 * The function returns 0 in case of error, 1 if successfull
995 *
996 * This function is often called repetitively to solve problems of identical
997 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
998 * retained between calls and free'd-malloc'ed when not of the appropriate size.
999 * A call with NULL as the first argument forces this memory to be released.
1000 */
1001int AX_EQ_B_BK(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
1002{
1003__STATIC__ LM_REAL *buf=NULL;
1004__STATIC__ int buf_sz=0, nb=0;
1005
1006LM_REAL *a, *work;
1007int a_sz, ipiv_sz, work_sz, tot_sz;
1008int info, *ipiv, nrhs=1;
1009   
1010  if(!A)
1011#ifdef LINSOLVERS_RETAIN_MEMORY
1012  {
1013    if(buf) free(buf);
1014    buf=NULL;
1015    buf_sz=0;
1016
1017    return 1;
1018  }
1019#else
1020  return 1; /* NOP */
1021#endif /* LINSOLVERS_RETAIN_MEMORY */
1022
1023  /* calculate required memory size */
1024  ipiv_sz=m;
1025  a_sz=m*m;
1026  if(!nb){
1027    LM_REAL tmp;
1028
1029    work_sz=-1; // workspace query; optimal size is returned in tmp
1030    SYTRF("L", (int *)&m, NULL, (int *)&m, NULL, (LM_REAL *)&tmp, (int *)&work_sz, (int *)&info);
1031    nb=((int)tmp)/m; // optimal worksize is m*nb
1032  }
1033  work_sz=(nb!=-1)? nb*m : 1;
1034  tot_sz=(a_sz + work_sz)*sizeof(LM_REAL) + ipiv_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
1035
1036#ifdef LINSOLVERS_RETAIN_MEMORY
1037  if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
1038    if(buf) free(buf); /* free previously allocated memory */
1039
1040    buf_sz=tot_sz;
1041    buf=(LM_REAL *)malloc(buf_sz);
1042    if(!buf){
1043      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_BK) "() failed!\n");
1044      exit(1);
1045    }
1046  }
1047#else
1048  buf_sz=tot_sz;
1049  buf=(LM_REAL *)malloc(buf_sz);
1050  if(!buf){
1051    fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_BK) "() failed!\n");
1052    exit(1);
1053  }
1054#endif /* LINSOLVERS_RETAIN_MEMORY */
1055
1056  a=buf;
1057  work=a+a_sz;
1058  ipiv=(int *)(work+work_sz);
1059
1060  /* store A into a and B into x; A is assumed to be symmetric, hence
1061   * the column and row major order representations are the same
1062   */
1063  memcpy(a, A, a_sz*sizeof(LM_REAL));
1064  memcpy(x, B, m*sizeof(LM_REAL));
1065
1066  /* LDLt factorization for A */
1067  SYTRF("L", (int *)&m, a, (int *)&m, ipiv, work, (int *)&work_sz, (int *)&info);
1068  if(info!=0){
1069    if(info<0){
1070      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", SYTRF) " in ", AX_EQ_B_BK) "()\n", -info);
1071      exit(1);
1072    }
1073    else{
1074      fprintf(stderr, RCAT(RCAT("LAPACK error: singular block diagonal matrix D for", SYTRF) " in ", AX_EQ_B_BK)"() [D(%d, %d) is zero]\n", info, info);
1075#ifndef LINSOLVERS_RETAIN_MEMORY
1076      free(buf);
1077#endif
1078
1079      return 0;
1080    }
1081  }
1082
1083  /* solve the system with the computed factorization */
1084  SYTRS("L", (int *)&m, (int *)&nrhs, a, (int *)&m, ipiv, x, (int *)&m, (int *)&info);
1085  if(info<0){
1086    fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", SYTRS) " in ", AX_EQ_B_BK) "()\n", -info);
1087    exit(1);
1088  }
1089
1090#ifndef LINSOLVERS_RETAIN_MEMORY
1091  free(buf);
1092#endif
1093
1094  return 1;
1095}
1096
1097/* undefine all. IT MUST REMAIN IN THIS POSITION IN FILE */
1098#undef AX_EQ_B_QR
1099#undef AX_EQ_B_QRLS
1100#undef AX_EQ_B_CHOL
1101#undef AX_EQ_B_LU
1102#undef AX_EQ_B_SVD
1103#undef AX_EQ_B_BK
1104#undef AX_EQ_B_PLASMA_CHOL
1105
1106#undef GEQRF
1107#undef ORGQR
1108#undef TRTRS
1109#undef POTF2
1110#undef POTRF
1111#undef POTRS
1112#undef GETRF
1113#undef GETRS
1114#undef GESVD
1115#undef GESDD
1116#undef SYTRF
1117#undef SYTRS
1118#undef PLASMA_POSV
1119
1120#else // no LAPACK
1121
1122/* precision-specific definitions */
1123#define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU_noLapack)
1124
1125/*
1126 * This function returns the solution of Ax = b
1127 *
1128 * The function employs LU decomposition followed by forward/back substitution (see
1129 * also the LAPACK-based LU solver above)
1130 *
1131 * A is mxm, b is mx1
1132 *
1133 * The function returns 0 in case of error, 1 if successful
1134 *
1135 * This function is often called repetitively to solve problems of identical
1136 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
1137 * retained between calls and free'd-malloc'ed when not of the appropriate size.
1138 * A call with NULL as the first argument forces this memory to be released.
1139 */
1140int AX_EQ_B_LU(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
1141{
1142__STATIC__ void *buf=NULL;
1143__STATIC__ int buf_sz=0;
1144
1145register int i, j, k;
1146int *idx, maxi=-1, idx_sz, a_sz, work_sz, tot_sz;
1147LM_REAL *a, *work, max, sum, tmp;
1148
1149    if(!A)
1150#ifdef LINSOLVERS_RETAIN_MEMORY
1151    {
1152      if(buf) free(buf);
1153      buf=NULL;
1154      buf_sz=0;
1155
1156      return 1;
1157    }
1158#else
1159    return 1; /* NOP */
1160#endif /* LINSOLVERS_RETAIN_MEMORY */
1161   
1162  /* calculate required memory size */
1163  idx_sz=m;
1164  a_sz=m*m;
1165  work_sz=m;
1166  tot_sz=(a_sz+work_sz)*sizeof(LM_REAL) + idx_sz*sizeof(int); /* should be arranged in that order for proper doubles alignment */
1167
1168#ifdef LINSOLVERS_RETAIN_MEMORY
1169  if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
1170    if(buf) free(buf); /* free previously allocated memory */
1171
1172    buf_sz=tot_sz;
1173    buf=(void *)malloc(tot_sz);
1174    if(!buf){
1175      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
1176      exit(1);
1177    }
1178  }
1179#else
1180    buf_sz=tot_sz;
1181    buf=(void *)malloc(tot_sz);
1182    if(!buf){
1183      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
1184      exit(1);
1185    }
1186#endif /* LINSOLVERS_RETAIN_MEMORY */
1187
1188  a=buf;
1189  work=a+a_sz;
1190  idx=(int *)(work+work_sz);
1191
1192  /* avoid destroying A, B by copying them to a, x resp. */
1193  memcpy(a, A, a_sz*sizeof(LM_REAL));
1194  memcpy(x, B, m*sizeof(LM_REAL));
1195
1196  /* compute the LU decomposition of a row permutation of matrix a; the permutation itself is saved in idx[] */
1197  for(i=0; i<m; ++i){
1198    max=0.0;
1199    for(j=0; j<m; ++j)
1200      if((tmp=FABS(a[i*m+j]))>max)
1201        max=tmp;
1202      if(max==0.0){
1203        fprintf(stderr, RCAT("Singular matrix A in ", AX_EQ_B_LU) "()!\n");
1204#ifndef LINSOLVERS_RETAIN_MEMORY
1205        free(buf);
1206#endif
1207
1208        return 0;
1209      }
1210      work[i]=LM_CNST(1.0)/max;
1211  }
1212
1213  for(j=0; j<m; ++j){
1214    for(i=0; i<j; ++i){
1215      sum=a[i*m+j];
1216      for(k=0; k<i; ++k)
1217        sum-=a[i*m+k]*a[k*m+j];
1218      a[i*m+j]=sum;
1219    }
1220    max=0.0;
1221    for(i=j; i<m; ++i){
1222      sum=a[i*m+j];
1223      for(k=0; k<j; ++k)
1224        sum-=a[i*m+k]*a[k*m+j];
1225      a[i*m+j]=sum;
1226      if((tmp=work[i]*FABS(sum))>=max){
1227        max=tmp;
1228        maxi=i;
1229      }
1230    }
1231    if(j!=maxi){
1232      for(k=0; k<m; ++k){
1233        tmp=a[maxi*m+k];
1234        a[maxi*m+k]=a[j*m+k];
1235        a[j*m+k]=tmp;
1236      }
1237      work[maxi]=work[j];
1238    }
1239    idx[j]=maxi;
1240    if(a[j*m+j]==0.0)
1241      a[j*m+j]=LM_REAL_EPSILON;
1242    if(j!=m-1){
1243      tmp=LM_CNST(1.0)/(a[j*m+j]);
1244      for(i=j+1; i<m; ++i)
1245        a[i*m+j]*=tmp;
1246    }
1247  }
1248
1249  /* The decomposition has now replaced a. Solve the linear system using
1250   * forward and back substitution
1251   */
1252  for(i=k=0; i<m; ++i){
1253    j=idx[i];
1254    sum=x[j];
1255    x[j]=x[i];
1256    if(k!=0)
1257      for(j=k-1; j<i; ++j)
1258        sum-=a[i*m+j]*x[j];
1259    else
1260      if(sum!=0.0)
1261        k=i+1;
1262    x[i]=sum;
1263  }
1264
1265  for(i=m-1; i>=0; --i){
1266    sum=x[i];
1267    for(j=i+1; j<m; ++j)
1268      sum-=a[i*m+j]*x[j];
1269    x[i]=sum/a[i*m+i];
1270  }
1271
1272#ifndef LINSOLVERS_RETAIN_MEMORY
1273  free(buf);
1274#endif
1275
1276  return 1;
1277}
1278
1279/* undefine all. IT MUST REMAIN IN THIS POSITION IN FILE */
1280#undef AX_EQ_B_LU
1281
1282#endif /* HAVE_LAPACK */
Note: See TracBrowser for help on using the repository browser.