///////////////////////////////////////////////////////////////////////////////// // // Levenberg - Marquardt non-linear minimization algorithm // Copyright (C) 2004 Manolis Lourakis (lourakis at ics forth gr) // Institute of Computer Science, Foundation for Research & Technology - Hellas // Heraklion, Crete, Greece. // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // ///////////////////////////////////////////////////////////////////////////////// #ifndef LM_REAL // not included by lm.c #error This file should not be compiled directly! #endif /* precision-specific definitions */ #define LEVMAR_DER LM_ADD_PREFIX(levmar_der) #define LEVMAR_DIF LM_ADD_PREFIX(levmar_dif) #define LEVMAR_FDIF_FORW_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_forw_jac_approx) #define LEVMAR_FDIF_CENT_JAC_APPROX LM_ADD_PREFIX(levmar_fdif_cent_jac_approx) #define LEVMAR_TRANS_MAT_MAT_MULT LM_ADD_PREFIX(levmar_trans_mat_mat_mult) #define LEVMAR_L2NRMXMY LM_ADD_PREFIX(levmar_L2nrmxmy) #define LEVMAR_COVAR LM_ADD_PREFIX(levmar_covar) #ifdef HAVE_LAPACK #define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU) #define AX_EQ_B_CHOL LM_ADD_PREFIX(Ax_eq_b_Chol) #define AX_EQ_B_QR LM_ADD_PREFIX(Ax_eq_b_QR) #define AX_EQ_B_QRLS LM_ADD_PREFIX(Ax_eq_b_QRLS) #define AX_EQ_B_SVD LM_ADD_PREFIX(Ax_eq_b_SVD) #define AX_EQ_B_BK LM_ADD_PREFIX(Ax_eq_b_BK) #else #define AX_EQ_B_LU LM_ADD_PREFIX(Ax_eq_b_LU_noLapack) #endif /* HAVE_LAPACK */ #ifdef HAVE_PLASMA #define AX_EQ_B_PLASMA_CHOL LM_ADD_PREFIX(Ax_eq_b_PLASMA_Chol) #endif /* * This function seeks the parameter vector p that best describes the measurements vector x. * More precisely, given a vector function func : R^m --> R^n with n>=m, * it finds p s.t. func(p) ~= x, i.e. the squared second order (i.e. L2) norm of * e=x-func(p) is minimized. * * This function requires an analytic Jacobian. In case the latter is unavailable, * use LEVMAR_DIF() bellow * * Returns the number of iterations (>=0) if successful, LM_ERROR if failed * * For more details, see K. Madsen, H.B. Nielsen and O. Tingleff's lecture notes on * non-linear least squares at http://www.imm.dtu.dk/pubdb/views/edoc_download.php/3215/pdf/imm3215.pdf */ int LEVMAR_DER( void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n */ void (*jacf)(LM_REAL *p, LM_REAL *j, int m, int n, void *adata), /* function to evaluate the Jacobian \part x / \part p */ LM_REAL *p, /* I/O: initial parameter estimates. On output has the estimated solution */ LM_REAL *x, /* I: measurement vector. NULL implies a zero vector */ int m, /* I: parameter vector dimension (i.e. #unknowns) */ int n, /* I: measurement vector dimension */ int itmax, /* I: maximum number of iterations */ LM_REAL opts[4], /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3]. Respectively the scale factor for initial \mu, * stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2. Set to NULL for defaults to be used */ LM_REAL info[LM_INFO_SZ], /* O: information regarding the minimization. Set to NULL if don't care * info[0]= ||e||_2 at initial p. * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p. * info[5]= # iterations, * info[6]=reason for terminating: 1 - stopped by small gradient J^T e * 2 - stopped by small Dp * 3 - stopped by itmax * 4 - singular matrix. Restart from current p with increased mu * 5 - no further error reduction is possible. Restart with increased mu * 6 - stopped by small ||e||_2 * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error * info[7]= # function evaluations * info[8]= # Jacobian evaluations * info[9]= # linear systems solved, i.e. # attempts for reducing error */ LM_REAL *work, /* working memory at least LM_DER_WORKSZ() reals large, allocated if NULL */ LM_REAL *covar, /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */ void *adata) /* pointer to possibly additional data, passed uninterpreted to func & jacf. * Set to NULL if not needed */ { register int i, j, k, l; int worksz, freework=0, issolved; /* temp work arrays */ LM_REAL *e, /* nx1 */ *hx, /* \hat{x}_i, nx1 */ *jacTe, /* J^T e_i mx1 */ *jac, /* nxm */ *jacTjac, /* mxm */ *Dp, /* mx1 */ *diag_jacTjac, /* diagonal of J^T J, mx1 */ *pDp; /* p + Dp, mx1 */ register LM_REAL mu, /* damping constant */ tmp; /* mainly used in matrix & vector multiplications */ LM_REAL p_eL2, jacTe_inf, pDp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+Dp)||_2 */ LM_REAL p_L2, Dp_L2=LM_REAL_MAX, dF, dL; LM_REAL tau, eps1, eps2, eps2_sq, eps3; LM_REAL init_p_eL2; int nu=2, nu2, stop=0, nfev, njev=0, nlss=0; const int nm=n*m; int (*linsolver)(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)=NULL; mu=jacTe_inf=0.0; /* -Wall */ if(n0; ) jacTjac[i]=0.0; for(i=m; i-->0; ) jacTe[i]=0.0; for(l=n; l-->0; ){ jaclm=jac+l*m; for(i=m; i-->0; ){ jacTjacim=jacTjac+i*m; alpha=jaclm[i]; //jac[l*m+i]; for(j=i+1; j-->0; ) /* j<=i computes lower triangular part only */ jacTjacim[j]+=jaclm[j]*alpha; //jacTjac[i*m+j]+=jac[l*m+j]*alpha /* J^T e */ jacTe[i]+=alpha*e[l]; } } for(i=m; i-->0; ) /* copy to upper part */ for(j=i+1; jtmp) tmp=diag_jacTjac[i]; /* find max diagonal element */ mu=tau*tmp; } /* determine increment using adaptive damping */ while(1){ /* augment normal equations */ for(i=0; i=(p_L2+eps2)/(LM_CNST(EPSILON)*LM_CNST(EPSILON))){ /* almost singular */ //if(Dp_L2>=(p_L2+eps2)/LM_CNST(EPSILON)){ /* almost singular */ stop=4; break; } (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + Dp */ /* compute ||e(pDp)||_2 */ /* ### hx=x-hx, pDp_eL2=||hx|| */ #if 1 pDp_eL2=LEVMAR_L2NRMXMY(hx, x, hx, n); #else for(i=0, pDp_eL2=0.0; i0.0 && dF>0.0){ /* reduction in error, increment is accepted */ tmp=(LM_CNST(2.0)*dF/dL-LM_CNST(1.0)); tmp=LM_CNST(1.0)-tmp*tmp*tmp; mu=mu*( (tmp>=LM_CNST(ONE_THIRD))? tmp : LM_CNST(ONE_THIRD) ); nu=2; for(i=0 ; i=itmax) stop=3; for(i=0; i=10)? m: 10, updjac, updp=1, newjac; const int nm=n*m; int (*linsolver)(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)=NULL; mu=jacTe_inf=p_L2=0.0; /* -Wall */ updjac=newjac=0; /* -Wall */ if(n16) || updjac==K){ /* compute difference approximation to J */ if(using_ffdif){ /* use forward differences */ LEVMAR_FDIF_FORW_JAC_APPROX(func, p, hx, wrk, delta, jac, m, n, adata); ++njap; nfev+=m; } else{ /* use central differences */ LEVMAR_FDIF_CENT_JAC_APPROX(func, p, wrk, wrk2, delta, jac, m, n, adata); ++njap; nfev+=2*m; } nu=2; updjac=0; updp=0; newjac=1; } if(newjac){ /* Jacobian has changed, recompute J^T J, J^t e, etc */ newjac=0; /* J^T J, J^T e */ if(nm<=__BLOCKSZ__SQ){ // this is a small problem /* J^T*J_ij = \sum_l J^T_il * J_lj = \sum_l J_li * J_lj. * Thus, the product J^T J can be computed using an outer loop for * l that adds J_li*J_lj to each element ij of the result. Note that * with this scheme, the accesses to J and JtJ are always along rows, * therefore induces less cache misses compared to the straightforward * algorithm for computing the product (i.e., l loop is innermost one). * A similar scheme applies to the computation of J^T e. * However, for large minimization problems (i.e., involving a large number * of unknowns and measurements) for which J/J^T J rows are too large to * fit in the L1 cache, even this scheme incures many cache misses. In * such cases, a cache-efficient blocking scheme is preferable. * * Thanks to John Nitao of Lawrence Livermore Lab for pointing out this * performance problem. * * Note that the non-blocking algorithm is faster on small * problems since in this case it avoids the overheads of blocking. */ register int l; register LM_REAL alpha, *jaclm, *jacTjacim; /* looping downwards saves a few computations */ for(i=m*m; i-->0; ) jacTjac[i]=0.0; for(i=m; i-->0; ) jacTe[i]=0.0; for(l=n; l-->0; ){ jaclm=jac+l*m; for(i=m; i-->0; ){ jacTjacim=jacTjac+i*m; alpha=jaclm[i]; //jac[l*m+i]; for(j=i+1; j-->0; ) /* j<=i computes lower triangular part only */ jacTjacim[j]+=jaclm[j]*alpha; //jacTjac[i*m+j]+=jac[l*m+j]*alpha /* J^T e */ jacTe[i]+=alpha*e[l]; } } for(i=m; i-->0; ) /* copy to upper part */ for(j=i+1; jtmp) tmp=diag_jacTjac[i]; /* find max diagonal element */ mu=tau*tmp; } /* determine increment using adaptive damping */ /* augment normal equations */ for(i=0; i=(p_L2+eps2)/(LM_CNST(EPSILON)*LM_CNST(EPSILON))){ /* almost singular */ //if(Dp_L2>=(p_L2+eps2)/LM_CNST(EPSILON)){ /* almost singular */ stop=4; break; } (*func)(pDp, wrk, m, n, adata); ++nfev; /* evaluate function at p + Dp */ /* compute ||e(pDp)||_2 */ /* ### wrk2=x-wrk, pDp_eL2=||wrk2|| */ #if 1 pDp_eL2=LEVMAR_L2NRMXMY(wrk2, x, wrk, n); #else for(i=0, pDp_eL2=0.0; i0){ /* update jac */ for(i=0; i0.0 && dF>0.0){ /* reduction in error, increment is accepted */ tmp=(LM_CNST(2.0)*dF/dL-LM_CNST(1.0)); tmp=LM_CNST(1.0)-tmp*tmp*tmp; mu=mu*( (tmp>=LM_CNST(ONE_THIRD))? tmp : LM_CNST(ONE_THIRD) ); nu=2; for(i=0 ; i=itmax) stop=3; for(i=0; i