MUSE Pipeline Reference Manual  2.1.1
muse_optimize.c
1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set sw=2 sts=2 et cin: */
3 /*
4  * This file is part of the MUSE Instrument Pipeline
5  * Copyright (C) 2005-2014 European Southern Observatory
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20  */
21 #ifdef HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 
25 #include <stdlib.h>
26 #include <math.h>
27 #include <float.h>
28 #include <string.h>
29 #ifndef _OPENMP
30 #define omp_get_max_threads() 1
31 #else
32 #include <omp.h>
33 #endif
34 
35 /*----------------------------------------------------------------------------*
36  The following lines come from lmmin.h.
37  *----------------------------------------------------------------------------*/
38 /* User-supplied subroutines. */
39 
40 /* Compact high-level interface. */
41 
42 /* Collection of control parameters. */
43 typedef struct {
44  double ftol; /* relative error desired in the sum of squares. */
45  double xtol; /* relative error between last two approximations. */
46  double gtol; /* orthogonality desired between fvec and its derivs. */
47  double epsilon; /* step used to calculate the jacobian. */
48  double stepbound; /* initial bound to steps in the outer loop. */
49  double fnorm; /* norm of the residue vector fvec. */
50  int maxcall; /* maximum number of iterations. */
51  int nfev; /* actual number of iterations. */
52  int info; /* status of minimization. */
53 } lm_control_type;
54 
55 /* Initialize control parameters with default values. */
56 static void lm_initialize_control(lm_control_type * control);
57 
58 /* Refined calculation of Eucledian norm, typically used in printout routine. */
59 static double lm_enorm(int, double *);
60 
61 /* The actual minimization. */
62 static void lm_minimize(int m_dat, int n_par, double *par,
63  void (*evaluate) (double *par, int m_dat, double *fvec,
64  void *data, int *info),
65  void (*printout) (const char *func, int n_par,
66  double *par, int m_dat,
67  double *fvec, void *data, int iflag,
68  int iter, int nfev),
69  void *data, lm_control_type * control);
70 
71 
72 /* Legacy low-level interface. */
73 
74 /* Alternative to lm_minimize, allowing full control, and read-out
75  of auxiliary arrays. For usage, see implementation of lm_minimize. */
76 static void lm_lmdif(int m, int n, double *x, double *fvec, double ftol,
77  double xtol, double gtol, int maxfev, double epsfcn,
78  double *diag, int mode, double factor, int *info,
79  int *nfev, double *fjac, int *ipvt, double *qtf,
80  double *wa1, double *wa2, double *wa3, double *wa4,
81  void (*evaluate) (double *par, int m_dat, double *fvec,
82  void *data, int *info),
83  void (*printout) (const char *func, int n_par,
84  double *par, int m_dat,
85  double *fvec, void *data, int iflag,
86  int iter, int nfev),
87  void *data);
88 
89 /* static const char *lm_infmsg[]; */
90 /* static const char *lm_shortmsg[]; */
91 /*---------------------------------------------------------------------------*
92  End of lmmin.h.
93  *---------------------------------------------------------------------------*/
94 
95 /*---------------------------------------------------------------------------*
96  Definition of high level CPL interface.
97  *---------------------------------------------------------------------------*/
98 
99 #include "muse_optimize.h"
100 #include "muse_utils.h"
106 typedef struct {
108  int npar;
109  void *data;
110  cpl_error_code retval;
111 } muse_cpl_optimize_eval_struct;
112 
119 static void
120 lm_evaluate_cpl (double *par, int m_dat, double *fvec, void *data, int *info) {
121  muse_cpl_optimize_eval_struct *s = data;
122 
123  cpl_array *cpl_par = cpl_array_wrap_double(par, s->npar);
124  cpl_array *cpl_fvec = cpl_array_wrap_double(fvec, m_dat);
125 
126  s->retval = (*(s->func))(s->data, cpl_par, cpl_fvec);
127 
128  cpl_array_unwrap(cpl_par);
129  cpl_array_unwrap(cpl_fvec);
130 
131  if (s->retval != CPL_ERROR_NONE) {
132  *info = -1;
133  }
134 }
135 
136 static const cpl_error_code error_code_tbl[] = { // see also lm_infmsg
137  CPL_ERROR_ILLEGAL_INPUT,
138  CPL_ERROR_NONE,
139  CPL_ERROR_NONE,
140  CPL_ERROR_NONE,
141  CPL_ERROR_SINGULAR_MATRIX,
142  CPL_ERROR_CONTINUE,
143  CPL_ERROR_CONTINUE,
144  CPL_ERROR_CONTINUE,
145  CPL_ERROR_CONTINUE,
146  CPL_ERROR_ILLEGAL_OUTPUT,
147  CPL_ERROR_NONE,
148 };
149 
150 static const char *lm_infmsg[] = { /* from lmmin.c */
151  "fatal coding error (improper input parameters)",
152  "success (the relative error in the sum of squares is at most tol)",
153  "success (the relative error between x and the solution is at most tol)",
154  "success (both errors are at most tol)",
155  "trapped by degeneracy (fvec is orthogonal to the columns of the jacobian)"
156  "timeout (number of calls to fcn has reached maxcall*(n+1))",
157  "failure (ftol<tol: cannot reduce sum of squares any further)",
158  "failure (xtol<tol: cannot improve approximate solution any further)",
159  "failure (gtol<tol: cannot improve approximate solution any further)",
160  "exception (not enough memory)",
161  "exception (break requested within function evaluation)"
162 };
163 
176 static void
177 muse_cpl_optimize_print(const char *func, int n_par, double *par,
178  int m_dat, double *fvec,
179  void *data, int iflag, int iter, int nfev) {
180  if (iflag == 2) {
181  cpl_msg_debug(func, "trying step in gradient direction");
182  } else if (iflag == 1) {
183  cpl_msg_debug(func, "determining gradient (iteration %d)", iter);
184  } else if (iflag == 0) {
185  cpl_msg_debug(func, "starting minimization");
186  } else if (iflag == -1) {
187  cpl_msg_debug(func, "terminated after %d evaluations", nfev);
188  }
189 
190  char *parstring = cpl_calloc(n_par * 15 + 30, sizeof(char));
191  snprintf(parstring, 5, "par:");
192  int i;
193  for (i = 0; i < n_par; ++i) {
194  snprintf(parstring + strlen(parstring), 15, " %7.3g", par[i]);
195  }
196  snprintf(parstring + strlen(parstring), 25, " => norm: %7g",
197  lm_enorm(m_dat, fvec));
198  cpl_msg_debug(func, "%s", parstring);
199  cpl_free(parstring);
200 
201 #if 0 /* still not adopted */
202  double f, y, t;
203  muse_cpl_optimize_eval_struct *mydata
204  = (muse_cpl_optimize_eval_struct *) data;
205 
206  if (iflag == -1) {
207  cpl_msg_debug(func, " fitting data as follows:", mydata);
208  for (i = 0; i < m_dat; ++i) {
209  t = (mydata->tvec)[i];
210  y = (mydata->yvec)[i];
211  f = mydata->f(t, par);
212  cpl_msg_debug(func, " t[%2d]=%12g y=%12g fit=%12g residue=%12g",
213  i, t, y, f, y - f);
214  }
215  }
216 #else /* just to silence "unused parameter" warning: */
217  UNUSED_ARGUMENT(data);
218 #endif
219 }
220 
250 cpl_error_code
251 muse_cpl_optimize_lvmq(void *aData, cpl_array *aPar, int aSize,
252  muse_cpl_evaluate_func *aFunction,
254 
255  cpl_ensure_code(aPar != NULL, CPL_ERROR_NULL_INPUT);
256  int npars = cpl_array_get_size(aPar);
257  cpl_ensure_code(npars > 0, CPL_ERROR_ILLEGAL_INPUT);
258  cpl_ensure_code(aSize > 0, CPL_ERROR_ILLEGAL_INPUT);
259  cpl_ensure_code(aFunction != NULL, CPL_ERROR_NULL_INPUT);
260 
261  muse_cpl_optimize_eval_struct s = {
262  aFunction,
263  npars,
264  aData,
265  CPL_ERROR_NONE
266  };
267 
268  void (*debug_func) (const char *, int, double *, int, double *, void *,
269  int, int, int) = NULL;
270  lm_control_type control;
271  lm_initialize_control(&control);
272  if (aCtrl != NULL) {
273  if (aCtrl->maxcall > 0) {
274  control.maxcall = aCtrl->maxcall;
275  }
276  if (aCtrl->ftol > 0) {
277  control.ftol = aCtrl->ftol;
278  }
279  if (aCtrl->xtol > 0) {
280  control.xtol = aCtrl->xtol;
281  }
282  if (aCtrl->gtol > 0) {
283  control.gtol = aCtrl->gtol;
284  }
285  if (aCtrl->debug == CPL_TRUE) {
286  debug_func = muse_cpl_optimize_print;
287  }
288  }
289 
290  double timeinit = cpl_test_get_walltime();
291  double cpuinit = cpl_test_get_cputime();
292  lm_minimize(aSize,
293  cpl_array_get_size(aPar),
294  cpl_array_get_data_double(aPar),
295  lm_evaluate_cpl,
296  debug_func,
297  &s, &control);
298 
299  double timefini = cpl_test_get_walltime();
300  double cpufini = cpl_test_get_cputime();
301 
302  cpl_msg_debug(__func__, "Minimizing finished after %i steps: %s",
303  control.nfev / ((int)cpl_array_get_size(aPar)+1),
304  lm_infmsg[control.info]);
305 
306  cpl_msg_debug(__func__, "processing time %.3fs (%.3fs CPU, %d CPUs)",
307  timefini - timeinit, cpufini - cpuinit, omp_get_max_threads());
308 
309 
310  cpl_error_code res = error_code_tbl[control.info];
311 
312  cpl_ensure_code(res == CPL_ERROR_NONE, res);
313 
314  return s.retval;
315 }
316 
319 /*----------------------------------------------------------------------------*
320  This comes from lmmin.c.
321  *----------------------------------------------------------------------------*/
322 /*
323  * Project: LevenbergMarquardtLeastSquaresFitting
324  *
325  * File: lmmin.c
326  *
327  * Contents: Levenberg-Marquardt core implementation,
328  * and simplified user interface.
329  *
330  * Authors: Burton S. Garbow, Kenneth E. Hillstrom, Jorge J. More
331  * (lmdif and other routines from the public-domain library
332  * netlib::minpack, Argonne National Laboratories, March 1980);
333  * Steve Moshier (initial C translation);
334  * Joachim Wuttke (conversion into C++ compatible ANSI style,
335  * corrections, comments, wrappers, hosting).
336  *
337  * Homepage: www.messen-und-deuten.de/lmfit
338  *
339  * Licence: Public domain.
340  *
341  * Make: For instance: gcc -c lmmin.c; ar rc liblmmin.a lmmin.o
342  */
343 
344 /* *********************** high-level interface **************************** */
345 
346 /* machine-dependent constants from float.h */
347 #define LM_MACHEP DBL_EPSILON /* resolution of arithmetic */
348 #define LM_DWARF DBL_MIN /* smallest nonzero number */
349 #if 0
350 #define LM_SQRT_DWARF sqrt(DBL_MIN) /* square should not underflow */
351 #define LM_SQRT_GIANT sqrt(DBL_MAX) /* square should not overflow */
352 #else
353 #define LM_SQRT_DWARF 1.e-150
354 #define LM_SQRT_GIANT 1.e150
355 #endif
356 #define LM_USERTOL 30*LM_MACHEP /* users are recommened to require this */
357 
358 /* If the above values do not work, the following seem good for an x86:
359  LM_MACHEP .555e-16
360  LM_DWARF 9.9e-324
361  LM_SQRT_DWARF 1.e-160
362  LM_SQRT_GIANT 1.e150
363  LM_USER_TOL 1.e-14
364  The following values should work on any machine:
365  LM_MACHEP 1.2e-16
366  LM_DWARF 1.0e-38
367  LM_SQRT_DWARF 3.834e-20
368  LM_SQRT_GIANT 1.304e19
369  LM_USER_TOL 1.e-14
370 */
371 
372 
373 static void lm_initialize_control( lm_control_type * control ) {
374  control->maxcall = 100;
375  control->epsilon = LM_USERTOL;
376  control->stepbound = 100.;
377  control->ftol = LM_USERTOL;
378  control->xtol = LM_USERTOL;
379  control->gtol = LM_USERTOL;
380 }
381 
382 static void lm_minimize( int m_dat, int n_par, double *par,
383  void (*evaluate) (double *par, int m_dat, double *fvec,
384  void *data, int *info),
385  void (*printout) (const char *func,
386  int n_par, double *par, int m_dat,
387  double *fvec, void *data, int iflag,
388  int iter, int nfev),
389  void *data, lm_control_type * control ) {
390 
391 /* allocate work space. */
392 
393  double *fvec, *diag, *fjac, *qtf, *wa1, *wa2, *wa3, *wa4;
394  int *ipvt;
395 
396  int n = n_par;
397  int m = m_dat;
398 
399  if ( (fvec = (double *) cpl_malloc(m * sizeof(double))) == NULL ||
400  (diag = (double *) cpl_malloc(n * sizeof(double))) == NULL ||
401  (qtf = (double *) cpl_malloc(n * sizeof(double))) == NULL ||
402  (fjac = (double *) cpl_malloc(n*m*sizeof(double))) == NULL ||
403  (wa1 = (double *) cpl_malloc(n * sizeof(double))) == NULL ||
404  (wa2 = (double *) cpl_malloc(n * sizeof(double))) == NULL ||
405  (wa3 = (double *) cpl_malloc(n * sizeof(double))) == NULL ||
406  (wa4 = (double *) cpl_malloc(m * sizeof(double))) == NULL ||
407  (ipvt = (int *) cpl_malloc(n * sizeof(int) )) == NULL ) {
408  control->info = 9;
409  return;
410  }
411 
412 /* perform fit. */
413 
414  control->info = 0;
415  control->nfev = 0;
416 
417  /* this goes through the modified legacy interface: */
418  lm_lmdif( m, n, par, fvec, control->ftol, control->xtol, control->gtol,
419  control->maxcall * (n + 1), control->epsilon, diag, 1,
420  control->stepbound, &(control->info),
421  &(control->nfev), fjac, ipvt, qtf, wa1, wa2, wa3, wa4,
422  evaluate, printout, data );
423 
424  if ( printout )
425  (*printout) (__func__, n, par, m, fvec, data, -1, 0, control->nfev);
426  control->fnorm = lm_enorm(m, fvec);
427  if ( control->info < 0 )
428  control->info = 10;
429 
430 /* clean up. */
431 
432  cpl_free(fvec);
433  cpl_free(diag);
434  cpl_free(qtf);
435  cpl_free(fjac);
436  cpl_free(wa1);
437  cpl_free(wa2);
438  cpl_free(wa3);
439  cpl_free(wa4);
440  cpl_free(ipvt);
441 } /* lm_minimize. */
442 
443 
444 /* the following messages are indexed by the variable info. */
445 
446 #if 0 /* moved definition of lm_infmsg to the beginning of the file */
447 static const char *lm_infmsg[] = {
448  "fatal coding error (improper input parameters)",
449  "success (the relative error in the sum of squares is at most tol)",
450  "success (the relative error between x and the solution is at most tol)",
451  "success (both errors are at most tol)",
452  "trapped by degeneracy (fvec is orthogonal to the columns of the jacobian)"
453  "timeout (number of calls to fcn has reached maxcall*(n+1))",
454  "failure (ftol<tol: cannot reduce sum of squares any further)",
455  "failure (xtol<tol: cannot improve approximate solution any further)",
456  "failure (gtol<tol: cannot improve approximate solution any further)",
457  "exception (not enough memory)",
458  "exception (break requested within function evaluation)"
459 };
460 
461 static const char *lm_shortmsg[] = {
462  "invalid input",
463  "success (f)",
464  "success (p)",
465  "success (f,p)",
466  "degenerate",
467  "call limit",
468  "failed (f)",
469  "failed (p)",
470  "failed (o)",
471  "no memory",
472  "user break"
473 };
474 #endif
475 
476 
477 /* ************************** implementation ******************************* */
478 
479 #define BUG 0
480 #if BUG
481 #include <stdio.h>
482 #endif
483 
484 static void lm_qrfac(int m, int n, double *a, int pivot, int *ipvt,
485  double *rdiag, double *acnorm, double *wa);
486 static void lm_qrsolv(int n, double *r, int ldr, int *ipvt, double *diag,
487  double *qtb, double *x, double *sdiag, double *wa);
488 static void lm_lmpar(int n, double *r, int ldr, int *ipvt, double *diag,
489  double *qtb, double delta, double *par, double *x,
490  double *sdiag, double *wa1, double *wa2);
491 
492 #define MIN(a,b) (((a)<=(b)) ? (a) : (b))
493 #define MAX(a,b) (((a)>=(b)) ? (a) : (b))
494 #define SQR(x) (x)*(x)
495 
496 
497 /* the low-level legacy interface for full control. */
498 
673 static void lm_lmdif(int m, int n, double *x, double *fvec, double ftol,
674  double xtol, double gtol, int maxfev, double epsfcn,
675  double *diag, int mode, double factor, int *info,
676  int *nfev, double *fjac, int *ipvt, double *qtf,
677  double *wa1, double *wa2, double *wa3, double *wa4,
678  void (*evaluate) (double *par, int m_dat, double *fvec,
679  void *data, int *info),
680  void (*printout) (const char *func, int n_par, double *par, int m_dat,
681  double *fvec, void *data, int iflag,
682  int iter, int nfev),
683  void *data)
684 {
685  int iter, j;
686  double actred, delta, dirder, eps, fnorm, fnorm1, gnorm, par, pnorm,
687  prered, ratio, sum, temp, temp1, temp2, temp3, xnorm;
688  static double p1 = 0.1;
689  static double p0001 = 1.0e-4;
690 
691  *nfev = 0; /* function evaluation counter */
692  iter = 1; /* outer loop counter */
693  par = 0; /* levenberg-marquardt parameter */
694  delta = 0; /* to prevent a warning (initialization within if-clause) */
695  xnorm = 0; /* ditto */
696  temp = MAX(epsfcn, LM_MACHEP);
697  eps = sqrt(temp); /* for calculating the Jacobian by forward differences */
698 
699 /* lmdif: check input parameters for errors. */
700 
701  if ((n <= 0) || (m < n) || (ftol < 0.)
702  || (xtol < 0.) || (gtol < 0.) || (maxfev <= 0) || (factor <= 0.)) {
703  *info = 0; // invalid parameter
704  return;
705  }
706  if (mode == 2) { /* scaling by diag[] */
707  for (j = 0; j < n; j++) { /* check for nonpositive elements */
708  if (diag[j] <= 0.0) {
709  *info = 0; // invalid parameter
710  return;
711  }
712  }
713  }
714 #if BUG
715  printf("lmdif\n");
716 #endif
717 
718 /* lmdif: evaluate function at starting point and calculate norm. */
719 
720  *info = 0;
721  (*evaluate) (x, m, fvec, data, info); ++(*nfev);
722  if( printout )
723  (*printout) (__func__, n, x, m, fvec, data, 0, 0, *nfev);
724  if (*info < 0)
725  return;
726  fnorm = lm_enorm(m, fvec);
727 
728 /* lmdif: the outer loop. */
729 
730  do {
731 #if BUG
732  cpl_msg_debug(__func__, "iter=%d nfev=%d fnorm=%g\n",
733  iter, *nfev, fnorm);
734 #endif
735 
736 /* outer: calculate the jacobian matrix. */
737  *info = 0;
738 #if 1
739  #pragma omp parallel for default(none) /* as req. by Ralf */ \
740  shared(n, info, m, x, eps, evaluate, data, printout, nfev, iter, \
741  fjac, fvec)
742 #endif
743  for (j = 0; j < n; j++) {
744  if (*info < 0)
745  continue;
746  double *wa5 = (double *) cpl_malloc(m * sizeof(double));
747  double *xd = (double *) cpl_malloc(n * sizeof(double));
748  memcpy(xd, x, n * sizeof(double));
749  double step = eps * fabs(xd[j]);
750  if (step == 0.)
751  step = eps;
752  xd[j] += step;
753  int infod = 0;
754  (*evaluate) (xd, m, wa5, data, &infod); ++(*nfev);
755  if( printout )
756  (*printout) (__func__, n, xd, m, wa5, data, 1, iter, *nfev);
757  if (infod < 0) {/* user requested break */
758  *info = infod;
759  cpl_free(wa5);
760  cpl_free(xd);
761  continue;
762  }
763  int i;
764  double diff = xd[j] - x[j];
765  double *fjacm = fjac + j * m;
766  for (i = 0; i < m; i++) { /* changed in 2.3, Mark Bydder */
767  fjacm[i] = (wa5[i] - fvec[i]) / diff;
768  }
769  cpl_free(wa5);
770  cpl_free(xd);
771  }
772  if (*info < 0)
773  return; /* user requested break */
774 
775 #if BUG>1
776  /* DEBUG: print the entire matrix */
777  {
778  int i;
779  for (i = 0; i < m; i++) {
780  for (j = 0; j < n; j++)
781  printf("%.5e ", fjac[j * m + i]);
782  printf("\n");
783  }
784  }
785 #endif
786 
787 /* outer: compute the qr factorization of the jacobian. */
788 
789  lm_qrfac(m, n, fjac, 1, ipvt, wa1, wa2, wa3);
790 
791  if (iter == 1) { /* first iteration */
792  if (mode != 2) {
793  /* diag := norms of the columns of the initial jacobian */
794  for (j = 0; j < n; j++) {
795  diag[j] = wa2[j];
796  if (wa2[j] == 0.)
797  diag[j] = 1.;
798  }
799  }
800  /* use diag to scale x, then calculate the norm */
801  for (j = 0; j < n; j++)
802  wa3[j] = diag[j] * x[j];
803  xnorm = lm_enorm(n, wa3);
804  /* initialize the step bound delta. */
805  delta = factor * xnorm;
806  if (delta == 0.)
807  delta = factor;
808  }
809 
810 /* outer: form (q transpose)*fvec and store first n components in qtf. */
811 
812  int i;
813  for (i = 0; i < m; i++)
814  wa4[i] = fvec[i];
815 
816  for (j = 0; j < n; j++) {
817  temp3 = fjac[j * m + j];
818  if (temp3 != 0.) {
819  sum = 0;
820  for (i = j; i < m; i++)
821  sum += fjac[j * m + i] * wa4[i];
822  temp = -sum / temp3;
823  for (i = j; i < m; i++)
824  wa4[i] += fjac[j * m + i] * temp;
825  }
826  fjac[j * m + j] = wa1[j];
827  qtf[j] = wa4[j];
828  }
829 
830 /* outer: compute norm of scaled gradient and test for convergence. */
831 
832  gnorm = 0;
833  if (fnorm != 0) {
834  for (j = 0; j < n; j++) {
835  if (wa2[ipvt[j]] == 0)
836  continue;
837 
838  sum = 0.;
839  for (i = 0; i <= j; i++)
840  sum += fjac[j * m + i] * qtf[i] / fnorm;
841  gnorm = MAX(gnorm, fabs(sum / wa2[ipvt[j]]));
842  }
843  }
844 
845  if (gnorm <= gtol) {
846  *info = 4;
847  return;
848  }
849 
850 /* outer: rescale if necessary. */
851 
852  if (mode != 2) {
853  for (j = 0; j < n; j++)
854  diag[j] = MAX(diag[j], wa2[j]);
855  }
856 
857 /* the inner loop. */
858  do {
859 #if BUG
860  printf("lmdif/ inner loop iter=%d nfev=%d\n", iter, *nfev);
861 #endif
862 
863 /* inner: determine the levenberg-marquardt parameter. */
864 
865  lm_lmpar(n, fjac, m, ipvt, diag, qtf, delta, &par,
866  wa1, wa2, wa3, wa4);
867 
868 /* inner: store the direction p and x + p; calculate the norm of p. */
869 
870  for (j = 0; j < n; j++) {
871  wa1[j] = -wa1[j];
872  wa2[j] = x[j] + wa1[j];
873  wa3[j] = diag[j] * wa1[j];
874  }
875  pnorm = lm_enorm(n, wa3);
876 
877 /* inner: on the first iteration, adjust the initial step bound. */
878 
879  if (*nfev <= 1 + n)
880  delta = MIN(delta, pnorm);
881 
882  /* evaluate the function at x + p and calculate its norm. */
883 
884  *info = 0;
885  (*evaluate) (wa2, m, wa4, data, info); ++(*nfev);
886  if( printout )
887  (*printout) (__func__, n, x, m, wa4, data, 2, iter, *nfev);
888  if (*info < 0)
889  return; /* user requested break. */
890 
891  fnorm1 = lm_enorm(m, wa4);
892 #if BUG
893  printf("lmdif/ pnorm %.10e fnorm1 %.10e fnorm %.10e"
894  " delta=%.10e par=%.10e\n",
895  pnorm, fnorm1, fnorm, delta, par);
896 #endif
897 
898 /* inner: compute the scaled actual reduction. */
899 
900  if (p1 * fnorm1 < fnorm)
901  actred = 1 - SQR(fnorm1 / fnorm);
902  else
903  actred = -1;
904 
905 /* inner: compute the scaled predicted reduction and
906  the scaled directional derivative. */
907 
908  for (j = 0; j < n; j++) {
909  wa3[j] = 0;
910  for (i = 0; i <= j; i++)
911  wa3[i] += fjac[j * m + i] * wa1[ipvt[j]];
912  }
913  temp1 = lm_enorm(n, wa3) / fnorm;
914  temp2 = sqrt(par) * pnorm / fnorm;
915  prered = SQR(temp1) + 2 * SQR(temp2);
916  dirder = -(SQR(temp1) + SQR(temp2));
917 
918 /* inner: compute the ratio of the actual to the predicted reduction. */
919 
920  ratio = prered != 0 ? actred / prered : 0;
921 #if BUG
922  printf("lmdif/ actred=%.10e prered=%.10e ratio=%.10e"
923  " sq(1)=%.10e sq(2)=%.10e dd=%.10e\n",
924  actred, prered, prered != 0 ? ratio : 0.,
925  SQR(temp1), SQR(temp2), dirder);
926 #endif
927 
928 /* inner: update the step bound. */
929 
930  if (ratio <= 0.25) {
931  if (actred >= 0.)
932  temp = 0.5;
933  else
934  temp = 0.5 * dirder / (dirder + 0.55 * actred);
935  if (p1 * fnorm1 >= fnorm || temp < p1)
936  temp = p1;
937  delta = temp * MIN(delta, pnorm / p1);
938  par /= temp;
939  } else if (par == 0. || ratio >= 0.75) {
940  delta = pnorm / 0.5;
941  par *= 0.5;
942  }
943 
944 /* inner: test for successful iteration. */
945 
946  if (ratio >= p0001) {
947  /* yes, success: update x, fvec, and their norms. */
948  for (j = 0; j < n; j++) {
949  x[j] = wa2[j];
950  wa2[j] = diag[j] * x[j];
951  }
952  for (i = 0; i < m; i++)
953  fvec[i] = wa4[i];
954  xnorm = lm_enorm(n, wa2);
955  fnorm = fnorm1;
956  iter++;
957  }
958 #if BUG
959  else {
960  printf("ATTN: iteration considered unsuccessful\n");
961  }
962 #endif
963 
964 /* inner: tests for convergence ( otherwise *info = 1, 2, or 3 ). */
965 
966  *info = 0; /* do not terminate (unless overwritten by nonzero) */
967  if (fabs(actred) <= ftol && prered <= ftol && 0.5 * ratio <= 1)
968  *info = 1;
969  if (delta <= xtol * xnorm)
970  *info += 2;
971  if (*info != 0)
972  return;
973 
974 /* inner: tests for termination and stringent tolerances. */
975 
976  if (*nfev >= maxfev)
977  *info = 5;
978  if (fabs(actred) <= LM_MACHEP &&
979  prered <= LM_MACHEP && 0.5 * ratio <= 1)
980  *info = 6;
981  if (delta <= LM_MACHEP * xnorm)
982  *info = 7;
983  if (gnorm <= LM_MACHEP)
984  *info = 8;
985  if (*info != 0)
986  return;
987 
988 /* inner: end of the loop. repeat if iteration unsuccessful. */
989 
990  } while (ratio < p0001);
991 
992 /* outer: end of the loop. */
993 
994  } while (1);
995 
996 } /* lm_lmdif. */
997 
1068 static void lm_lmpar(int n, double *r, int ldr, int *ipvt, double *diag,
1069  double *qtb, double delta, double *par, double *x,
1070  double *sdiag, double *wa1, double *wa2)
1071 {
1072  int i, iter, j, nsing;
1073  double dxnorm, fp, fp_old, gnorm, parc, parl, paru;
1074  double sum, temp;
1075  static double p1 = 0.1;
1076 
1077 #if BUG
1078  printf("lmpar\n");
1079 #endif
1080 
1081 /* lmpar: compute and store in x the gauss-newton direction. if the
1082  jacobian is rank-deficient, obtain a least squares solution. */
1083 
1084  nsing = n;
1085  for (j = 0; j < n; j++) {
1086  wa1[j] = qtb[j];
1087  if (r[j * ldr + j] == 0 && nsing == n)
1088  nsing = j;
1089  if (nsing < n)
1090  wa1[j] = 0;
1091  }
1092 #if BUG
1093  printf("nsing %d ", nsing);
1094 #endif
1095  for (j = nsing - 1; j >= 0; j--) {
1096  wa1[j] = wa1[j] / r[j + ldr * j];
1097  temp = wa1[j];
1098  for (i = 0; i < j; i++)
1099  wa1[i] -= r[j * ldr + i] * temp;
1100  }
1101 
1102  for (j = 0; j < n; j++)
1103  x[ipvt[j]] = wa1[j];
1104 
1105 /* lmpar: initialize the iteration counter, evaluate the function at the
1106  origin, and test for acceptance of the gauss-newton direction. */
1107 
1108  iter = 0;
1109  for (j = 0; j < n; j++)
1110  wa2[j] = diag[j] * x[j];
1111  dxnorm = lm_enorm(n, wa2);
1112  fp = dxnorm - delta;
1113  if (fp <= p1 * delta) {
1114 #if BUG
1115  printf("lmpar/ terminate (fp<p1*delta)\n");
1116 #endif
1117  *par = 0;
1118  return;
1119  }
1120 
1121 /* lmpar: if the jacobian is not rank deficient, the newton
1122  step provides a lower bound, parl, for the 0. of
1123  the function. otherwise set this bound to 0.. */
1124 
1125  parl = 0;
1126  if (nsing >= n) {
1127  for (j = 0; j < n; j++)
1128  wa1[j] = diag[ipvt[j]] * wa2[ipvt[j]] / dxnorm;
1129 
1130  for (j = 0; j < n; j++) {
1131  sum = 0.;
1132  for (i = 0; i < j; i++)
1133  sum += r[j * ldr + i] * wa1[i];
1134  wa1[j] = (wa1[j] - sum) / r[j + ldr * j];
1135  }
1136  temp = lm_enorm(n, wa1);
1137  parl = fp / delta / temp / temp;
1138  }
1139 
1140 /* lmpar: calculate an upper bound, paru, for the 0. of the function. */
1141 
1142  for (j = 0; j < n; j++) {
1143  sum = 0;
1144  for (i = 0; i <= j; i++)
1145  sum += r[j * ldr + i] * qtb[i];
1146  wa1[j] = sum / diag[ipvt[j]];
1147  }
1148  gnorm = lm_enorm(n, wa1);
1149  paru = gnorm / delta;
1150  if (paru == 0.)
1151  paru = LM_DWARF / MIN(delta, p1);
1152 
1153 /* lmpar: if the input par lies outside of the interval (parl,paru),
1154  set par to the closer endpoint. */
1155 
1156  *par = MAX(*par, parl);
1157  *par = MIN(*par, paru);
1158  if (*par == 0.)
1159  *par = gnorm / dxnorm;
1160 #if BUG
1161  printf("lmpar/ parl %.4e par %.4e paru %.4e\n", parl, *par, paru);
1162 #endif
1163 
1164 /* lmpar: iterate. */
1165 
1166  for (;; iter++) {
1167 
1168  /* evaluate the function at the current value of par. */
1169 
1170  if (*par == 0.)
1171  *par = MAX(LM_DWARF, 0.001 * paru);
1172  temp = sqrt(*par);
1173  for (j = 0; j < n; j++)
1174  wa1[j] = temp * diag[j];
1175  lm_qrsolv(n, r, ldr, ipvt, wa1, qtb, x, sdiag, wa2);
1176  for (j = 0; j < n; j++)
1177  wa2[j] = diag[j] * x[j];
1178  dxnorm = lm_enorm(n, wa2);
1179  fp_old = fp;
1180  fp = dxnorm - delta;
1181 
1182  /* if the function is small enough, accept the current value
1183  of par. Also test for the exceptional cases where parl
1184  is zero or the number of iterations has reached 10. */
1185 
1186  if (fabs(fp) <= p1 * delta
1187  || (parl == 0. && fp <= fp_old && fp_old < 0.)
1188  || iter == 10)
1189  break; /* the only exit from the iteration. */
1190 
1191  /* compute the Newton correction. */
1192 
1193  for (j = 0; j < n; j++)
1194  wa1[j] = diag[ipvt[j]] * wa2[ipvt[j]] / dxnorm;
1195 
1196  for (j = 0; j < n; j++) {
1197  wa1[j] = wa1[j] / sdiag[j];
1198  for (i = j + 1; i < n; i++)
1199  wa1[i] -= r[j * ldr + i] * wa1[j];
1200  }
1201  temp = lm_enorm(n, wa1);
1202  parc = fp / delta / temp / temp;
1203 
1204  /* depending on the sign of the function, update parl or paru. */
1205 
1206  if (fp > 0)
1207  parl = MAX(parl, *par);
1208  else if (fp < 0)
1209  paru = MIN(paru, *par);
1210  /* the case fp==0 is precluded by the break condition */
1211 
1212  /* compute an improved estimate for par. */
1213 
1214  *par = MAX(parl, *par + parc);
1215 
1216  }
1217 
1218 } /* lm_lmpar. */
1219 
1270 static void lm_qrfac(int m, int n, double *a, int pivot, int *ipvt,
1271  double *rdiag, double *acnorm, double *wa)
1272 {
1273  int i, j, k, minmn;
1274  double ajnorm;
1275  static double p05 = 0.05;
1276 
1277 /* qrfac: compute initial column norms and initialize several arrays. */
1278 
1279  for (j = 0; j < n; j++) {
1280  acnorm[j] = lm_enorm(m, &a[j * m]);
1281  rdiag[j] = acnorm[j];
1282  wa[j] = rdiag[j];
1283  if (pivot)
1284  ipvt[j] = j;
1285  }
1286 #if BUG
1287  printf("qrfac\n");
1288 #endif
1289 
1290 /* qrfac: reduce a to r with householder transformations. */
1291 
1292  minmn = MIN(m, n);
1293  for (j = 0; j < minmn; j++) {
1294  double *am = a + j *m;
1295  if (pivot) {
1296 
1297  /* bring the column of largest norm into the pivot position. */
1298 
1299  int kmax = j;
1300  for (k = j + 1; k < n; k++)
1301  if (rdiag[k] > rdiag[kmax])
1302  kmax = k;
1303  if (kmax != j) {
1304  double *akmaxm = a + kmax * m;
1305  for (i = 0; i < m; i++) {
1306  double temp = am[i];
1307  am[i] = akmaxm[i];
1308  akmaxm[i] = temp;
1309  }
1310  rdiag[kmax] = rdiag[j];
1311  wa[kmax] = wa[j];
1312  int ktmp = ipvt[j];
1313  ipvt[j] = ipvt[kmax];
1314  ipvt[kmax] = ktmp;
1315  }
1316  }
1317  /* compute the Householder transformation to reduce the
1318  j-th column of a to a multiple of the j-th unit vector. */
1319 
1320  ajnorm = lm_enorm(m - j, &am[j]);
1321  if (ajnorm == 0.) {
1322  rdiag[j] = 0;
1323  continue;
1324  }
1325 
1326  if (am[j] < 0.)
1327  ajnorm = -ajnorm;
1328  for (i = j; i < m; i++)
1329  am[i] /= ajnorm;
1330  am[j] += 1;
1331 
1332  /* apply the transformation to the remaining columns
1333  and update the norms. */
1334 
1335  for (k = j + 1; k < n; k++) {
1336  double sum = 0;
1337  double *akm = a + k * m;
1338 
1339  for (i = j; i < m; i++)
1340  sum += am[i] * akm[i];
1341 
1342  double temp = sum / am[j];
1343 
1344  for (i = j; i < m; i++)
1345  akm[i] -= temp * am[i];
1346 
1347  if (pivot && rdiag[k] != 0.) {
1348  temp = akm[j] / rdiag[k];
1349  temp = MAX(0., 1 - temp * temp);
1350  rdiag[k] *= sqrt(temp);
1351  temp = rdiag[k] / wa[k];
1352  if (p05 * SQR(temp) <= LM_MACHEP) {
1353  rdiag[k] = lm_enorm(m - j - 1, &akm[j + 1]);
1354  wa[k] = rdiag[k];
1355  }
1356  }
1357  }
1358 
1359  rdiag[j] = -ajnorm;
1360  }
1361 }
1362 
1424 static void
1425 lm_qrsolv(int n, double *r, int ldr, int *ipvt, double *diag,
1426  double *qtb, double *x, double *sdiag, double *wa)
1427 {
1428  int i, kk, j, k, nsing;
1429  double qtbpj, sum, temp;
1430  double _sin, _cos, _tan, _cot; /* local variables, not functions */
1431 
1432 /* qrsolv: copy r and (q transpose)*b to preserve input and initialize s.
1433  in particular, save the diagonal elements of r in x. */
1434 
1435  for (j = 0; j < n; j++) {
1436  for (i = j; i < n; i++)
1437  r[j * ldr + i] = r[i * ldr + j];
1438  x[j] = r[j * ldr + j];
1439  wa[j] = qtb[j];
1440  }
1441 #if BUG
1442  printf("qrsolv\n");
1443 #endif
1444 
1445 /* qrsolv: eliminate the diagonal matrix d using a givens rotation. */
1446 
1447  for (j = 0; j < n; j++) {
1448 
1449 /* qrsolv: prepare the row of d to be eliminated, locating the
1450  diagonal element using p from the qr factorization. */
1451 
1452  if (diag[ipvt[j]] == 0.)
1453  goto L90;
1454  for (k = j; k < n; k++)
1455  sdiag[k] = 0.;
1456  sdiag[j] = diag[ipvt[j]];
1457 
1458 /* qrsolv: the transformations to eliminate the row of d modify only
1459  a single element of (q transpose)*b beyond the first n, which is
1460  initially 0.. */
1461 
1462  qtbpj = 0.;
1463  for (k = j; k < n; k++) {
1464 
1465  /* determine a givens rotation which eliminates the
1466  appropriate element in the current row of d. */
1467 
1468  if (sdiag[k] == 0.)
1469  continue;
1470  kk = k + ldr * k;
1471  if (fabs(r[kk]) < fabs(sdiag[k])) {
1472  _cot = r[kk] / sdiag[k];
1473  _sin = 1 / sqrt(1 + SQR(_cot));
1474  _cos = _sin * _cot;
1475  } else {
1476  _tan = sdiag[k] / r[kk];
1477  _cos = 1 / sqrt(1 + SQR(_tan));
1478  _sin = _cos * _tan;
1479  }
1480 
1481  /* compute the modified diagonal element of r and
1482  the modified element of ((q transpose)*b,0). */
1483 
1484  r[kk] = _cos * r[kk] + _sin * sdiag[k];
1485  temp = _cos * wa[k] + _sin * qtbpj;
1486  qtbpj = -_sin * wa[k] + _cos * qtbpj;
1487  wa[k] = temp;
1488 
1489  /* accumulate the tranformation in the row of s. */
1490 
1491  for (i = k + 1; i < n; i++) {
1492  temp = _cos * r[k * ldr + i] + _sin * sdiag[i];
1493  sdiag[i] = -_sin * r[k * ldr + i] + _cos * sdiag[i];
1494  r[k * ldr + i] = temp;
1495  }
1496  }
1497 
1498  L90:
1499  /* store the diagonal element of s and restore
1500  the corresponding diagonal element of r. */
1501 
1502  sdiag[j] = r[j * ldr + j];
1503  r[j * ldr + j] = x[j];
1504  }
1505 
1506 /* qrsolv: solve the triangular system for z. if the system is
1507  singular, then obtain a least squares solution. */
1508 
1509  nsing = n;
1510  for (j = 0; j < n; j++) {
1511  if (sdiag[j] == 0. && nsing == n)
1512  nsing = j;
1513  if (nsing < n)
1514  wa[j] = 0;
1515  }
1516 
1517  for (j = nsing - 1; j >= 0; j--) {
1518  sum = 0;
1519  for (i = j + 1; i < nsing; i++)
1520  sum += r[j * ldr + i] * wa[i];
1521  wa[j] = (wa[j] - sum) / sdiag[j];
1522  }
1523 
1524 /* qrsolv: permute the components of z back to components of x. */
1525 
1526  for (j = 0; j < n; j++)
1527  x[ipvt[j]] = wa[j];
1528 
1529 } /* lm_qrsolv. */
1530 
1547 static double
1548 lm_enorm(int n, double *x) {
1549  int i;
1550  double agiant, s1, s2, s3, xabs, x1max, x3max, temp;
1551 
1552  s1 = 0;
1553  s2 = 0;
1554  s3 = 0;
1555  x1max = 0;
1556  x3max = 0;
1557  agiant = LM_SQRT_GIANT / ((double) n);
1558 
1559  /* sum squares. */
1560  for (i = 0; i < n; i++) {
1561  xabs = fabs(x[i]);
1562  if (xabs > LM_SQRT_DWARF && xabs < agiant) {
1563  /* sum for intermediate components. */
1564  s2 += xabs * xabs;
1565  continue;
1566  }
1567 
1568  if (xabs > LM_SQRT_DWARF) {
1569  /* sum for large components. */
1570  if (xabs > x1max) {
1571  temp = x1max / xabs;
1572  s1 = 1 + s1 * SQR(temp);
1573  x1max = xabs;
1574  } else {
1575  temp = xabs / x1max;
1576  s1 += SQR(temp);
1577  }
1578  continue;
1579  }
1580  /* sum for small components. */
1581  if (xabs > x3max) {
1582  temp = x3max / xabs;
1583  s3 = 1 + s3 * SQR(temp);
1584  x3max = xabs;
1585  } else {
1586  if (xabs != 0.) {
1587  temp = xabs / x3max;
1588  s3 += SQR(temp);
1589  }
1590  }
1591  }
1592 
1593  /* calculation of norm. */
1594 
1595  if (s1 != 0)
1596  return x1max * sqrt(s1 + (s2 / x1max) / x1max);
1597  if (s2 != 0) {
1598  if (s2 >= x3max)
1599  return sqrt(s2 * (1 + (x3max / s2) * (x3max * s3)));
1600  else
1601  return sqrt(x3max * ((s2 / x3max) + (x3max * s3)));
1602  }
1603 
1604  return x3max * sqrt(s3);
1605 
1606 } /* lm_enorm. */
1607 
cpl_error_code muse_cpl_optimize_lvmq(void *aData, cpl_array *aPar, int aSize, muse_cpl_evaluate_func *aFunction, muse_cpl_optimize_control_t *aCtrl)
Minimize a function with the Levenberg-Marquardt algorithm.
int debug
Flag to switch on debugging messages. Default value: CPL_FALSE.
Definition: muse_optimize.h:70
int maxcall
Maximum number of iterations. Default value (when set to -1): 100.
Definition: muse_optimize.h:65
double xtol
Relative error between last two approximations. Default value (when set to -1): 30 * DBL_EPSILON...
Definition: muse_optimize.h:50
double ftol
Relative error desired in the sum of squares. Default value (when set to -1): 30 * DBL_EPSILON...
Definition: muse_optimize.h:41
double gtol
Orthogonality desired between fvec and its derivs. Default value (when set to -1): 30 * DBL_EPSILON...
Definition: muse_optimize.h:60
Optimization control parameters.
Definition: muse_optimize.h:32
cpl_error_code( muse_cpl_evaluate_func)(void *aData, cpl_array *aPar, cpl_array *aRetval)
User provided function to evaluate in muse_cpl_optimize_lvmq().
Definition: muse_optimize.h:88
static void muse_cpl_optimize_print(const char *func, int n_par, double *par, int m_dat, double *fvec, void *data, int iflag, int iter, int nfev)
Default printout method adopted to CPL.