Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp: 08/08/17 19:51:31 (7 years ago)
Author: gkronber
Message: #2789: worked on nonlinear regression with constraints

File: 1 edited

Legend:

Unmodified
Added
Removed
  • branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/SymbolicRegressionConstantOptimizationEvaluator.cs

    r15311 r15313  
    2323using System.Collections.Generic;
    2424using System.Linq;
    25 using System.Runtime.Remoting.Channels;
    2625using HeuristicLab.Common;
    2726using HeuristicLab.Core;
     
    3332using HeuristicLab.Problems.DataAnalysis.Symbolic;
    3433using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
     34using DiffSharp.Interop.Float64;
    3535
    3636namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
     
    168168      // A dictionary is used to find parameters
    169169      double[] initialConstants;
    170       var parameters = new List<TreeToAutoDiffTermConverter.DataForVariable>();
    171 
    172       TreeToAutoDiffTermConverter.ParametricFunction func;
    173       TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad;
    174       TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad_for_vars;
    175       if (!TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, updateVariableWeights, out parameters, out initialConstants,
    176         out func, out func_grad, out func_grad_for_vars))
     170      var parameters = new List<TreeToDiffSharpConverter.DataForVariable>();
     171
     172      Func<DV, D> func;
     173      if (!TreeToDiffSharpConverter.TryConvertToDiffSharp(tree, updateVariableWeights, out parameters, out initialConstants,
     174        out func))
    177175        throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree.");
    178176      if (parameters.Count == 0) return 0.0; // gkronber: constant expressions always have a R² of 0.0
     
    182180      // extract inital constants
    183181      double[] c = new double[initialConstants.Length + 2];
     182      double[] s = new double[c.Length];
    184183      {
    185         c[0] = 0.0;
    186         c[1] = 1.0;
     184        c[0] = 1.0;
     185        c[1] = 0.0;
    187186        Array.Copy(initialConstants, 0, c, 2, initialConstants.Length);
    188       }
     187
     188        // s[0] = 1.0;
     189        // s[1] = 1.0;
     190        // Array.Copy(initialConstants.Select(ci=>Math.Abs(ci)).ToArray()
     191        //   , 0, s, 2, initialConstants.Length);
     192      }
     193      s = Enumerable.Repeat(1.0, c.Length).ToArray();
     194
    189195      double[] originalConstants = (double[])c.Clone();
    190196      double originalQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);
     
    195201      IDataset ds = problemData.Dataset;
    196202      double[,] x = new double[rows.Count(), parameters.Count];
    197       double[,] constraints = new double[rows.Count(), parameters.Count + 1]; // +1 for constraint for f(x)
    198       string[,] comp = string[rows.Count(), parameters.Count + 1];
     203      int col = 0;
    199204      int row = 0;
    200       foreach (var r in rows) {
    201         int col = 0;
    202         foreach (var info in parameterEntries) {
     205      foreach (var info in parameterEntries) {
     206        row = 0;
     207        foreach (var r in rows) {
    203208          if (ds.VariableHasType<double>(info.variableName)) {
    204209            x[row, col] = ds.GetDoubleValue(info.variableName, r + info.lag);
     
    207212          } else throw new InvalidProgramException("found a variable of unknown type");
    208213
    209           // find the matching df/dx column
    210           var colName = string.Format("df/d({0})", info.variableName);
    211           constraints[row, col] = ds.GetDoubleValue(colName, r);
    212 
    213           var compColName = string.Format("df/d({0}) constraint-type")
    214           comp[row, col] = ds.GetStringValue(compColName, r);
    215           col++;
    216         }
     214          row++;
     215        }
     216        col++;
     217      }
     218
     219      var target = problemData.TargetVariable;
     220      var constraintRows = Enumerable.Range(0, problemData.Dataset.Rows).Where(rIdx => double.IsNaN(ds.GetDoubleValue(target, rIdx)));
     221      double[,] constraints = new double[constraintRows.Count(), parameters.Count + 1]; // +1 for constraint for f(x)
     222      string[,] comp = new string[constraintRows.Count(), parameters.Count + 1];
     223      int eqConstraints = 0;
     224      int ieqConstraints = 0;
     225      col = 0;
     226      foreach (var info in parameterEntries) {
     227        row = 0;
     228        // find the matching df/dx column
     229        var colName = string.Format("df/d({0})", info.variableName);
     230        var compColName = string.Format("df/d({0}) constraint-type", info.variableName);
     231
     232        if (ds.VariableNames.Contains(colName)) {
     233          foreach (var r in constraintRows) {
     234            constraints[row, col] = ds.GetDoubleValue(colName, r);
     235            comp[row, col] = ds.GetStringValue(compColName, r);
     236
     237            if (comp[row, col] == "EQ") eqConstraints++;
     238            else if (comp[row, col] == "LEQ" || comp[row, col] == "GEQ") ieqConstraints++;
     239
     240            row++;
     241          }
     242        }
     243        col++;
     244      }
     245      // f(x) constraint
     246      row = 0;
     247      col = constraints.GetLength(1) - 1;
     248      foreach (var r in constraintRows) {
    217249        constraints[row, col] = ds.GetDoubleValue("f(x)", r);
    218250        comp[row, col] = ds.GetStringValue("f(x) constraint-type", r);
     251        if (comp[row, col] == "EQ") eqConstraints++;
     252        else if (comp[row, col] == "LEQ" || comp[row, col] == "GEQ") ieqConstraints++;
    219253        row++;
    220254      }
     255
     256
    221257      double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
    222258      int n = x.GetLength(0);
     
    224260      int k = c.Length;
    225261
    226       // alglib.ndimensional_pfunc function_cx_1_func = CreatePFunc(func);
    227       // alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(func_grad);
    228       alglib.ndimensional_jac jac = CreateJac(x, y, constraints, comp, func, func_grad, func_grad_for_vars);
    229       double[] s = c.Select(ci=>Math.Max(Math.Abs(ci), 1E-6)).ToArray(); // use absolute value of variables as scale
     262      alglib.ndimensional_jac jac = CreateJac(x, y, constraints, comp, func, AD.Grad(func));
    230263      double rho = 1000;
    231264      int outeriters = 3;
     
    243276        alglib.minnlcsetprecexactlowrank(state, updateFreq);
    244277        // TODO set constraints;
    245         alglib.minnlcsetnlc(state, 0, 2);
     278        alglib.minnlcsetnlc(state, eqConstraints, ieqConstraints);
    246279        alglib.minnlcoptimize(state, jac, null, null);
    247280        alglib.minnlcresults(state, out c, out rep);
     
    286319      double[,] x, // x longer than y
    287320      double[] y, // only targets
     321      double[,] constraints, // df/d(xi), same order as for x
    288322      string[,] comparison, // {LEQ, GEQ, EQ }
    289       double[,] constraints, // df/d(xi), same order as for x
    290       TreeToAutoDiffTermConverter.ParametricFunction func,
    291       TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad,
    292       TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad_for_vars) {
     323      Func<DV, D> func,
     324      Func<DV, DV> func_grad) {
    293325      return (double[] c, double[] fi, double[,] jac, object o) => {
    294326        // objective function is sum of squared errors
     
    298330        Array.Clear(fi, 0, fi.Length);
    299331        Array.Clear(jac, 0, jac.Length);
    300         double[] xi = new double[nParams];
     332        var p = new double[nParams + c.Length];
     333        Array.Copy(c, 0, p, nParams, c.Length); // copy c to the end of the function parameters vector
    301334        for (int rowIdx = 0; rowIdx < nRows; rowIdx++) {
    302           // copy row
     335          // copy x_i to the beginning of the function parameters vector
    303336          for (int cIdx = 0; cIdx < nParams; cIdx++)
    304             xi[cIdx] = x[rowIdx, cIdx];
    305           var fg = func_grad(c, xi);
    306           double f = fg.Item2;
    307           double[] g = fg.Item1;
     337            p[cIdx] = x[rowIdx, cIdx];
     338
     339          double f = (double)func(p);
     340          double[] g = (double[])func_grad(p);
    308341          var e = y[rowIdx] - f;
    309342          fi[0] += e * e;
    310343          // update gradient
    311344          for (int colIdx = 0; colIdx < c.Length; colIdx++) {
    312             jac[0, colIdx] += -2 * e * g[colIdx];
     345            jac[0, colIdx] += -2 * e * g[nParams + colIdx]; // skip the elements for the variable values
    313346          }
    314347        }
    315348
    316         // constraints
    317         var nConstraintPoints = constraints.GetLength(0);
    318 
    319         // TODO: AutoDiff for gradients d/d(c) d/d(xi) f(xi,c) for all xi
    320         // converter should produce the differentials for all variables as functions which can be differentiated wrt the parameters c
    321 
     349        int fidx = 1;
     350        int constraintRows = constraints.GetLength(0);
     351
     352        // eq constraints
     353        for (int rowIdx = 0; rowIdx < constraintRows; rowIdx++) {
     354          for (int colIdx = 0; colIdx < constraints.GetLength(1); colIdx++) {
     355            if (comparison[rowIdx, colIdx] == "EQ") {
     356              throw new NotSupportedException();
     357            }
     358          }
     359        }
     360        // ineq constraints
     361        for (int rowIdx = 0; rowIdx < constraintRows; rowIdx++) {
     362          for (int colIdx = 0; colIdx < constraints.GetLength(1); colIdx++) {
     363            // there is a constraint value
     364            if (!double.IsNaN(constraints[rowIdx, colIdx]) && !string.IsNullOrEmpty(comparison[rowIdx, colIdx])) {
     365              var factor = (comparison[rowIdx, colIdx] == "LEQ") ? 1.0
     366              : comparison[rowIdx, colIdx] == "GEQ" ? -1.0 : 0.0;
     367
     368              // f(x) constraint
     369              if (colIdx == constraints.GetLength(1) - 1) {
     370                // copy x_i to the beginning of the function parameters vector
     371                for (int cIdx = 0; cIdx < nParams; cIdx++)
     372                  p[cIdx] = x[rowIdx, cIdx];
     373
     374                fi[fidx] = factor * ((double)(func(p)) - constraints[rowIdx, colIdx]);
     375                var g = (double[])func_grad(p);
     376                for (int jacIdx = 0; jacIdx < c.Length; jacIdx++) {
     377                  jac[fidx, jacIdx] = factor * g[nParams + jacIdx]; // skip the elements for the variable values
     378                }
     379                fidx++;
     380              } else {
     381                // df / dxi constraint
     382                var g = (double[])func_grad(p);
     383                fi[fidx] = factor * g[colIdx];
     384                var hess = AD.Hessian(func, p);
     385                for (int jacIdx = 0; jacIdx < c.Length; jacIdx++) {
     386                  jac[fidx, jacIdx] = factor * (double)hess[nParams + colIdx, nParams + jacIdx]; // skip the elements for the variable values
     387                }
     388                fidx++;
     389              }
     390            }
     391          }
     392        }
    322393      };
    323394    }
    324395
    325     // private static alglib.ndimensional_pfunc CreatePFunc(TreeToAutoDiffTermConverter.ParametricFunction func) {
    326     //   return (double[] c, double[] x, ref double fx, object o) => {
    327     //     fx = func(c, x);
    328     //   };
    329     // }
    330     //
    331     // private static alglib.ndimensional_pgrad CreatePGrad(TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad) {
    332     //   return (double[] c, double[] x, ref double fx, double[] grad, object o) => {
    333     //     var tupel = func_grad(c, x);
    334     //     fx = tupel.Item2;
    335     //     Array.Copy(tupel.Item1, grad, grad.Length);
    336     //   };
    337     // }
    338396    public static bool CanOptimizeConstants(ISymbolicExpressionTree tree) {
    339397      return TreeToAutoDiffTermConverter.IsCompatible(tree);
Note: See TracChangeset for help on using the changeset viewer.