Changeset 18009


Ignore:
Timestamp:
07/15/21 12:07:27 (3 months ago)
Author:
bburlacu
Message:

#3087: refactor ConstantOptimizationEvaluator to use the native ParameterOptimizer

Location:
branches/3087_Ceres_Integration/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/3087_Ceres_Integration/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression-3.4.csproj

    r18006 r18009  
    102102      <SpecificVersion>False</SpecificVersion>
    103103      <HintPath>..\..\bin\ALGLIB-3.17.0.dll</HintPath>
     104      <Private>False</Private>
     105    </Reference>
     106    <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic.NativeInterpreter-0.1, Version=0.0.0.1, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
     107      <SpecificVersion>False</SpecificVersion>
     108      <HintPath>..\..\bin\HeuristicLab.Problems.DataAnalysis.Symbolic.NativeInterpreter-0.1.dll</HintPath>
    104109      <Private>False</Private>
    105110    </Reference>
  • branches/3087_Ceres_Integration/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/SymbolicRegressionConstantOptimizationEvaluator.cs

    r18006 r18009  
    2323using System.Collections.Generic;
    2424using System.Linq;
     25
    2526using HEAL.Attic;
     27
    2628using HeuristicLab.Common;
    2729using HeuristicLab.Core;
    2830using HeuristicLab.Data;
    2931using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     32using HeuristicLab.NativeInterpreter;
    3033using HeuristicLab.Optimization;
    3134using HeuristicLab.Parameters;
     
    209212      bool updateConstantsInTree = true, Action<double[], double, object> iterationCallback = null, EvaluationsCounter counter = null) {
    210213
    211       // Numeric constants in the tree become variables for parameter optimization.
    212       // Variables in the tree become parameters (fixed values) for parameter optimization.
    213       // For each parameter (variable in the original tree) we store the
    214       // variable name, variable value (for factor vars) and lag as a DataForVariable object.
    215       // A dictionary is used to find parameters
    216       double[] initialConstants;
    217       var parameters = new List<TreeToAutoDiffTermConverter.DataForVariable>();
    218 
    219       TreeToAutoDiffTermConverter.ParametricFunction func;
    220       TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad;
    221       if (!TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, updateVariableWeights, applyLinearScaling, out parameters, out initialConstants, out func, out func_grad))
    222         throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree.");
    223       if (parameters.Count == 0) return 0.0; // constant expressions always have a R² of 0.0
    224       var parameterEntries = parameters.ToArray(); // order of entries must be the same for x
    225 
    226       // extract inital constants
    227       double[] c;
    228       if (applyLinearScaling) {
    229         c = new double[initialConstants.Length + 2];
    230         c[0] = 0.0;
    231         c[1] = 1.0;
    232         Array.Copy(initialConstants, 0, c, 2, initialConstants.Length);
    233       } else {
    234         c = (double[])initialConstants.Clone();
    235       }
    236 
    237214      double originalQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);
    238215
    239       if (counter == null) counter = new EvaluationsCounter();
    240       var rowEvaluationsCounter = new EvaluationsCounter();
    241 
    242       alglib.lsfitstate state;
    243       alglib.lsfitreport rep;
    244       int retVal;
    245 
    246       IDataset ds = problemData.Dataset;
    247       double[,] x = new double[rows.Count(), parameters.Count];
    248       int row = 0;
    249       foreach (var r in rows) {
    250         int col = 0;
    251         foreach (var info in parameterEntries) {
    252           if (ds.VariableHasType<double>(info.variableName)) {
    253             x[row, col] = ds.GetDoubleValue(info.variableName, r + info.lag);
    254           } else if (ds.VariableHasType<string>(info.variableName)) {
    255             x[row, col] = ds.GetStringValue(info.variableName, r) == info.variableValue ? 1 : 0;
    256           } else throw new InvalidProgramException("found a variable of unknown type");
    257           col++;
    258         }
    259         row++;
    260       }
    261       double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
    262       int n = x.GetLength(0);
    263       int m = x.GetLength(1);
    264       int k = c.Length;
    265 
    266       alglib.ndimensional_pfunc function_cx_1_func = CreatePFunc(func);
    267       alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(func_grad);
    268       alglib.ndimensional_rep xrep = (p, f, obj) => iterationCallback(p, f, obj);
    269 
    270       try {
    271         alglib.lsfitcreatefg(x, y, c, n, m, k, false, out state);
    272         alglib.lsfitsetcond(state, 0.0, maxIterations);
    273         alglib.lsfitsetxrep(state, iterationCallback != null);
    274         alglib.lsfitfit(state, function_cx_1_func, function_cx_1_grad, xrep, rowEvaluationsCounter);
    275         alglib.lsfitresults(state, out retVal, out c, out rep);
    276       } catch (ArithmeticException) {
    277         return originalQuality;
    278       } catch (alglib.alglibexception) {
    279         return originalQuality;
    280       }
    281 
    282       counter.FunctionEvaluations += rowEvaluationsCounter.FunctionEvaluations / n;
    283       counter.GradientEvaluations += rowEvaluationsCounter.GradientEvaluations / n;
    284 
    285       //retVal == -7  => constant optimization failed due to wrong gradient
    286       //          -8  => optimizer detected  NAN / INF  in  the target
    287       //                 function and/ or gradient
    288       if (retVal != -7 && retVal != -8) {
    289         if (applyLinearScaling) {
    290           var tmp = new double[c.Length - 2];
    291           Array.Copy(c, 2, tmp, 0, tmp.Length);
    292           UpdateConstants(tree, tmp, updateVariableWeights);
    293         } else UpdateConstants(tree, c, updateVariableWeights);
    294       }
     216      var nodesToOptimize = new HashSet<ISymbolicExpressionTreeNode>();
     217      var originalNodeValues = new Dictionary<ISymbolicExpressionTreeNode, double>();
     218
     219      foreach (var node in tree.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>()) {
     220        if (node is VariableTreeNode && !updateVariableWeights) {
     221          continue;
     222        }
     223        if (node is ConstantTreeNode && node.Parent.Symbol is Power && node.Parent.GetSubtree(0) == node) {
     224          // do not optimize exponents
     225          continue;
     226        }
     227        nodesToOptimize.Add(node);
     228        if (node is ConstantTreeNode constant) {
     229          originalNodeValues[node] = constant.Value;
     230        } else if (node is VariableTreeNode variable) {
     231          originalNodeValues[node] = variable.Weight;
     232        }
     233      }
     234
     235      var options = new SolverOptions {
     236        Iterations = maxIterations
     237      };
     238      var summary = new OptimizationSummary();
     239      var optimizedNodeValues = ParameterOptimizer.OptimizeTree(tree, problemData.Dataset, problemData.TrainingIndices, problemData.TargetVariable, nodesToOptimize, options, ref summary);
     240
     241      counter.FunctionEvaluations += summary.ResidualEvaluations;
     242      counter.GradientEvaluations += summary.JacobianEvaluations;
     243
     244      // check if the fitting of the parameters was successful
     245      UpdateNodeValues(optimizedNodeValues);
     246     
    295247      var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);
    296 
    297       if (!updateConstantsInTree) UpdateConstants(tree, initialConstants, updateVariableWeights);
    298 
    299       if (originalQuality - quality > 0.001 || double.IsNaN(quality)) {
    300         UpdateConstants(tree, initialConstants, updateVariableWeights);
    301         return originalQuality;
    302       }
    303       return quality;
    304     }
    305 
    306     private static void UpdateConstants(ISymbolicExpressionTree tree, double[] constants, bool updateVariableWeights) {
              // Writes the entries of `constants` back into the terminal nodes of `tree`.
              // Terminal nodes are visited in prefix order starting at tree.Root, and each
              // updated value consumes the next entry of `constants` (index `i`), so the array
              // must be ordered exactly like this traversal. No bounds check is performed here.
    307       int i = 0;
    308       foreach (var node in tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>()) {
    309         ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
    310         VariableTreeNodeBase variableTreeNodeBase = node as VariableTreeNodeBase;
    311         FactorVariableTreeNode factorVarTreeNode = node as FactorVariableTreeNode;
    312         if (constantTreeNode != null) {
                  // A constant that is the subtree at index 1 of a Power node is skipped
                  // and does not consume an entry of `constants`.
    313           if (constantTreeNode.Parent.Symbol is Power
    314               && constantTreeNode.Parent.GetSubtree(1) == constantTreeNode) continue; // exponents in powers are not optimized (see TreeToAutoDiffTermConverter)
    315           constantTreeNode.Value = constants[i++];
    316         } else if (updateVariableWeights && variableTreeNodeBase != null)
                  // Variable weights are overwritten only when updateVariableWeights is true.
    317           variableTreeNodeBase.Weight = constants[i++];
    318         else if (factorVarTreeNode != null) {
                  // This branch does not test updateVariableWeights: every weight of the
                  // factor variable node takes one entry of `constants`.
    319           for (int j = 0; j < factorVarTreeNode.Weights.Length; j++)
    320             factorVarTreeNode.Weights[j] = constants[i++];
    321         }
    322       }
    323     }
    324 
    325     private static alglib.ndimensional_pfunc CreatePFunc(TreeToAutoDiffTermConverter.ParametricFunction func) {
              // Wraps `func` in an alglib.ndimensional_pfunc delegate.
              // The returned callback stores func(c, x) in `fx` and increments
              // FunctionEvaluations on the object passed as `o`.
              // `o` is cast to EvaluationsCounter without a type check, so callers are
              // expected to pass an EvaluationsCounter as the callback's user object.
    326       return (double[] c, double[] x, ref double fx, object o) => {
    327         fx = func(c, x);
    328         var counter = (EvaluationsCounter)o;
    329         counter.FunctionEvaluations++;
    330       };
    331     }
    332 
    333     private static alglib.ndimensional_pgrad CreatePGrad(TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad) {
              // Wraps `func_grad` in an alglib.ndimensional_pgrad delegate.
              // func_grad(c, x) returns a tuple: Item2 is written to `fx`, and the first
              // grad.Length elements of Item1 are copied into `grad`.
              // Each call increments GradientEvaluations on the object passed as `o`,
              // which is cast to EvaluationsCounter without a type check.
    334       return (double[] c, double[] x, ref double fx, double[] grad, object o) => {
    335         var tuple = func_grad(c, x);
    336         fx = tuple.Item2;
    337         Array.Copy(tuple.Item1, grad, grad.Length);
    338         var counter = (EvaluationsCounter)o;
    339         counter.GradientEvaluations++;
    340       };
    341     }
     248      if (quality < originalQuality || !updateConstantsInTree) {
     249        UpdateNodeValues(originalNodeValues);
     250      }
     251      return Math.Max(quality, originalQuality);
     252    }
     253
     254    private static void UpdateNodeValues(IDictionary<ISymbolicExpressionTreeNode, double> values) {
              // Applies each (node, value) pair in `values` to the node itself:
              // a ConstantTreeNode gets its Value set, a VariableTreeNode gets its Weight set.
              // Nodes of any other type are left unchanged, with no error raised.
     255      foreach (var item in values) {
     256        var node = item.Key;
     257        if (node is ConstantTreeNode constant) {
     258          constant.Value = item.Value;
     259        } else if (node is VariableTreeNode variable) {
     260          variable.Weight = item.Value;
     261        }
     262      }
     263    }
     264
    342265    public static bool CanOptimizeConstants(ISymbolicExpressionTree tree) {
    343266      return TreeToAutoDiffTermConverter.IsCompatible(tree);
Note: See TracChangeset for help on using the changeset viewer.