
Timestamp: 03/10/20 08:28:49
Author: pfleck
Message: #3040 Moved the Alglib+AutoDiff constant optimizer into its own class and created a base class to provide multiple constant-opt implementations. (An overview sketch of the new class layout follows the legend below.)
Location: branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective
Files: 2 edited, 1 copied

Legend:

Unmodified: no prefix
Added: prefixed with "+"
Removed: prefixed with "-"
Elided, unchanged regions are shown as "…"
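
Overview of the new class layout, condensed from the diffs below. This is a sketch for orientation only, not the complete source; all members except the new abstract hook are omitted.

    // Condensed sketch of the refactoring in r17472 (details omitted; see diffs below).
    // The former SymbolicRegressionConstantOptimizationEvaluator becomes an abstract
    // base class that keeps the shared parameters, InstrumentedApply() and Evaluate(),
    // and delegates the actual optimization to a single abstract hook:
    public abstract class SymbolicRegressionConstantOptimizationEvaluator
      : SymbolicRegressionSingleObjectiveEvaluator {
      protected abstract ISymbolicExpressionTree OptimizeConstants(
        ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows,
        CancellationToken cancellationToken = default(CancellationToken),
        EvaluationsCounter counter = null);
    }

    // The Alglib+AutoDiff implementation moves into its own subclass and also
    // exposes the optimizer as a static helper (OptimizeTree) that returns a
    // new tree instead of a quality value:
    public class NonlinearLeastSquaresConstantOptimizationEvaluator
      : SymbolicRegressionConstantOptimizationEvaluator {
      protected override ISymbolicExpressionTree OptimizeConstants(
        ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows,
        CancellationToken cancellationToken = default(CancellationToken),
        EvaluationsCounter counter = null) {
        return OptimizeTree(tree, problemData, rows,
          ApplyLinearScalingParameter.ActualValue.Value, ConstantOptimizationIterations,
          UpdateVariableWeights, cancellationToken, counter);
      }
    }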
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/ConstantOptimizationAnalyzer.cs

r17180 r17472

        : base() {
        Parameters.Add(new FixedValueParameter<PercentValue>(PercentageOfBestSolutionsParameterName, "The percentage of the top solutions which should be analyzed.", new PercentValue(0.1)));
-       Parameters.Add(new FixedValueParameter<SymbolicRegressionConstantOptimizationEvaluator>(ConstantOptimizationEvaluatorParameterName, "The operator used to perform the constant optimization"));
+       Parameters.Add(new FixedValueParameter<NonlinearLeastSquaresConstantOptimizationEvaluator>(ConstantOptimizationEvaluatorParameterName, "The operator used to perform the constant optimization"));

        //Changed the ActualName of the EvaluationPartitionParameter so that it matches the parameter name of symbolic regression problems.
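
Note: the analyzer keeps its ConstantOptimizationEvaluator parameter; only the value type changes. Since the base class becomes abstract in this changeset, the FixedValueParameter presumably needs a concrete, instantiable value type, so it is wired to the new Alglib-based subclass.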
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/NonlinearLeastSquaresConstantOptimizationEvaluator.cs

r17455 r17472

  using System.Collections.Generic;
  using System.Linq;
- using HEAL.Attic;
+ using System.Threading;
  using HeuristicLab.Common;
  using HeuristicLab.Core;
  using HeuristicLab.Data;
  using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
- using HeuristicLab.Optimization;
  using HeuristicLab.Parameters;
+ using HEAL.Attic;

  namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
-   [Item("Constant Optimization Evaluator", "Calculates Pearson R² of a symbolic regression solution and optimizes the constant used.")]
    [StorableType("24B68851-036D-4446-BD6F-3823E9028FF4")]
-   public class SymbolicRegressionConstantOptimizationEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
-     private const string ConstantOptimizationIterationsParameterName = "ConstantOptimizationIterations";
-     private const string ConstantOptimizationImprovementParameterName = "ConstantOptimizationImprovement";
-     private const string ConstantOptimizationProbabilityParameterName = "ConstantOptimizationProbability";
-     private const string ConstantOptimizationRowsPercentageParameterName = "ConstantOptimizationRowsPercentage";
-     private const string UpdateConstantsInTreeParameterName = "UpdateConstantsInSymbolicExpressionTree";
-     private const string UpdateVariableWeightsParameterName = "Update Variable Weights";
-
-     private const string FunctionEvaluationsResultParameterName = "Constants Optimization Function Evaluations";
-     private const string GradientEvaluationsResultParameterName = "Constants Optimization Gradient Evaluations";
-     private const string CountEvaluationsParameterName = "Count Function and Gradient Evaluations";
-
+   [Item("NonlinearLeastSquaresOptimizer", "")]
+   public class NonlinearLeastSquaresConstantOptimizationEvaluator : SymbolicRegressionConstantOptimizationEvaluator {
+
+     private const string ConstantOptimizationIterationsName = "ConstantOptimizationIterations";
+
+     #region Parameter Properties
      public IFixedValueParameter<IntValue> ConstantOptimizationIterationsParameter {
-       get { return (IFixedValueParameter<IntValue>)Parameters[ConstantOptimizationIterationsParameterName]; }
-     }
-     public IFixedValueParameter<DoubleValue> ConstantOptimizationImprovementParameter {
-       get { return (IFixedValueParameter<DoubleValue>)Parameters[ConstantOptimizationImprovementParameterName]; }
-     }
-     public IFixedValueParameter<PercentValue> ConstantOptimizationProbabilityParameter {
-       get { return (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationProbabilityParameterName]; }
-     }
-     public IFixedValueParameter<PercentValue> ConstantOptimizationRowsPercentageParameter {
-       get { return (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationRowsPercentageParameterName]; }
-     }
-     public IFixedValueParameter<BoolValue> UpdateConstantsInTreeParameter {
-       get { return (IFixedValueParameter<BoolValue>)Parameters[UpdateConstantsInTreeParameterName]; }
-     }
-     public IFixedValueParameter<BoolValue> UpdateVariableWeightsParameter {
-       get { return (IFixedValueParameter<BoolValue>)Parameters[UpdateVariableWeightsParameterName]; }
-     }
-
-     public IResultParameter<IntValue> FunctionEvaluationsResultParameter {
-       get { return (IResultParameter<IntValue>)Parameters[FunctionEvaluationsResultParameterName]; }
-     }
-     public IResultParameter<IntValue> GradientEvaluationsResultParameter {
-       get { return (IResultParameter<IntValue>)Parameters[GradientEvaluationsResultParameterName]; }
-     }
-     public IFixedValueParameter<BoolValue> CountEvaluationsParameter {
-       get { return (IFixedValueParameter<BoolValue>)Parameters[CountEvaluationsParameterName]; }
-     }
-
-
-     public IntValue ConstantOptimizationIterations {
-       get { return ConstantOptimizationIterationsParameter.Value; }
-     }
-     public DoubleValue ConstantOptimizationImprovement {
-       get { return ConstantOptimizationImprovementParameter.Value; }
-     }
-     public PercentValue ConstantOptimizationProbability {
-       get { return ConstantOptimizationProbabilityParameter.Value; }
-     }
-     public PercentValue ConstantOptimizationRowsPercentage {
-       get { return ConstantOptimizationRowsPercentageParameter.Value; }
-     }
-     public bool UpdateConstantsInTree {
-       get { return UpdateConstantsInTreeParameter.Value.Value; }
-       set { UpdateConstantsInTreeParameter.Value.Value = value; }
-     }
-
-     public bool UpdateVariableWeights {
-       get { return UpdateVariableWeightsParameter.Value.Value; }
-       set { UpdateVariableWeightsParameter.Value.Value = value; }
-     }
-
-     public bool CountEvaluations {
-       get { return CountEvaluationsParameter.Value.Value; }
-       set { CountEvaluationsParameter.Value.Value = value; }
-     }
-
-     public override bool Maximization {
-       get { return true; }
-     }
-
+       get { return (IFixedValueParameter<IntValue>)Parameters[ConstantOptimizationIterationsName]; }
+     }
+     #endregion
+
+     #region Properties
+     public int ConstantOptimizationIterations {
+       get { return ConstantOptimizationIterationsParameter.Value.Value; }
+     }
+     #endregion
+
+     public NonlinearLeastSquaresConstantOptimizationEvaluator()
+       : base() {
+       Parameters.Add(new FixedValueParameter<IntValue>(ConstantOptimizationIterationsName, "Determines how many iterations should be calculated while optimizing the constant of a symbolic expression tree(0 indicates other or default stopping criterion).", new IntValue(10)));
+     }
+
+     protected NonlinearLeastSquaresConstantOptimizationEvaluator(NonlinearLeastSquaresConstantOptimizationEvaluator original, Cloner cloner)
+       : base(original, cloner) {
+     }
+     public override IDeepCloneable Clone(Cloner cloner) {
+       return new NonlinearLeastSquaresConstantOptimizationEvaluator(this, cloner);
+     }
      [StorableConstructor]
-     protected SymbolicRegressionConstantOptimizationEvaluator(StorableConstructorFlag _) : base(_) { }
-     protected SymbolicRegressionConstantOptimizationEvaluator(SymbolicRegressionConstantOptimizationEvaluator original, Cloner cloner)
-       : base(original, cloner) {
-     }
-     public SymbolicRegressionConstantOptimizationEvaluator()
-       : base() {
-       Parameters.Add(new FixedValueParameter<IntValue>(ConstantOptimizationIterationsParameterName, "Determines how many iterations should be calculated while optimizing the constant of a symbolic expression tree (0 indicates other or default stopping criterion).", new IntValue(10)));
-       Parameters.Add(new FixedValueParameter<DoubleValue>(ConstantOptimizationImprovementParameterName, "Determines the relative improvement which must be achieved in the constant optimization to continue with it (0 indicates other or default stopping criterion).", new DoubleValue(0)) { Hidden = true });
-       Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationProbabilityParameterName, "Determines the probability that the constants are optimized", new PercentValue(1)));
-       Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationRowsPercentageParameterName, "Determines the percentage of the rows which should be used for constant optimization", new PercentValue(1)));
-       Parameters.Add(new FixedValueParameter<BoolValue>(UpdateConstantsInTreeParameterName, "Determines if the constants in the tree should be overwritten by the optimized constants.", new BoolValue(true)) { Hidden = true });
-       Parameters.Add(new FixedValueParameter<BoolValue>(UpdateVariableWeightsParameterName, "Determines if the variable weights in the tree should be  optimized.", new BoolValue(true)) { Hidden = true });
-
-       Parameters.Add(new FixedValueParameter<BoolValue>(CountEvaluationsParameterName, "Determines if function and gradient evaluation should be counted.", new BoolValue(false)));
-       Parameters.Add(new ResultParameter<IntValue>(FunctionEvaluationsResultParameterName, "The number of function evaluations performed by the constants optimization evaluator", "Results", new IntValue()));
-       Parameters.Add(new ResultParameter<IntValue>(GradientEvaluationsResultParameterName, "The number of gradient evaluations performed by the constants optimization evaluator", "Results", new IntValue()));
-     }
-
-     public override IDeepCloneable Clone(Cloner cloner) {
-       return new SymbolicRegressionConstantOptimizationEvaluator(this, cloner);
-     }
-
-     [StorableHook(HookType.AfterDeserialization)]
-     private void AfterDeserialization() {
-       if (!Parameters.ContainsKey(UpdateConstantsInTreeParameterName))
-         Parameters.Add(new FixedValueParameter<BoolValue>(UpdateConstantsInTreeParameterName, "Determines if the constants in the tree should be overwritten by the optimized constants.", new BoolValue(true)));
-       if (!Parameters.ContainsKey(UpdateVariableWeightsParameterName))
-         Parameters.Add(new FixedValueParameter<BoolValue>(UpdateVariableWeightsParameterName, "Determines if the variable weights in the tree should be  optimized.", new BoolValue(true)));
-
-       if (!Parameters.ContainsKey(CountEvaluationsParameterName))
-         Parameters.Add(new FixedValueParameter<BoolValue>(CountEvaluationsParameterName, "Determines if function and gradient evaluation should be counted.", new BoolValue(false)));
-
-       if (!Parameters.ContainsKey(FunctionEvaluationsResultParameterName))
-         Parameters.Add(new ResultParameter<IntValue>(FunctionEvaluationsResultParameterName, "The number of function evaluations performed by the constants optimization evaluator", "Results", new IntValue()));
-       if (!Parameters.ContainsKey(GradientEvaluationsResultParameterName))
-         Parameters.Add(new ResultParameter<IntValue>(GradientEvaluationsResultParameterName, "The number of gradient evaluations performed by the constants optimization evaluator", "Results", new IntValue()));
-     }
-
-     private static readonly object locker = new object();
-     public override IOperation InstrumentedApply() {
-       var solution = SymbolicExpressionTreeParameter.ActualValue;
-       double quality;
-       if (RandomParameter.ActualValue.NextDouble() < ConstantOptimizationProbability.Value) {
-         IEnumerable<int> constantOptimizationRows = GenerateRowsToEvaluate(ConstantOptimizationRowsPercentage.Value);
-         var counter = new EvaluationsCounter();
-         quality = OptimizeConstants(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, ProblemDataParameter.ActualValue,
-            constantOptimizationRows, ApplyLinearScalingParameter.ActualValue.Value, ConstantOptimizationIterations.Value, updateVariableWeights: UpdateVariableWeights, lowerEstimationLimit: EstimationLimitsParameter.ActualValue.Lower, upperEstimationLimit: EstimationLimitsParameter.ActualValue.Upper, updateConstantsInTree: UpdateConstantsInTree, counter: counter);
-
-         if (ConstantOptimizationRowsPercentage.Value != RelativeNumberOfEvaluatedSamplesParameter.ActualValue.Value) {
-           var evaluationRows = GenerateRowsToEvaluate();
-           quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value);
-         }
-
-         if (CountEvaluations) {
-           lock (locker) {
-             FunctionEvaluationsResultParameter.ActualValue.Value += counter.FunctionEvaluations;
-             GradientEvaluationsResultParameter.ActualValue.Value += counter.GradientEvaluations;
-           }
-         }
-
-       } else {
-         var evaluationRows = GenerateRowsToEvaluate();
-         quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value);
-       }
-       QualityParameter.ActualValue = new DoubleValue(quality);
-
-       return base.InstrumentedApply();
-     }
-
-     public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) {
-       SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
-       EstimationLimitsParameter.ExecutionContext = context;
-       ApplyLinearScalingParameter.ExecutionContext = context;
-       FunctionEvaluationsResultParameter.ExecutionContext = context;
-       GradientEvaluationsResultParameter.ExecutionContext = context;
-
-       // Pearson R² evaluator is used on purpose instead of the const-opt evaluator,
-       // because Evaluate() is used to get the quality of evolved models on
-       // different partitions of the dataset (e.g., best validation model)
-       double r2 = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, tree, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, problemData, rows, ApplyLinearScalingParameter.ActualValue.Value);
-
-       SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
-       EstimationLimitsParameter.ExecutionContext = null;
-       ApplyLinearScalingParameter.ExecutionContext = null;
-       FunctionEvaluationsResultParameter.ExecutionContext = null;
-       GradientEvaluationsResultParameter.ExecutionContext = null;
-
-       return r2;
-     }
-
-     public class EvaluationsCounter {
-       public int FunctionEvaluations = 0;
-       public int GradientEvaluations = 0;
-     }
-
-     public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
-       ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling,
-       int maxIterations, bool updateVariableWeights = true,
-       double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
-       bool updateConstantsInTree = true, Action<double[], double, object> iterationCallback = null, EvaluationsCounter counter = null) {
+     protected NonlinearLeastSquaresConstantOptimizationEvaluator(StorableConstructorFlag _) : base(_) { }
+
+     protected override ISymbolicExpressionTree OptimizeConstants(
+       ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows,
+       CancellationToken cancellationToken = default(CancellationToken), EvaluationsCounter counter = null) {
+       return OptimizeTree(tree,
+         problemData, rows,
+         ApplyLinearScalingParameter.ActualValue.Value, ConstantOptimizationIterations, UpdateVariableWeights,
+         cancellationToken, counter);
+     }
+
+     public static ISymbolicExpressionTree OptimizeTree(
+       ISymbolicExpressionTree tree,
+       IRegressionProblemData problemData, IEnumerable<int> rows,
+       bool applyLinearScaling, int maxIterations, bool updateVariableWeights,
+       CancellationToken cancellationToken = default(CancellationToken), EvaluationsCounter counter = null, Action<double[], double, object> iterationCallback = null) {

        // numeric constants in the tree become variables for constant opt
…
        // variable name, variable value (for factor vars) and lag as a DataForVariable object.
        // A dictionary is used to find parameters
-       double[] initialConstants;
-       var parameters = new List<TreeToAutoDiffTermConverter.DataForVariable>();
-
-       TreeToAutoDiffTermConverter.ParametricFunction func;
-       TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad;
-       if (!TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, updateVariableWeights, applyLinearScaling, out parameters, out initialConstants, out func, out func_grad))
+       bool success = TreeToAutoDiffTermConverter.TryConvertToAutoDiff(
+         tree, updateVariableWeights, applyLinearScaling,
+         out var parameters, out var initialConstants, out var func, out var func_grad);
+       if (!success)
          throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree.");
-       if (parameters.Count == 0) return 0.0; // gkronber: constant expressions always have a R² of 0.0
+       if (parameters.Count == 0) return (ISymbolicExpressionTree)tree.Clone();
        var parameterEntries = parameters.ToArray(); // order of entries must be the same for x

-       //extract inital constants
+       //extract initial constants
        double[] c;
        if (applyLinearScaling) {
…
          c = (double[])initialConstants.Clone();
        }
-
-       double originalQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);
-
-       if (counter == null) counter = new EvaluationsCounter();
-       var rowEvaluationsCounter = new EvaluationsCounter();
-
-       alglib.lsfitstate state;
-       alglib.lsfitreport rep;
-       int retVal;

        IDataset ds = problemData.Dataset;
…
        alglib.ndimensional_pfunc function_cx_1_func = CreatePFunc(func);
        alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(func_grad);
-       alglib.ndimensional_rep xrep = (p, f, obj) => iterationCallback(p, f, obj);
+       alglib.ndimensional_rep xrep = (p, f, obj) => {
+         iterationCallback?.Invoke(p, f, obj);
+         cancellationToken.ThrowIfCancellationRequested();
+       };
+       var rowEvaluationsCounter = new EvaluationsCounter();

        try {
-         alglib.lsfitcreatefg(x, y, c, n, m, k, false, out state);
+         alglib.lsfitcreatefg(x, y, c, n, m, k, false, out var state);
          alglib.lsfitsetcond(state, 0.0, 0.0, maxIterations);
-         alglib.lsfitsetxrep(state, iterationCallback != null);
+         alglib.lsfitsetxrep(state, iterationCallback != null || cancellationToken != default(CancellationToken));
          //alglib.lsfitsetgradientcheck(state, 0.001);
          alglib.lsfitfit(state, function_cx_1_func, function_cx_1_grad, xrep, rowEvaluationsCounter);
-         alglib.lsfitresults(state, out retVal, out c, out rep);
+         alglib.lsfitresults(state, out var retVal, out c, out alglib.lsfitreport rep);
+
+         //retVal == -7  => constant optimization failed due to wrong gradient
+         if (retVal == -1)
+           return (ISymbolicExpressionTree)tree.Clone();
        } catch (ArithmeticException) {
-         return originalQuality;
+         return (ISymbolicExpressionTree)tree.Clone();
        } catch (alglib.alglibexception) {
-         return originalQuality;
-       }
-
-       counter.FunctionEvaluations += rowEvaluationsCounter.FunctionEvaluations / n;
-       counter.GradientEvaluations += rowEvaluationsCounter.GradientEvaluations / n;
-
-       //retVal == -7  => constant optimization failed due to wrong gradient
-       if (retVal != -7) {
-         if (applyLinearScaling) {
-           var tmp = new double[c.Length - 2];
-           Array.Copy(c, 2, tmp, 0, tmp.Length);
-           UpdateConstants(tree, tmp, updateVariableWeights);
-         } else UpdateConstants(tree, c, updateVariableWeights);
-       }
-       var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);
-
-       if (!updateConstantsInTree) UpdateConstants(tree, initialConstants, updateVariableWeights);
-
-       if (originalQuality - quality > 0.001 || double.IsNaN(quality)) {
-         UpdateConstants(tree, initialConstants, updateVariableWeights);
-         return originalQuality;
-       }
-       return quality;
+         return (ISymbolicExpressionTree)tree.Clone();
+       }
+
+       if (counter != null) {
+         counter.FunctionEvaluations += rowEvaluationsCounter.FunctionEvaluations / n;
+         counter.GradientEvaluations += rowEvaluationsCounter.GradientEvaluations / n;
+       }
+
+       var newTree = (ISymbolicExpressionTree)tree.Clone();
+       if (applyLinearScaling) {
+         var tmp = new double[c.Length - 2];
+         Array.Copy(c, 2, tmp, 0, tmp.Length);
+         UpdateConstants(newTree, tmp, updateVariableWeights);
+       } else
+         UpdateConstants(newTree, c, updateVariableWeights);
+
+       return newTree;
      }

…
        };
      }
+
      public static bool CanOptimizeConstants(ISymbolicExpressionTree tree) {
        return TreeToAutoDiffTermConverter.IsCompatible(tree);
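
A minimal usage sketch of the new static OptimizeTree entry point shown above. The tree, problemData and rows variables are assumed to come from the caller's context, and the time budget is illustrative:

    // Sketch only: assumes tree, problemData and rows are provided by the caller.
    var cts = new CancellationTokenSource(TimeSpan.FromSeconds(10)); // illustrative time budget
    var counter = new SymbolicRegressionConstantOptimizationEvaluator.EvaluationsCounter();

    // Returns a cloned tree with fitted constants; the input tree is not modified.
    ISymbolicExpressionTree optimized = NonlinearLeastSquaresConstantOptimizationEvaluator.OptimizeTree(
      tree, problemData, rows,
      applyLinearScaling: true, maxIterations: 10, updateVariableWeights: true,
      cancellationToken: cts.Token, counter: counter);

Returning a clone rather than mutating the input is what lets the failure paths above simply hand back tree.Clone(), and lets callers keep the unoptimized original for comparison.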
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/SymbolicRegressionConstantOptimizationEvaluator.cs

r17180 r17472

  #endregion

- using System;
  using System.Collections.Generic;
- using System.Linq;
+ using System.Threading;
  using HEAL.Attic;
  using HeuristicLab.Common;
…
  namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
    [Item("Constant Optimization Evaluator", "Calculates Pearson R² of a symbolic regression solution and optimizes the constant used.")]
-   [StorableType("24B68851-036D-4446-BD6F-3823E9028FF4")]
-   public class SymbolicRegressionConstantOptimizationEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
-     private const string ConstantOptimizationIterationsParameterName = "ConstantOptimizationIterations";
+   [StorableType("9F3A528C-DF9B-4D8F-8611-314E169A3B34")]
+   public abstract class SymbolicRegressionConstantOptimizationEvaluator : SymbolicRegressionSingleObjectiveEvaluator {
      private const string ConstantOptimizationImprovementParameterName = "ConstantOptimizationImprovement";
      private const string ConstantOptimizationProbabilityParameterName = "ConstantOptimizationProbability";
…
      private const string CountEvaluationsParameterName = "Count Function and Gradient Evaluations";

-     public IFixedValueParameter<IntValue> ConstantOptimizationIterationsParameter {
-       get { return (IFixedValueParameter<IntValue>)Parameters[ConstantOptimizationIterationsParameterName]; }
-     }
+
      public IFixedValueParameter<DoubleValue> ConstantOptimizationImprovementParameter {
        get { return (IFixedValueParameter<DoubleValue>)Parameters[ConstantOptimizationImprovementParameterName]; }
…


-     public IntValue ConstantOptimizationIterations {
-       get { return ConstantOptimizationIterationsParameter.Value; }
-     }
      public DoubleValue ConstantOptimizationImprovement {
        get { return ConstantOptimizationImprovementParameter.Value; }
…
        : base(original, cloner) {
      }
-     public SymbolicRegressionConstantOptimizationEvaluator()
+     protected SymbolicRegressionConstantOptimizationEvaluator()
        : base() {
-       Parameters.Add(new FixedValueParameter<IntValue>(ConstantOptimizationIterationsParameterName, "Determines how many iterations should be calculated while optimizing the constant of a symbolic expression tree (0 indicates other or default stopping criterion).", new IntValue(10)));
        Parameters.Add(new FixedValueParameter<DoubleValue>(ConstantOptimizationImprovementParameterName, "Determines the relative improvement which must be achieved in the constant optimization to continue with it (0 indicates other or default stopping criterion).", new DoubleValue(0)) { Hidden = true });
        Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationProbabilityParameterName, "Determines the probability that the constants are optimized", new PercentValue(1)));
…
        Parameters.Add(new ResultParameter<IntValue>(FunctionEvaluationsResultParameterName, "The number of function evaluations performed by the constants optimization evaluator", "Results", new IntValue()));
        Parameters.Add(new ResultParameter<IntValue>(GradientEvaluationsResultParameterName, "The number of gradient evaluations performed by the constants optimization evaluator", "Results", new IntValue()));
-     }
-
-     public override IDeepCloneable Clone(Cloner cloner) {
-       return new SymbolicRegressionConstantOptimizationEvaluator(this, cloner);
      }

…
      private static readonly object locker = new object();
      public override IOperation InstrumentedApply() {
+       var originalTree = SymbolicExpressionTreeParameter.ActualValue;
        var solution = SymbolicExpressionTreeParameter.ActualValue;
-       double quality;
+       var problemData = ProblemDataParameter.ActualValue;
+
        if (RandomParameter.ActualValue.NextDouble() < ConstantOptimizationProbability.Value) {
          IEnumerable<int> constantOptimizationRows = GenerateRowsToEvaluate(ConstantOptimizationRowsPercentage.Value);
-         var counter = new EvaluationsCounter();
-         quality = OptimizeConstants(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, ProblemDataParameter.ActualValue,
-            constantOptimizationRows, ApplyLinearScalingParameter.ActualValue.Value, ConstantOptimizationIterations.Value, updateVariableWeights: UpdateVariableWeights, lowerEstimationLimit: EstimationLimitsParameter.ActualValue.Lower, upperEstimationLimit: EstimationLimitsParameter.ActualValue.Upper, updateConstantsInTree: UpdateConstantsInTree, counter: counter);
+         EvaluationsCounter counter = null;
+         if (CountEvaluations) counter = new EvaluationsCounter();
+
+         var optimizedTree = OptimizeConstants(originalTree, problemData, constantOptimizationRows, counter: counter);
+
+         double quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(
+           SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, optimizedTree,
+           EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, constantOptimizationRows, ApplyLinearScalingParameter.ActualValue.Value);

          if (ConstantOptimizationRowsPercentage.Value != RelativeNumberOfEvaluatedSamplesParameter.ActualValue.Value) {
            var evaluationRows = GenerateRowsToEvaluate();
-           quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value);
+           quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(
+             SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution,
+             EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value);
          }

+         SymbolicExpressionTreeParameter.ActualValue = optimizedTree;
+         QualityParameter.ActualValue = new DoubleValue(quality);
          if (CountEvaluations) {
            lock (locker) {
…
        } else {
          var evaluationRows = GenerateRowsToEvaluate();
-         quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value);
+         double quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(
+           SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution,
+           EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value);
+         QualityParameter.ActualValue = new DoubleValue(quality);
        }
-       QualityParameter.ActualValue = new DoubleValue(quality);

        return base.InstrumentedApply();
…
      }

+     protected abstract ISymbolicExpressionTree OptimizeConstants(
+       ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows,
+       CancellationToken cancellationToken = default(CancellationToken), EvaluationsCounter counter = null);
+
      public class EvaluationsCounter {
        public int FunctionEvaluations = 0;
        public int GradientEvaluations = 0;
      }
-
-     public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
-       ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling,
-       int maxIterations, bool updateVariableWeights = true,
-       double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,
-       bool updateConstantsInTree = true, Action<double[], double, object> iterationCallback = null, EvaluationsCounter counter = null) {
-
-       // numeric constants in the tree become variables for constant opt
-       // variables in the tree become parameters (fixed values) for constant opt
-       // for each parameter (variable in the original tree) we store the
-       // variable name, variable value (for factor vars) and lag as a DataForVariable object.
-       // A dictionary is used to find parameters
-       double[] initialConstants;
-       var parameters = new List<TreeToAutoDiffTermConverter.DataForVariable>();
-
-       TreeToAutoDiffTermConverter.ParametricFunction func;
-       TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad;
-       if (!TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, updateVariableWeights, applyLinearScaling, out parameters, out initialConstants, out func, out func_grad))
-         throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree.");
-       if (parameters.Count == 0) return 0.0; // gkronber: constant expressions always have a R² of 0.0
-       var parameterEntries = parameters.ToArray(); // order of entries must be the same for x
-
-       //extract inital constants
-       double[] c;
-       if (applyLinearScaling) {
-         c = new double[initialConstants.Length + 2];
-         c[0] = 0.0;
-         c[1] = 1.0;
-         Array.Copy(initialConstants, 0, c, 2, initialConstants.Length);
-       } else {
-         c = (double[])initialConstants.Clone();
-       }
-
-       double originalQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);
-
-       if (counter == null) counter = new EvaluationsCounter();
-       var rowEvaluationsCounter = new EvaluationsCounter();
-
-       alglib.lsfitstate state;
-       alglib.lsfitreport rep;
-       int retVal;
-
-       IDataset ds = problemData.Dataset;
-       double[,] x = new double[rows.Count(), parameters.Count];
-       int row = 0;
-       foreach (var r in rows) {
-         int col = 0;
-         foreach (var info in parameterEntries) {
-           if (ds.VariableHasType<double>(info.variableName)) {
-             x[row, col] = ds.GetDoubleValue(info.variableName, r + info.lag);
-           } else if (ds.VariableHasType<string>(info.variableName)) {
-             x[row, col] = ds.GetStringValue(info.variableName, r) == info.variableValue ? 1 : 0;
-           } else throw new InvalidProgramException("found a variable of unknown type");
-           col++;
-         }
-         row++;
-       }
-       double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
-       int n = x.GetLength(0);
-       int m = x.GetLength(1);
-       int k = c.Length;
-
-       alglib.ndimensional_pfunc function_cx_1_func = CreatePFunc(func);
-       alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(func_grad);
-       alglib.ndimensional_rep xrep = (p, f, obj) => iterationCallback(p, f, obj);
-
-       try {
-         alglib.lsfitcreatefg(x, y, c, n, m, k, false, out state);
-         alglib.lsfitsetcond(state, 0.0, 0.0, maxIterations);
-         alglib.lsfitsetxrep(state, iterationCallback != null);
-         //alglib.lsfitsetgradientcheck(state, 0.001);
-         alglib.lsfitfit(state, function_cx_1_func, function_cx_1_grad, xrep, rowEvaluationsCounter);
-         alglib.lsfitresults(state, out retVal, out c, out rep);
-       } catch (ArithmeticException) {
-         return originalQuality;
-       } catch (alglib.alglibexception) {
-         return originalQuality;
-       }
-
-       counter.FunctionEvaluations += rowEvaluationsCounter.FunctionEvaluations / n;
-       counter.GradientEvaluations += rowEvaluationsCounter.GradientEvaluations / n;
-
-       //retVal == -7  => constant optimization failed due to wrong gradient
-       if (retVal != -7) {
-         if (applyLinearScaling) {
-           var tmp = new double[c.Length - 2];
-           Array.Copy(c, 2, tmp, 0, tmp.Length);
-           UpdateConstants(tree, tmp, updateVariableWeights);
-         } else UpdateConstants(tree, c, updateVariableWeights);
-       }
-       var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);
-
-       if (!updateConstantsInTree) UpdateConstants(tree, initialConstants, updateVariableWeights);
-
-       if (originalQuality - quality > 0.001 || double.IsNaN(quality)) {
-         UpdateConstants(tree, initialConstants, updateVariableWeights);
-         return originalQuality;
-       }
-       return quality;
-     }
-
-     private static void UpdateConstants(ISymbolicExpressionTree tree, double[] constants, bool updateVariableWeights) {
-       int i = 0;
-       foreach (var node in tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>()) {
-         ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
-         VariableTreeNodeBase variableTreeNodeBase = node as VariableTreeNodeBase;
-         FactorVariableTreeNode factorVarTreeNode = node as FactorVariableTreeNode;
-         if (constantTreeNode != null)
-           constantTreeNode.Value = constants[i++];
-         else if (updateVariableWeights && variableTreeNodeBase != null)
-           variableTreeNodeBase.Weight = constants[i++];
-         else if (factorVarTreeNode != null) {
-           for (int j = 0; j < factorVarTreeNode.Weights.Length; j++)
-             factorVarTreeNode.Weights[j] = constants[i++];
-         }
-       }
-     }
-
-     private static alglib.ndimensional_pfunc CreatePFunc(TreeToAutoDiffTermConverter.ParametricFunction func) {
-       return (double[] c, double[] x, ref double fx, object o) => {
-         fx = func(c, x);
-         var counter = (EvaluationsCounter)o;
-         counter.FunctionEvaluations++;
-       };
-     }
-
-     private static alglib.ndimensional_pgrad CreatePGrad(TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad) {
-       return (double[] c, double[] x, ref double fx, double[] grad, object o) => {
-         var tuple = func_grad(c, x);
-         fx = tuple.Item2;
-         Array.Copy(tuple.Item1, grad, grad.Length);
-         var counter = (EvaluationsCounter)o;
-         counter.GradientEvaluations++;
-       };
-     }
-     public static bool CanOptimizeConstants(ISymbolicExpressionTree tree) {
-       return TreeToAutoDiffTermConverter.IsCompatible(tree);
-     }
    }
  }
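
The commit message's goal of "multiple constant-opt implementations" maps to subclassing the now-abstract base class. A hypothetical additional backend (class name, GUID, and optimization body are purely illustrative) would only need to implement the single hook:

    // Hypothetical additional backend, purely illustrative: only the abstract
    // OptimizeConstants hook has to be implemented; InstrumentedApply() in the
    // base class handles row selection, quality calculation and bookkeeping.
    [Item("MyConstantOptimizer", "Illustrative constant-optimization backend.")]
    [StorableType("00000000-0000-0000-0000-000000000000")] // placeholder GUID
    public class MyConstantOptimizationEvaluator : SymbolicRegressionConstantOptimizationEvaluator {
      public MyConstantOptimizationEvaluator() : base() { }
      protected MyConstantOptimizationEvaluator(MyConstantOptimizationEvaluator original, Cloner cloner)
        : base(original, cloner) { }
      [StorableConstructor]
      protected MyConstantOptimizationEvaluator(StorableConstructorFlag _) : base(_) { }
      public override IDeepCloneable Clone(Cloner cloner) {
        return new MyConstantOptimizationEvaluator(this, cloner);
      }

      protected override ISymbolicExpressionTree OptimizeConstants(
        ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows,
        CancellationToken cancellationToken = default(CancellationToken),
        EvaluationsCounter counter = null) {
        // ... run any optimizer here and return a (cloned) tree with updated constants ...
        return (ISymbolicExpressionTree)tree.Clone();
      }
    }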