Changeset 17472 for branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective
- Timestamp:
- 03/10/20 08:28:49 (5 years ago)
- Location:
- branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective
- Files:
-
- 2 edited
- 1 copied
Legend:
- Unmodified
- Added
- Removed
-
branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/ConstantOptimizationAnalyzer.cs
r17180 r17472 93 93 : base() { 94 94 Parameters.Add(new FixedValueParameter<PercentValue>(PercentageOfBestSolutionsParameterName, "The percentage of the top solutions which should be analyzed.", new PercentValue(0.1))); 95 Parameters.Add(new FixedValueParameter< SymbolicRegressionConstantOptimizationEvaluator>(ConstantOptimizationEvaluatorParameterName, "The operator used to perform the constant optimization"));95 Parameters.Add(new FixedValueParameter<NonlinearLeastSquaresConstantOptimizationEvaluator>(ConstantOptimizationEvaluatorParameterName, "The operator used to perform the constant optimization")); 96 96 97 97 //Changed the ActualName of the EvaluationPartitionParameter so that it matches the parameter name of symbolic regression problems. -
branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/NonlinearLeastSquaresConstantOptimizationEvaluator.cs
r17455 r17472 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using HEAL.Attic;25 using System.Threading; 26 26 using HeuristicLab.Common; 27 27 using HeuristicLab.Core; 28 28 using HeuristicLab.Data; 29 29 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; 30 using HeuristicLab.Optimization;31 30 using HeuristicLab.Parameters; 31 using HEAL.Attic; 32 32 33 33 namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression { 34 [Item("Constant Optimization Evaluator", "Calculates Pearson R² of a symbolic regression solution and optimizes the constant used.")]35 34 [StorableType("24B68851-036D-4446-BD6F-3823E9028FF4")] 36 public class SymbolicRegressionConstantOptimizationEvaluator : SymbolicRegressionSingleObjectiveEvaluator { 37 private const string ConstantOptimizationIterationsParameterName = "ConstantOptimizationIterations"; 38 private const string ConstantOptimizationImprovementParameterName = "ConstantOptimizationImprovement"; 39 private const string ConstantOptimizationProbabilityParameterName = "ConstantOptimizationProbability"; 40 private const string ConstantOptimizationRowsPercentageParameterName = "ConstantOptimizationRowsPercentage"; 41 private const string UpdateConstantsInTreeParameterName = "UpdateConstantsInSymbolicExpressionTree"; 42 private const string UpdateVariableWeightsParameterName = "Update Variable Weights"; 43 44 private const string FunctionEvaluationsResultParameterName = "Constants Optimization Function Evaluations"; 45 private const string GradientEvaluationsResultParameterName = "Constants Optimization Gradient Evaluations"; 46 private const string CountEvaluationsParameterName = "Count Function and Gradient Evaluations"; 47 35 [Item("NonlinearLeastSquaresOptimizer", "")] 36 public class NonlinearLeastSquaresConstantOptimizationEvaluator : SymbolicRegressionConstantOptimizationEvaluator { 37 38 private const string ConstantOptimizationIterationsName = "ConstantOptimizationIterations"; 39 40 #region Parameter Properties 48 41 public IFixedValueParameter<IntValue> ConstantOptimizationIterationsParameter { 49 get { return (IFixedValueParameter<IntValue>)Parameters[ConstantOptimizationIterationsParameterName]; } 50 } 51 public IFixedValueParameter<DoubleValue> ConstantOptimizationImprovementParameter { 52 get { return (IFixedValueParameter<DoubleValue>)Parameters[ConstantOptimizationImprovementParameterName]; } 53 } 54 public IFixedValueParameter<PercentValue> ConstantOptimizationProbabilityParameter { 55 get { return (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationProbabilityParameterName]; } 56 } 57 public IFixedValueParameter<PercentValue> ConstantOptimizationRowsPercentageParameter { 58 get { return (IFixedValueParameter<PercentValue>)Parameters[ConstantOptimizationRowsPercentageParameterName]; } 59 } 60 public IFixedValueParameter<BoolValue> UpdateConstantsInTreeParameter { 61 get { return (IFixedValueParameter<BoolValue>)Parameters[UpdateConstantsInTreeParameterName]; } 62 } 63 public IFixedValueParameter<BoolValue> UpdateVariableWeightsParameter { 64 get { return (IFixedValueParameter<BoolValue>)Parameters[UpdateVariableWeightsParameterName]; } 65 } 66 67 public IResultParameter<IntValue> FunctionEvaluationsResultParameter { 68 get { return (IResultParameter<IntValue>)Parameters[FunctionEvaluationsResultParameterName]; } 69 } 70 public IResultParameter<IntValue> GradientEvaluationsResultParameter { 71 get { return (IResultParameter<IntValue>)Parameters[GradientEvaluationsResultParameterName]; } 72 } 73 public IFixedValueParameter<BoolValue> CountEvaluationsParameter { 74 get { return (IFixedValueParameter<BoolValue>)Parameters[CountEvaluationsParameterName]; } 75 } 76 77 78 public IntValue ConstantOptimizationIterations { 79 get { return ConstantOptimizationIterationsParameter.Value; } 80 } 81 public DoubleValue ConstantOptimizationImprovement { 82 get { return ConstantOptimizationImprovementParameter.Value; } 83 } 84 public PercentValue ConstantOptimizationProbability { 85 get { return ConstantOptimizationProbabilityParameter.Value; } 86 } 87 public PercentValue ConstantOptimizationRowsPercentage { 88 get { return ConstantOptimizationRowsPercentageParameter.Value; } 89 } 90 public bool UpdateConstantsInTree { 91 get { return UpdateConstantsInTreeParameter.Value.Value; } 92 set { UpdateConstantsInTreeParameter.Value.Value = value; } 93 } 94 95 public bool UpdateVariableWeights { 96 get { return UpdateVariableWeightsParameter.Value.Value; } 97 set { UpdateVariableWeightsParameter.Value.Value = value; } 98 } 99 100 public bool CountEvaluations { 101 get { return CountEvaluationsParameter.Value.Value; } 102 set { CountEvaluationsParameter.Value.Value = value; } 103 } 104 105 public override bool Maximization { 106 get { return true; } 107 } 108 42 get { return (IFixedValueParameter<IntValue>)Parameters[ConstantOptimizationIterationsName]; } 43 } 44 #endregion 45 46 #region Properties 47 public int ConstantOptimizationIterations { 48 get { return ConstantOptimizationIterationsParameter.Value.Value; } 49 } 50 #endregion 51 52 public NonlinearLeastSquaresConstantOptimizationEvaluator() 53 : base() { 54 Parameters.Add(new FixedValueParameter<IntValue>(ConstantOptimizationIterationsName, "Determines how many iterations should be calculated while optimizing the constant of a symbolic expression tree(0 indicates other or default stopping criterion).", new IntValue(10))); 55 } 56 57 protected NonlinearLeastSquaresConstantOptimizationEvaluator(NonlinearLeastSquaresConstantOptimizationEvaluator original, Cloner cloner) 58 : base(original, cloner) { 59 } 60 public override IDeepCloneable Clone(Cloner cloner) { 61 return new NonlinearLeastSquaresConstantOptimizationEvaluator(this, cloner); 62 } 109 63 [StorableConstructor] 110 protected SymbolicRegressionConstantOptimizationEvaluator(StorableConstructorFlag _) : base(_) { } 111 protected SymbolicRegressionConstantOptimizationEvaluator(SymbolicRegressionConstantOptimizationEvaluator original, Cloner cloner) 112 : base(original, cloner) { 113 } 114 public SymbolicRegressionConstantOptimizationEvaluator() 115 : base() { 116 Parameters.Add(new FixedValueParameter<IntValue>(ConstantOptimizationIterationsParameterName, "Determines how many iterations should be calculated while optimizing the constant of a symbolic expression tree (0 indicates other or default stopping criterion).", new IntValue(10))); 117 Parameters.Add(new FixedValueParameter<DoubleValue>(ConstantOptimizationImprovementParameterName, "Determines the relative improvement which must be achieved in the constant optimization to continue with it (0 indicates other or default stopping criterion).", new DoubleValue(0)) { Hidden = true }); 118 Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationProbabilityParameterName, "Determines the probability that the constants are optimized", new PercentValue(1))); 119 Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationRowsPercentageParameterName, "Determines the percentage of the rows which should be used for constant optimization", new PercentValue(1))); 120 Parameters.Add(new FixedValueParameter<BoolValue>(UpdateConstantsInTreeParameterName, "Determines if the constants in the tree should be overwritten by the optimized constants.", new BoolValue(true)) { Hidden = true }); 121 Parameters.Add(new FixedValueParameter<BoolValue>(UpdateVariableWeightsParameterName, "Determines if the variable weights in the tree should be optimized.", new BoolValue(true)) { Hidden = true }); 122 123 Parameters.Add(new FixedValueParameter<BoolValue>(CountEvaluationsParameterName, "Determines if function and gradient evaluation should be counted.", new BoolValue(false))); 124 Parameters.Add(new ResultParameter<IntValue>(FunctionEvaluationsResultParameterName, "The number of function evaluations performed by the constants optimization evaluator", "Results", new IntValue())); 125 Parameters.Add(new ResultParameter<IntValue>(GradientEvaluationsResultParameterName, "The number of gradient evaluations performed by the constants optimization evaluator", "Results", new IntValue())); 126 } 127 128 public override IDeepCloneable Clone(Cloner cloner) { 129 return new SymbolicRegressionConstantOptimizationEvaluator(this, cloner); 130 } 131 132 [StorableHook(HookType.AfterDeserialization)] 133 private void AfterDeserialization() { 134 if (!Parameters.ContainsKey(UpdateConstantsInTreeParameterName)) 135 Parameters.Add(new FixedValueParameter<BoolValue>(UpdateConstantsInTreeParameterName, "Determines if the constants in the tree should be overwritten by the optimized constants.", new BoolValue(true))); 136 if (!Parameters.ContainsKey(UpdateVariableWeightsParameterName)) 137 Parameters.Add(new FixedValueParameter<BoolValue>(UpdateVariableWeightsParameterName, "Determines if the variable weights in the tree should be optimized.", new BoolValue(true))); 138 139 if (!Parameters.ContainsKey(CountEvaluationsParameterName)) 140 Parameters.Add(new FixedValueParameter<BoolValue>(CountEvaluationsParameterName, "Determines if function and gradient evaluation should be counted.", new BoolValue(false))); 141 142 if (!Parameters.ContainsKey(FunctionEvaluationsResultParameterName)) 143 Parameters.Add(new ResultParameter<IntValue>(FunctionEvaluationsResultParameterName, "The number of function evaluations performed by the constants optimization evaluator", "Results", new IntValue())); 144 if (!Parameters.ContainsKey(GradientEvaluationsResultParameterName)) 145 Parameters.Add(new ResultParameter<IntValue>(GradientEvaluationsResultParameterName, "The number of gradient evaluations performed by the constants optimization evaluator", "Results", new IntValue())); 146 } 147 148 private static readonly object locker = new object(); 149 public override IOperation InstrumentedApply() { 150 var solution = SymbolicExpressionTreeParameter.ActualValue; 151 double quality; 152 if (RandomParameter.ActualValue.NextDouble() < ConstantOptimizationProbability.Value) { 153 IEnumerable<int> constantOptimizationRows = GenerateRowsToEvaluate(ConstantOptimizationRowsPercentage.Value); 154 var counter = new EvaluationsCounter(); 155 quality = OptimizeConstants(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, ProblemDataParameter.ActualValue, 156 constantOptimizationRows, ApplyLinearScalingParameter.ActualValue.Value, ConstantOptimizationIterations.Value, updateVariableWeights: UpdateVariableWeights, lowerEstimationLimit: EstimationLimitsParameter.ActualValue.Lower, upperEstimationLimit: EstimationLimitsParameter.ActualValue.Upper, updateConstantsInTree: UpdateConstantsInTree, counter: counter); 157 158 if (ConstantOptimizationRowsPercentage.Value != RelativeNumberOfEvaluatedSamplesParameter.ActualValue.Value) { 159 var evaluationRows = GenerateRowsToEvaluate(); 160 quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value); 161 } 162 163 if (CountEvaluations) { 164 lock (locker) { 165 FunctionEvaluationsResultParameter.ActualValue.Value += counter.FunctionEvaluations; 166 GradientEvaluationsResultParameter.ActualValue.Value += counter.GradientEvaluations; 167 } 168 } 169 170 } else { 171 var evaluationRows = GenerateRowsToEvaluate(); 172 quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value); 173 } 174 QualityParameter.ActualValue = new DoubleValue(quality); 175 176 return base.InstrumentedApply(); 177 } 178 179 public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows) { 180 SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context; 181 EstimationLimitsParameter.ExecutionContext = context; 182 ApplyLinearScalingParameter.ExecutionContext = context; 183 FunctionEvaluationsResultParameter.ExecutionContext = context; 184 GradientEvaluationsResultParameter.ExecutionContext = context; 185 186 // Pearson R² evaluator is used on purpose instead of the const-opt evaluator, 187 // because Evaluate() is used to get the quality of evolved models on 188 // different partitions of the dataset (e.g., best validation model) 189 double r2 = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, tree, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, problemData, rows, ApplyLinearScalingParameter.ActualValue.Value); 190 191 SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null; 192 EstimationLimitsParameter.ExecutionContext = null; 193 ApplyLinearScalingParameter.ExecutionContext = null; 194 FunctionEvaluationsResultParameter.ExecutionContext = null; 195 GradientEvaluationsResultParameter.ExecutionContext = null; 196 197 return r2; 198 } 199 200 public class EvaluationsCounter { 201 public int FunctionEvaluations = 0; 202 public int GradientEvaluations = 0; 203 } 204 205 public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, 206 ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, 207 int maxIterations, bool updateVariableWeights = true, 208 double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue, 209 bool updateConstantsInTree = true, Action<double[], double, object> iterationCallback = null, EvaluationsCounter counter = null) { 64 protected NonlinearLeastSquaresConstantOptimizationEvaluator(StorableConstructorFlag _) : base(_) { } 65 66 protected override ISymbolicExpressionTree OptimizeConstants( 67 ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, 68 CancellationToken cancellationToken = default(CancellationToken), EvaluationsCounter counter = null) { 69 return OptimizeTree(tree, 70 problemData, rows, 71 ApplyLinearScalingParameter.ActualValue.Value, ConstantOptimizationIterations, UpdateVariableWeights, 72 cancellationToken, counter); 73 } 74 75 public static ISymbolicExpressionTree OptimizeTree( 76 ISymbolicExpressionTree tree, 77 IRegressionProblemData problemData, IEnumerable<int> rows, 78 bool applyLinearScaling, int maxIterations, bool updateVariableWeights, 79 CancellationToken cancellationToken = default(CancellationToken), EvaluationsCounter counter = null, Action<double[], double, object> iterationCallback = null) { 210 80 211 81 // numeric constants in the tree become variables for constant opt … … 214 84 // variable name, variable value (for factor vars) and lag as a DataForVariable object. 215 85 // A dictionary is used to find parameters 216 double[] initialConstants; 217 var parameters = new List<TreeToAutoDiffTermConverter.DataForVariable>(); 218 219 TreeToAutoDiffTermConverter.ParametricFunction func; 220 TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad; 221 if (!TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, updateVariableWeights, applyLinearScaling, out parameters, out initialConstants, out func, out func_grad)) 86 bool success = TreeToAutoDiffTermConverter.TryConvertToAutoDiff( 87 tree, updateVariableWeights, applyLinearScaling, 88 out var parameters, out var initialConstants, out var func, out var func_grad); 89 if (!success) 222 90 throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree."); 223 if (parameters.Count == 0) return 0.0; // gkronber: constant expressions always have a R² of 0.091 if (parameters.Count == 0) return (ISymbolicExpressionTree)tree.Clone(); 224 92 var parameterEntries = parameters.ToArray(); // order of entries must be the same for x 225 93 226 //extract init al constants94 //extract initial constants 227 95 double[] c; 228 96 if (applyLinearScaling) { … … 234 102 c = (double[])initialConstants.Clone(); 235 103 } 236 237 double originalQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);238 239 if (counter == null) counter = new EvaluationsCounter();240 var rowEvaluationsCounter = new EvaluationsCounter();241 242 alglib.lsfitstate state;243 alglib.lsfitreport rep;244 int retVal;245 104 246 105 IDataset ds = problemData.Dataset; … … 266 125 alglib.ndimensional_pfunc function_cx_1_func = CreatePFunc(func); 267 126 alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(func_grad); 268 alglib.ndimensional_rep xrep = (p, f, obj) => iterationCallback(p, f, obj); 127 alglib.ndimensional_rep xrep = (p, f, obj) => { 128 iterationCallback?.Invoke(p, f, obj); 129 cancellationToken.ThrowIfCancellationRequested(); 130 }; 131 var rowEvaluationsCounter = new EvaluationsCounter(); 269 132 270 133 try { 271 alglib.lsfitcreatefg(x, y, c, n, m, k, false, out state);134 alglib.lsfitcreatefg(x, y, c, n, m, k, false, out var state); 272 135 alglib.lsfitsetcond(state, 0.0, 0.0, maxIterations); 273 alglib.lsfitsetxrep(state, iterationCallback != null );136 alglib.lsfitsetxrep(state, iterationCallback != null || cancellationToken != default(CancellationToken)); 274 137 //alglib.lsfitsetgradientcheck(state, 0.001); 275 138 alglib.lsfitfit(state, function_cx_1_func, function_cx_1_grad, xrep, rowEvaluationsCounter); 276 alglib.lsfitresults(state, out retVal, out c, out rep); 139 alglib.lsfitresults(state, out var retVal, out c, out alglib.lsfitreport rep); 140 141 //retVal == -7 => constant optimization failed due to wrong gradient 142 if (retVal == -1) 143 return (ISymbolicExpressionTree)tree.Clone(); 277 144 } catch (ArithmeticException) { 278 return originalQuality;145 return (ISymbolicExpressionTree)tree.Clone(); 279 146 } catch (alglib.alglibexception) { 280 return originalQuality; 281 } 282 283 counter.FunctionEvaluations += rowEvaluationsCounter.FunctionEvaluations / n; 284 counter.GradientEvaluations += rowEvaluationsCounter.GradientEvaluations / n; 285 286 //retVal == -7 => constant optimization failed due to wrong gradient 287 if (retVal != -7) { 288 if (applyLinearScaling) { 289 var tmp = new double[c.Length - 2]; 290 Array.Copy(c, 2, tmp, 0, tmp.Length); 291 UpdateConstants(tree, tmp, updateVariableWeights); 292 } else UpdateConstants(tree, c, updateVariableWeights); 293 } 294 var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling); 295 296 if (!updateConstantsInTree) UpdateConstants(tree, initialConstants, updateVariableWeights); 297 298 if (originalQuality - quality > 0.001 || double.IsNaN(quality)) { 299 UpdateConstants(tree, initialConstants, updateVariableWeights); 300 return originalQuality; 301 } 302 return quality; 147 return (ISymbolicExpressionTree)tree.Clone(); 148 } 149 150 if (counter != null) { 151 counter.FunctionEvaluations += rowEvaluationsCounter.FunctionEvaluations / n; 152 counter.GradientEvaluations += rowEvaluationsCounter.GradientEvaluations / n; 153 } 154 155 var newTree = (ISymbolicExpressionTree)tree.Clone(); 156 if (applyLinearScaling) { 157 var tmp = new double[c.Length - 2]; 158 Array.Copy(c, 2, tmp, 0, tmp.Length); 159 UpdateConstants(newTree, tmp, updateVariableWeights); 160 } else 161 UpdateConstants(newTree, c, updateVariableWeights); 162 163 return newTree; 303 164 } 304 165 … … 337 198 }; 338 199 } 200 339 201 public static bool CanOptimizeConstants(ISymbolicExpressionTree tree) { 340 202 return TreeToAutoDiffTermConverter.IsCompatible(tree); -
branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/SymbolicRegressionConstantOptimizationEvaluator.cs
r17180 r17472 20 20 #endregion 21 21 22 using System;23 22 using System.Collections.Generic; 24 using System. Linq;23 using System.Threading; 25 24 using HEAL.Attic; 26 25 using HeuristicLab.Common; … … 33 32 namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression { 34 33 [Item("Constant Optimization Evaluator", "Calculates Pearson R² of a symbolic regression solution and optimizes the constant used.")] 35 [StorableType("24B68851-036D-4446-BD6F-3823E9028FF4")] 36 public class SymbolicRegressionConstantOptimizationEvaluator : SymbolicRegressionSingleObjectiveEvaluator { 37 private const string ConstantOptimizationIterationsParameterName = "ConstantOptimizationIterations"; 34 [StorableType("9F3A528C-DF9B-4D8F-8611-314E169A3B34")] 35 public abstract class SymbolicRegressionConstantOptimizationEvaluator : SymbolicRegressionSingleObjectiveEvaluator { 38 36 private const string ConstantOptimizationImprovementParameterName = "ConstantOptimizationImprovement"; 39 37 private const string ConstantOptimizationProbabilityParameterName = "ConstantOptimizationProbability"; … … 46 44 private const string CountEvaluationsParameterName = "Count Function and Gradient Evaluations"; 47 45 48 public IFixedValueParameter<IntValue> ConstantOptimizationIterationsParameter { 49 get { return (IFixedValueParameter<IntValue>)Parameters[ConstantOptimizationIterationsParameterName]; } 50 } 46 51 47 public IFixedValueParameter<DoubleValue> ConstantOptimizationImprovementParameter { 52 48 get { return (IFixedValueParameter<DoubleValue>)Parameters[ConstantOptimizationImprovementParameterName]; } … … 76 72 77 73 78 public IntValue ConstantOptimizationIterations {79 get { return ConstantOptimizationIterationsParameter.Value; }80 }81 74 public DoubleValue ConstantOptimizationImprovement { 82 75 get { return ConstantOptimizationImprovementParameter.Value; } … … 112 105 : base(original, cloner) { 113 106 } 114 p ublicSymbolicRegressionConstantOptimizationEvaluator()107 protected SymbolicRegressionConstantOptimizationEvaluator() 115 108 : base() { 116 Parameters.Add(new FixedValueParameter<IntValue>(ConstantOptimizationIterationsParameterName, "Determines how many iterations should be calculated while optimizing the constant of a symbolic expression tree (0 indicates other or default stopping criterion).", new IntValue(10)));117 109 Parameters.Add(new FixedValueParameter<DoubleValue>(ConstantOptimizationImprovementParameterName, "Determines the relative improvement which must be achieved in the constant optimization to continue with it (0 indicates other or default stopping criterion).", new DoubleValue(0)) { Hidden = true }); 118 110 Parameters.Add(new FixedValueParameter<PercentValue>(ConstantOptimizationProbabilityParameterName, "Determines the probability that the constants are optimized", new PercentValue(1))); … … 124 116 Parameters.Add(new ResultParameter<IntValue>(FunctionEvaluationsResultParameterName, "The number of function evaluations performed by the constants optimization evaluator", "Results", new IntValue())); 125 117 Parameters.Add(new ResultParameter<IntValue>(GradientEvaluationsResultParameterName, "The number of gradient evaluations performed by the constants optimization evaluator", "Results", new IntValue())); 126 }127 128 public override IDeepCloneable Clone(Cloner cloner) {129 return new SymbolicRegressionConstantOptimizationEvaluator(this, cloner);130 118 } 131 119 … … 148 136 private static readonly object locker = new object(); 149 137 public override IOperation InstrumentedApply() { 138 var originalTree = SymbolicExpressionTreeParameter.ActualValue; 150 139 var solution = SymbolicExpressionTreeParameter.ActualValue; 151 double quality; 140 var problemData = ProblemDataParameter.ActualValue; 141 152 142 if (RandomParameter.ActualValue.NextDouble() < ConstantOptimizationProbability.Value) { 153 143 IEnumerable<int> constantOptimizationRows = GenerateRowsToEvaluate(ConstantOptimizationRowsPercentage.Value); 154 var counter = new EvaluationsCounter(); 155 quality = OptimizeConstants(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, ProblemDataParameter.ActualValue, 156 constantOptimizationRows, ApplyLinearScalingParameter.ActualValue.Value, ConstantOptimizationIterations.Value, updateVariableWeights: UpdateVariableWeights, lowerEstimationLimit: EstimationLimitsParameter.ActualValue.Lower, upperEstimationLimit: EstimationLimitsParameter.ActualValue.Upper, updateConstantsInTree: UpdateConstantsInTree, counter: counter); 144 EvaluationsCounter counter = null; 145 if (CountEvaluations) counter = new EvaluationsCounter(); 146 147 var optimizedTree = OptimizeConstants(originalTree, problemData, constantOptimizationRows, counter: counter); 148 149 double quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate( 150 SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, optimizedTree, 151 EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, constantOptimizationRows, ApplyLinearScalingParameter.ActualValue.Value); 157 152 158 153 if (ConstantOptimizationRowsPercentage.Value != RelativeNumberOfEvaluatedSamplesParameter.ActualValue.Value) { 159 154 var evaluationRows = GenerateRowsToEvaluate(); 160 quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value); 155 quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate( 156 SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, 157 EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value); 161 158 } 162 159 160 SymbolicExpressionTreeParameter.ActualValue = optimizedTree; 161 QualityParameter.ActualValue = new DoubleValue(quality); 163 162 if (CountEvaluations) { 164 163 lock (locker) { … … 170 169 } else { 171 170 var evaluationRows = GenerateRowsToEvaluate(); 172 quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value); 171 double quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate( 172 SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, 173 EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value); 174 QualityParameter.ActualValue = new DoubleValue(quality); 173 175 } 174 QualityParameter.ActualValue = new DoubleValue(quality);175 176 176 177 return base.InstrumentedApply(); … … 198 199 } 199 200 201 protected abstract ISymbolicExpressionTree OptimizeConstants( 202 ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, 203 CancellationToken cancellationToken = default(CancellationToken), EvaluationsCounter counter = null); 204 200 205 public class EvaluationsCounter { 201 206 public int FunctionEvaluations = 0; 202 207 public int GradientEvaluations = 0; 203 208 } 204 205 public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,206 ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling,207 int maxIterations, bool updateVariableWeights = true,208 double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue,209 bool updateConstantsInTree = true, Action<double[], double, object> iterationCallback = null, EvaluationsCounter counter = null) {210 211 // numeric constants in the tree become variables for constant opt212 // variables in the tree become parameters (fixed values) for constant opt213 // for each parameter (variable in the original tree) we store the214 // variable name, variable value (for factor vars) and lag as a DataForVariable object.215 // A dictionary is used to find parameters216 double[] initialConstants;217 var parameters = new List<TreeToAutoDiffTermConverter.DataForVariable>();218 219 TreeToAutoDiffTermConverter.ParametricFunction func;220 TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad;221 if (!TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, updateVariableWeights, applyLinearScaling, out parameters, out initialConstants, out func, out func_grad))222 throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree.");223 if (parameters.Count == 0) return 0.0; // gkronber: constant expressions always have a R² of 0.0224 var parameterEntries = parameters.ToArray(); // order of entries must be the same for x225 226 //extract inital constants227 double[] c;228 if (applyLinearScaling) {229 c = new double[initialConstants.Length + 2];230 c[0] = 0.0;231 c[1] = 1.0;232 Array.Copy(initialConstants, 0, c, 2, initialConstants.Length);233 } else {234 c = (double[])initialConstants.Clone();235 }236 237 double originalQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);238 239 if (counter == null) counter = new EvaluationsCounter();240 var rowEvaluationsCounter = new EvaluationsCounter();241 242 alglib.lsfitstate state;243 alglib.lsfitreport rep;244 int retVal;245 246 IDataset ds = problemData.Dataset;247 double[,] x = new double[rows.Count(), parameters.Count];248 int row = 0;249 foreach (var r in rows) {250 int col = 0;251 foreach (var info in parameterEntries) {252 if (ds.VariableHasType<double>(info.variableName)) {253 x[row, col] = ds.GetDoubleValue(info.variableName, r + info.lag);254 } else if (ds.VariableHasType<string>(info.variableName)) {255 x[row, col] = ds.GetStringValue(info.variableName, r) == info.variableValue ? 1 : 0;256 } else throw new InvalidProgramException("found a variable of unknown type");257 col++;258 }259 row++;260 }261 double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();262 int n = x.GetLength(0);263 int m = x.GetLength(1);264 int k = c.Length;265 266 alglib.ndimensional_pfunc function_cx_1_func = CreatePFunc(func);267 alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(func_grad);268 alglib.ndimensional_rep xrep = (p, f, obj) => iterationCallback(p, f, obj);269 270 try {271 alglib.lsfitcreatefg(x, y, c, n, m, k, false, out state);272 alglib.lsfitsetcond(state, 0.0, 0.0, maxIterations);273 alglib.lsfitsetxrep(state, iterationCallback != null);274 //alglib.lsfitsetgradientcheck(state, 0.001);275 alglib.lsfitfit(state, function_cx_1_func, function_cx_1_grad, xrep, rowEvaluationsCounter);276 alglib.lsfitresults(state, out retVal, out c, out rep);277 } catch (ArithmeticException) {278 return originalQuality;279 } catch (alglib.alglibexception) {280 return originalQuality;281 }282 283 counter.FunctionEvaluations += rowEvaluationsCounter.FunctionEvaluations / n;284 counter.GradientEvaluations += rowEvaluationsCounter.GradientEvaluations / n;285 286 //retVal == -7 => constant optimization failed due to wrong gradient287 if (retVal != -7) {288 if (applyLinearScaling) {289 var tmp = new double[c.Length - 2];290 Array.Copy(c, 2, tmp, 0, tmp.Length);291 UpdateConstants(tree, tmp, updateVariableWeights);292 } else UpdateConstants(tree, c, updateVariableWeights);293 }294 var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);295 296 if (!updateConstantsInTree) UpdateConstants(tree, initialConstants, updateVariableWeights);297 298 if (originalQuality - quality > 0.001 || double.IsNaN(quality)) {299 UpdateConstants(tree, initialConstants, updateVariableWeights);300 return originalQuality;301 }302 return quality;303 }304 305 private static void UpdateConstants(ISymbolicExpressionTree tree, double[] constants, bool updateVariableWeights) {306 int i = 0;307 foreach (var node in tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>()) {308 ConstantTreeNode constantTreeNode = node as ConstantTreeNode;309 VariableTreeNodeBase variableTreeNodeBase = node as VariableTreeNodeBase;310 FactorVariableTreeNode factorVarTreeNode = node as FactorVariableTreeNode;311 if (constantTreeNode != null)312 constantTreeNode.Value = constants[i++];313 else if (updateVariableWeights && variableTreeNodeBase != null)314 variableTreeNodeBase.Weight = constants[i++];315 else if (factorVarTreeNode != null) {316 for (int j = 0; j < factorVarTreeNode.Weights.Length; j++)317 factorVarTreeNode.Weights[j] = constants[i++];318 }319 }320 }321 322 private static alglib.ndimensional_pfunc CreatePFunc(TreeToAutoDiffTermConverter.ParametricFunction func) {323 return (double[] c, double[] x, ref double fx, object o) => {324 fx = func(c, x);325 var counter = (EvaluationsCounter)o;326 counter.FunctionEvaluations++;327 };328 }329 330 private static alglib.ndimensional_pgrad CreatePGrad(TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad) {331 return (double[] c, double[] x, ref double fx, double[] grad, object o) => {332 var tuple = func_grad(c, x);333 fx = tuple.Item2;334 Array.Copy(tuple.Item1, grad, grad.Length);335 var counter = (EvaluationsCounter)o;336 counter.GradientEvaluations++;337 };338 }339 public static bool CanOptimizeConstants(ISymbolicExpressionTree tree) {340 return TreeToAutoDiffTermConverter.IsCompatible(tree);341 }342 209 } 343 210 }
Note: See TracChangeset
for help on using the changeset viewer.