Changeset 4191 for trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionTournamentPruning.cs
- Timestamp:
- 08/11/10 12:00:53 (14 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionTournamentPruning.cs
r4068 r4191 30 30 using HeuristicLab.Problems.DataAnalysis.Symbolic; 31 31 using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols; 32 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 32 33 33 34 namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers { … … 38 39 private const string SamplesStartParameterName = "SamplesStart"; 39 40 private const string SamplesEndParameterName = "SamplesEnd"; 41 private const string EvaluatorParameterName = "Evaluator"; 42 private const string MaximizationParameterName = "Maximization"; 40 43 private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter"; 41 44 private const string UpperEstimationLimitParameterName = "UpperEstimationLimit"; … … 77 80 get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; } 78 81 } 82 public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter { 83 get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; } 84 } 85 public ILookupParameter<BoolValue> MaximizationParameter { 86 get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; } 87 } 79 88 public IValueLookupParameter<DoubleValue> MaxPruningRatioParameter { 80 89 get { return (IValueLookupParameter<DoubleValue>)Parameters[MaxPruningRatioParameterName]; } … … 133 142 get { return SamplesEndParameter.ActualValue; } 134 143 } 144 public ISymbolicRegressionEvaluator Evaluator { 145 get { return EvaluatorParameter.ActualValue; } 146 } 147 public BoolValue Maximization { 148 get { return MaximizationParameter.ActualValue; } 149 } 135 150 public DoubleValue MaxPruningRatio { 136 151 get { return MaxPruningRatioParameter.ActualValue; } … … 161 176 } 162 177 #endregion 178 protected SymbolicRegressionTournamentPruning(bool deserializing) : base(deserializing) { } 163 179 public SymbolicRegressionTournamentPruning() 164 180 : base() { … … 169 185 Parameters.Add(new 
ValueLookupParameter<IntValue>(SamplesStartParameterName, "The first row index of the dataset partition to use for branch impact evaluation.")); 170 186 Parameters.Add(new ValueLookupParameter<IntValue>(SamplesEndParameterName, "The last row index of the dataset partition to use for branch impact evaluation.")); 187 Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator that should be used to determine which branches are not relevant.")); 188 Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization.")); 171 189 Parameters.Add(new ValueLookupParameter<DoubleValue>(MaxPruningRatioParameterName, "The maximal relative size of the pruned branch.", new DoubleValue(0.5))); 172 190 Parameters.Add(new ValueLookupParameter<IntValue>(TournamentSizeParameterName, "The number of branches to compare for pruning", new IntValue(10))); … … 181 199 Parameters.Add(new LookupParameter<IntValue>(GenerationParameterName, "The current generation.")); 182 200 Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The results collection.")); 201 } 202 203 [StorableHook(HookType.AfterDeserialization)] 204 private void AfterDeserialization() { 205 #region compatibility remove before releasing 3.3.1 206 if (!Parameters.ContainsKey(EvaluatorParameterName)) { 207 Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set.")); 208 } 209 if (!Parameters.ContainsKey(MaximizationParameterName)) { 210 Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization.")); 211 } 212 #endregion 183 213 } 184 214 … … 198 228 Prune(Random, tree, Iterations.Value, TournamentSize.Value, 199 229 DataAnalysisProblemData, SamplesStart.Value, SamplesEnd.Value, 200 SymbolicExpressionTreeInterpreter, 230 SymbolicExpressionTreeInterpreter, Evaluator, 
Maximization.Value, 201 231 LowerEstimationLimit.Value, UpperEstimationLimit.Value, 202 232 MaxPruningRatio.Value, QualityGainWeight.Value); … … 208 238 public static void Prune(IRandom random, SymbolicExpressionTree tree, int iterations, int tournamentSize, 209 239 DataAnalysisProblemData problemData, int samplesStart, int samplesEnd, 210 ISymbolicExpressionTreeInterpreter interpreter, 240 ISymbolicExpressionTreeInterpreter interpreter, ISymbolicRegressionEvaluator evaluator, bool maximization, 211 241 double lowerEstimationLimit, double upperEstimationLimit, 212 242 double maxPruningRatio, double qualityGainWeight) { 213 243 IEnumerable<int> rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart); 214 244 int originalSize = tree.Size; 215 double original Mse = SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(interpreter, tree,216 lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, Enumerable.Range(samplesStart, samplesEnd - samplesStart));245 double originalQuality = evaluator.Evaluate(interpreter, tree, 246 lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, rows); 217 247 218 248 int minPrunedSize = (int)(originalSize * (1 - maxPruningRatio)); … … 249 279 selectedPrunePoint.Parent.InsertSubTree(selectedPrunePoint.SubTreeIndex, constNode); 250 280 251 double pruned Mse = SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(interpreter, clonedTree,281 double prunedQuality = evaluator.Evaluate(interpreter, clonedTree, 252 282 lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, Enumerable.Range(samplesStart, samplesEnd - samplesStart)); 253 283 double prunedSize = clonedTree.Size; 254 // MSE of the pruned tree is larger than the original tree in most cases 284 // deterioration in quality: 285 // exp: MSE : newMse < origMse (improvement) => prefer the larger improvement 286 // MSE : newMse > origMse 
(deterioration) => prefer the smaller deterioration 287 // MSE : minimize: newMse / origMse 288 // R² : newR² > origR² (improvement) => prefer the larger improvement 289 // R² : newR² < origR² (deterioration) => prefer smaller deterioration 290 // R² : minimize: origR² / newR² 291 double qualityDeteriation = maximization ? originalQuality / prunedQuality : prunedQuality / originalQuality; 255 292 // size of the pruned tree is always smaller than the size of the original tree 256 293 // same change in quality => prefer pruning operation that removes a larger tree 257 double gain = ( (prunedMse / originalMse)* qualityGainWeight) /294 double gain = (qualityDeteriation * qualityGainWeight) / 258 295 (originalSize / prunedSize); 259 296 if (gain < bestGain) {
Note: See TracChangeset
for help on using the changeset viewer.