Changeset 4191
- Timestamp: 08/11/10 12:00:53 (14 years ago)
- Location: trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs
r4127 r4191 46 46 private const string ValidationSamplesStartParameterName = "SamplesStart"; 47 47 private const string ValidationSamplesEndParameterName = "SamplesEnd"; 48 private const string QualityParameterName = "Quality";48 // private const string QualityParameterName = "Quality"; 49 49 private const string UpperEstimationLimitParameterName = "UpperEstimationLimit"; 50 50 private const string LowerEstimationLimitParameterName = "LowerEstimationLimit"; 51 private const string EvaluatorParameterName = "Evaluator"; 52 private const string MaximizationParameterName = "Maximization"; 51 53 private const string BestSolutionParameterName = "Best solution (validation)"; 52 54 private const string BestSolutionQualityParameterName = "Best solution quality (validation)"; … … 109 111 get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; } 110 112 } 113 public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter { 114 get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; } 115 } 116 public ILookupParameter<BoolValue> MaximizationParameter { 117 get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; } 118 } 111 119 public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter { 112 120 get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; } … … 158 166 get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; } 159 167 } 168 public ISymbolicRegressionEvaluator Evaluator { 169 get { return EvaluatorParameter.ActualValue; } 170 } 171 public BoolValue Maximization { 172 get { return MaximizationParameter.ActualValue; } 173 } 160 174 public DataAnalysisProblemData ProblemData { 161 175 get { return ProblemDataParameter.ActualValue; } … … 185 199 public IntValue Generations { 186 200 get { return GenerationsParameter.ActualValue; } 201 } 202 public 
DoubleValue BestSolutionQuality { 203 get { return BestSolutionQualityParameter.ActualValue; } 187 204 } 188 205 … … 192 209 : base() { 193 210 Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use.")); 211 Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set.")); 194 212 Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze.")); 195 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic expression trees to analyze."));213 Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization.")); 196 214 Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees.")); 197 215 Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution.")); … … 212 230 private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base() { } 213 231 232 [StorableHook(HookType.AfterDeserialization)] 233 private void AfterDeserialization() { 234 #region compatibility remove before releasing 3.3.1 235 if (!Parameters.ContainsKey(EvaluatorParameterName)) { 236 Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set.")); 237 } 238 if (!Parameters.ContainsKey(MaximizationParameterName)) { 239 Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization.")); 240 } 241 #endregion 242 } 243 214 244 public override IOperation Apply() 
{ 215 245 var trees = SymbolicExpressionTree; … … 228 258 double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity; 229 259 230 double best ValidationRSquared = -1.0;260 double bestQuality = Maximization.Value ? double.NegativeInfinity : double.PositiveInfinity; 231 261 SymbolicExpressionTree bestTree = null; 232 262 233 263 foreach (var tree in trees) { 234 double validationRSquared = SymbolicRegressionPearsonsRSquaredEvaluator.Calculate(SymbolicExpressionTreeInterpreter, tree,264 double quality = Evaluator.Evaluate(SymbolicExpressionTreeInterpreter, tree, 235 265 lowerEstimationLimit, upperEstimationLimit, 236 266 ProblemData.Dataset, targetVariable, 237 267 rows); 238 268 239 if (validationRSquared > bestValidationRSquared) { 240 bestValidationRSquared = validationRSquared; 269 if ((Maximization.Value && quality > bestQuality) || 270 (!Maximization.Value && quality < bestQuality)) { 271 bestQuality = quality; 241 272 bestTree = tree; 242 273 } 243 274 } 244 275 245 246 276 // if the best validation tree is better than the current best solution => update 247 if (BestSolutionQualityParameter.ActualValue == null || BestSolutionQualityParameter.ActualValue.Value < bestValidationRSquared) { 248 // calculate scaling parameters and validation MSE only for the best tree 249 // scale tree for solution 277 bool newBest = 278 BestSolutionQuality == null || 279 (Maximization.Value && bestQuality > BestSolutionQuality.Value) || 280 (!Maximization.Value && bestQuality < BestSolutionQuality.Value); 281 if (newBest) { 282 // calculate scaling parameters and only for the best tree using the full training set 250 283 double alpha, beta; 251 double validationMSE = SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, bestTree, 284 int trainingStart = ProblemData.TrainingSamplesStart.Value; 285 int trainingEnd = ProblemData.TrainingSamplesEnd.Value; 286 IEnumerable<int> trainingRows = 
Enumerable.Range(trainingStart, trainingEnd - trainingStart); 287 SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, bestTree, 252 288 lowerEstimationLimit, upperEstimationLimit, 253 289 ProblemData.Dataset, targetVariable, 254 rows, out beta, out alpha); 255 290 trainingRows, out beta, out alpha); 291 292 // scale tree for solution 256 293 var scaledTree = SymbolicRegressionSolutionLinearScaler.Scale(bestTree, alpha, beta); 257 294 var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(), … … 262 299 263 300 BestSolutionParameter.ActualValue = solution; 264 BestSolutionQualityParameter.ActualValue = new DoubleValue(best ValidationRSquared);301 BestSolutionQualityParameter.ActualValue = new DoubleValue(bestQuality); 265 302 266 303 BestSymbolicRegressionSolutionAnalyzer.UpdateBestSolutionResults(solution, ProblemData, Results, Generations, VariableFrequencies); 267 304 } 305 268 306 269 307 if (!Results.ContainsKey(BestSolutionQualityValuesParameterName)) { … … 273 311 } 274 312 Results[BestSolutionQualityParameterName].Value = new DoubleValue(BestSolutionQualityParameter.ActualValue.Value); 275 Results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(best ValidationRSquared);313 Results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestQuality); 276 314 277 315 DataTable validationValues = (DataTable)Results[BestSolutionQualityValuesParameterName].Value; 278 316 AddValue(validationValues, BestSolutionQualityParameter.ActualValue.Value, BestSolutionQualityParameterName, BestSolutionQualityParameterName); 279 AddValue(validationValues, best ValidationRSquared, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);317 AddValue(validationValues, bestQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName); 280 318 return base.Apply(); 281 319 } -
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionTournamentPruning.cs
r4068 r4191 30 30 using HeuristicLab.Problems.DataAnalysis.Symbolic; 31 31 using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols; 32 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 32 33 33 34 namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers { … … 38 39 private const string SamplesStartParameterName = "SamplesStart"; 39 40 private const string SamplesEndParameterName = "SamplesEnd"; 41 private const string EvaluatorParameterName = "Evaluator"; 42 private const string MaximizationParameterName = "Maximization"; 40 43 private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter"; 41 44 private const string UpperEstimationLimitParameterName = "UpperEstimationLimit"; … … 77 80 get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; } 78 81 } 82 public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter { 83 get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; } 84 } 85 public ILookupParameter<BoolValue> MaximizationParameter { 86 get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; } 87 } 79 88 public IValueLookupParameter<DoubleValue> MaxPruningRatioParameter { 80 89 get { return (IValueLookupParameter<DoubleValue>)Parameters[MaxPruningRatioParameterName]; } … … 133 142 get { return SamplesEndParameter.ActualValue; } 134 143 } 144 public ISymbolicRegressionEvaluator Evaluator { 145 get { return EvaluatorParameter.ActualValue; } 146 } 147 public BoolValue Maximization { 148 get { return MaximizationParameter.ActualValue; } 149 } 135 150 public DoubleValue MaxPruningRatio { 136 151 get { return MaxPruningRatioParameter.ActualValue; } … … 161 176 } 162 177 #endregion 178 protected SymbolicRegressionTournamentPruning(bool deserializing) : base(deserializing) { } 163 179 public SymbolicRegressionTournamentPruning() 164 180 : base() { … … 169 185 Parameters.Add(new 
ValueLookupParameter<IntValue>(SamplesStartParameterName, "The first row index of the dataset partition to use for branch impact evaluation.")); 170 186 Parameters.Add(new ValueLookupParameter<IntValue>(SamplesEndParameterName, "The last row index of the dataset partition to use for branch impact evaluation.")); 187 Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator that should be used to determine which branches are not relevant.")); 188 Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization.")); 171 189 Parameters.Add(new ValueLookupParameter<DoubleValue>(MaxPruningRatioParameterName, "The maximal relative size of the pruned branch.", new DoubleValue(0.5))); 172 190 Parameters.Add(new ValueLookupParameter<IntValue>(TournamentSizeParameterName, "The number of branches to compare for pruning", new IntValue(10))); … … 181 199 Parameters.Add(new LookupParameter<IntValue>(GenerationParameterName, "The current generation.")); 182 200 Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The results collection.")); 201 } 202 203 [StorableHook(HookType.AfterDeserialization)] 204 private void AfterDeserialization() { 205 #region compatibility remove before releasing 3.3.1 206 if (!Parameters.ContainsKey(EvaluatorParameterName)) { 207 Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set.")); 208 } 209 if (!Parameters.ContainsKey(MaximizationParameterName)) { 210 Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization.")); 211 } 212 #endregion 183 213 } 184 214 … … 198 228 Prune(Random, tree, Iterations.Value, TournamentSize.Value, 199 229 DataAnalysisProblemData, SamplesStart.Value, SamplesEnd.Value, 200 SymbolicExpressionTreeInterpreter, 230 SymbolicExpressionTreeInterpreter, Evaluator, 
Maximization.Value, 201 231 LowerEstimationLimit.Value, UpperEstimationLimit.Value, 202 232 MaxPruningRatio.Value, QualityGainWeight.Value); … … 208 238 public static void Prune(IRandom random, SymbolicExpressionTree tree, int iterations, int tournamentSize, 209 239 DataAnalysisProblemData problemData, int samplesStart, int samplesEnd, 210 ISymbolicExpressionTreeInterpreter interpreter, 240 ISymbolicExpressionTreeInterpreter interpreter, ISymbolicRegressionEvaluator evaluator, bool maximization, 211 241 double lowerEstimationLimit, double upperEstimationLimit, 212 242 double maxPruningRatio, double qualityGainWeight) { 213 243 IEnumerable<int> rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart); 214 244 int originalSize = tree.Size; 215 double original Mse = SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(interpreter, tree,216 lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, Enumerable.Range(samplesStart, samplesEnd - samplesStart));245 double originalQuality = evaluator.Evaluate(interpreter, tree, 246 lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, rows); 217 247 218 248 int minPrunedSize = (int)(originalSize * (1 - maxPruningRatio)); … … 249 279 selectedPrunePoint.Parent.InsertSubTree(selectedPrunePoint.SubTreeIndex, constNode); 250 280 251 double pruned Mse = SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(interpreter, clonedTree,281 double prunedQuality = evaluator.Evaluate(interpreter, clonedTree, 252 282 lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, Enumerable.Range(samplesStart, samplesEnd - samplesStart)); 253 283 double prunedSize = clonedTree.Size; 254 // MSE of the pruned tree is larger than the original tree in most cases 284 // deteriation in quality: 285 // exp: MSE : newMse < origMse (improvement) => prefer the larger improvement 286 // MSE : newMse > origMse 
(deteriation) => prefer the smaller deteriation 287 // MSE : minimize: newMse / origMse 288 // R² : newR² > origR² (improvment) => prefer the larger improvment 289 // R² : newR² < origR² (deteriation) => prefer smaller deteriation 290 // R² : minimize: origR² / newR² 291 double qualityDeteriation = maximization ? originalQuality / prunedQuality : prunedQuality / originalQuality; 255 292 // size of the pruned tree is always smaller than the size of the original tree 256 293 // same change in quality => prefer pruning operation that removes a larger tree 257 double gain = ( (prunedMse / originalMse)* qualityGainWeight) /294 double gain = (qualityDeteriation * qualityGainWeight) / 258 295 (originalSize / prunedSize); 259 296 if (gain < bestGain) {
Note: See TracChangeset for help on using the changeset viewer.