Changeset 4297
- Timestamp: 08/23/10 18:54:35 (14 years ago)
- Location: branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers
- Files: 3 edited
Legend:
- Unmodified
- Added
- Removed
branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs
r4272 r4297 146 146 get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters["Quality"]; } 147 147 } 148 public ScopeTreeLookupParameter<DoubleValue> ValidationQualityParameter { 149 get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters["ValidationQuality"]; } 150 } 148 151 149 152 public ILookupParameter<IntValue> GenerationsParameter { … … 237 240 Parameters.Add(new LookupParameter<SymbolicRegressionSolution>("BestTrainingSolution")); 238 241 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("Quality")); 242 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("ValidationQuality")); 239 243 Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far.")); 240 244 Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution.")); … … 267 271 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("Quality")); 268 272 } 273 if (!Parameters.ContainsKey("ValidationQuality")) { 274 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("ValidationQuality")); 275 } 269 276 #endregion 270 277 } … … 291 298 SymbolicExpressionTree bestTrainingTree = trees[0]; 292 299 double bestTrainingQuality = qualities[0].Value; 300 ItemArray<DoubleValue> validationQualites = new ItemArray<DoubleValue>(qualities.Length); 293 301 for (int i = 0; i < trees.Length; i++) { 294 302 SymbolicExpressionTree tree = trees[i]; … … 297 305 ProblemData.Dataset, targetVariable, 298 306 rows); 299 307 validationQualites[i] = new DoubleValue(quality); 300 308 if ((Maximization.Value && quality > bestQuality) || 301 309 (!Maximization.Value && quality < bestQuality)) { … … 309 317 } 310 318 } 319 ValidationQualityParameter.ActualValue = validationQualites; 311 320 312 321 var scaledBestTrainingTree = GetScaledTree(bestTrainingTree); -
branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/OverfittingAnalyzer.cs
r4275 r4297 61 61 get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters["Quality"]; } 62 62 } 63 public ScopeTreeLookupParameter<DoubleValue> ValidationQualityParameter { 64 get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters["ValidationQuality"]; } 65 } 63 66 public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter { 64 67 get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; } … … 163 166 Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze.")); 164 167 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("Quality")); 168 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("ValidationQuality")); 165 169 Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization.")); 166 170 Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees.")); … … 180 184 Parameters.Add(new LookupParameter<DoubleValue>("InitialTrainingQuality")); 181 185 Parameters.Add(new LookupParameter<DoubleMatrix>("TrainingAndValidationQualities")); 182 Parameters.Add(new ValueLookupParameter<DoubleValue>("Percentile", new DoubleValue( 0.1)));186 Parameters.Add(new ValueLookupParameter<DoubleValue>("Percentile", new DoubleValue(1))); 183 187 184 188 } … … 202 206 } 203 207 if (!Parameters.ContainsKey("Percentile")) { 204 Parameters.Add(new ValueLookupParameter<DoubleValue>("Percentile", new DoubleValue(0.1))); 208 Parameters.Add(new ValueLookupParameter<DoubleValue>("Percentile", new DoubleValue(1))); 209 } 210 if (!Parameters.ContainsKey("ValidationQuality")) { 211 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("ValidationQuality")); 205 212 } 206 213 
} … … 209 216 var trees = SymbolicExpressionTree; 210 217 ItemArray<DoubleValue> qualities = QualityParameter.ActualValue; 218 ItemArray<DoubleValue> validationQualities = ValidationQualityParameter.ActualValue; 211 219 212 220 string targetVariable = ProblemData.TargetVariable.Value; … … 226 234 //SymbolicExpressionTree bestTree = null; 227 235 228 List<double> validationQualities = new List<double>();229 foreach (var tree in trees) {230 double quality = Evaluator.Evaluate(SymbolicExpressionTreeInterpreter, tree,231 lowerEstimationLimit, upperEstimationLimit,232 ProblemData.Dataset, targetVariable,233 rows);234 validationQualities.Add(quality);235 //if ((Maximization.Value && quality > bestQuality) ||236 // (!Maximization.Value && quality < bestQuality)) {237 // bestQuality = quality;238 // bestTree = tree;239 //}240 }236 //List<double> validationQualities = new List<double>(); 237 //foreach (var tree in trees) { 238 // double quality = Evaluator.Evaluate(SymbolicExpressionTreeInterpreter, tree, 239 // lowerEstimationLimit, upperEstimationLimit, 240 // ProblemData.Dataset, targetVariable, 241 // rows); 242 // validationQualities.Add(quality); 243 // //if ((Maximization.Value && quality > bestQuality) || 244 // // (!Maximization.Value && quality < bestQuality)) { 245 // // bestQuality = quality; 246 // // bestTree = tree; 247 // //} 248 //} 241 249 242 250 //if (RelativeValidationQualityParameter.ActualValue == null) { 243 251 // first call initialize the relative quality using the difference between average training and validation quality 244 252 double avgTrainingQuality = qualities.Select(x => x.Value).Median(); 245 double avgValidationQuality = validationQualities. 
Median();253 double avgValidationQuality = validationQualities.Select(x => x.Value).Median(); 246 254 247 255 if (Maximization.Value) … … 254 262 // best first (only for maximization 255 263 var orderedDistinctPairs = (from index in Enumerable.Range(0, qualities.Length) 256 select new { Training = qualities[index].Value, Validation = validationQualities[index] }) 257 .Distinct() 264 select new { Training = qualities[index].Value, Validation = validationQualities[index].Value }) 258 265 .OrderBy(x => -x.Training) 259 266 .ToList(); -
branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionTournamentPruning.cs
r4195 r4297 31 31 using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols; 32 32 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 33 using System; 33 34 34 35 namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers { … … 62 63 get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; } 63 64 } 65 public ScopeTreeLookupParameter<DoubleValue> QualityParameter { 66 get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters["Quality"]; } 67 } 64 68 public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter { 65 69 get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; } … … 80 84 get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; } 81 85 } 86 public IValueLookupParameter<PercentValue> RelativeNumberOfEvaluatedRowsParameters { 87 get { return (IValueLookupParameter<PercentValue>)Parameters["RelativeNumberOfEvaluatedRows"]; } 88 } 82 89 public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter { 83 90 get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; } … … 115 122 public ILookupParameter<ResultCollection> ResultsParameter { 116 123 get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; } 124 } 125 public IValueLookupParameter<BoolValue> ApplyPruningParameter { 126 get { return (IValueLookupParameter<BoolValue>)Parameters["ApplyPruning"]; } 117 127 } 118 128 #endregion … … 176 186 } 177 187 #endregion 188 [StorableConstructor] 178 189 protected SymbolicRegressionTournamentPruning(bool deserializing) : base(deserializing) { } 179 190 public SymbolicRegressionTournamentPruning() … … 181 192 Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "A random number generator.")); 182 193 Parameters.Add(new 
ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to prune.")); 194 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("Quality")); 183 195 Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The data analysis problem data to use for branch impact evaluation.")); 184 196 Parameters.Add(new LookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter to use for node impact evaluation")); … … 187 199 Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator that should be used to determine which branches are not relevant.")); 188 200 Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization.")); 201 Parameters.Add(new ValueLookupParameter<BoolValue>("ApplyPruning")); 189 202 Parameters.Add(new ValueLookupParameter<DoubleValue>(MaxPruningRatioParameterName, "The maximal relative size of the pruned branch.", new DoubleValue(0.5))); 190 203 Parameters.Add(new ValueLookupParameter<IntValue>(TournamentSizeParameterName, "The number of branches to compare for pruning", new IntValue(10))); … … 199 212 Parameters.Add(new LookupParameter<IntValue>(GenerationParameterName, "The current generation.")); 200 213 Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The results collection.")); 214 Parameters.Add(new ValueLookupParameter<PercentValue>("RelativeNumberOfEvaluatedRows", new PercentValue(1.0))); 201 215 } 202 216 … … 210 224 Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization.")); 211 225 } 226 if (!Parameters.ContainsKey("ApplyPruning")) { 227 Parameters.Add(new ValueLookupParameter<BoolValue>("ApplyPruning")); 228 } 229 if (!Parameters.ContainsKey("Quality")) { 230 Parameters.Add(new 
ScopeTreeLookupParameter<DoubleValue>("Quality")); 231 } 232 if (!Parameters.ContainsKey("RelativeNumberOfEvaluatedRows")) { 233 Parameters.Add(new ValueLookupParameter<PercentValue>("RelativeNumberOfEvaluatedRows", new PercentValue(1.0))); 234 } 235 212 236 #endregion 213 237 } … … 215 239 public override IOperation Apply() { 216 240 bool pruningCondition = 241 (ApplyPruningParameter.ActualValue.Value) && 217 242 (Generation.Value >= FirstPruningGeneration.Value) && 218 243 ((Generation.Value - FirstPruningGeneration.Value) % PruningFrequency.Value == 0); … … 222 247 double percentileEnd = PopulationPercentileEnd.Value; 223 248 // for each tree in the given percentile 224 var trees = SymbolicExpressionTree 225 .Skip((int)(n * percentileStart)) 226 .Take((int)(n * (percentileEnd - percentileStart))); 227 foreach (var tree in trees) { 228 Prune(Random, tree, Iterations.Value, TournamentSize.Value, 229 DataAnalysisProblemData, SamplesStart.Value, SamplesEnd.Value, 249 ItemArray<SymbolicExpressionTree> trees = SymbolicExpressionTree; 250 ItemArray<DoubleValue> quality = QualityParameter.ActualValue; 251 bool maximization = Maximization.Value; 252 var selectedTrees = (from index in Enumerable.Range(0, n) 253 orderby maximization ? 
-quality[index].Value : quality[index].Value 254 select new { Tree = trees[index], Quality = quality[index] }) 255 .Skip((int)(n * percentileStart)) 256 .Take((int)(n * (percentileEnd - percentileStart))); 257 foreach (var pair in selectedTrees) { 258 Prune(Random, pair.Tree, pair.Quality, Iterations.Value, TournamentSize.Value, 259 DataAnalysisProblemData, SamplesStart.Value, SamplesEnd.Value, RelativeNumberOfEvaluatedRowsParameters.ActualValue.Value, 230 260 SymbolicExpressionTreeInterpreter, Evaluator, Maximization.Value, 231 261 LowerEstimationLimit.Value, UpperEstimationLimit.Value, … … 236 266 } 237 267 238 public static void Prune(IRandom random, SymbolicExpressionTree tree, int iterations, int tournamentSize,239 DataAnalysisProblemData problemData, int samplesStart, int samplesEnd, 268 public static void Prune(IRandom random, SymbolicExpressionTree tree, DoubleValue quality, int iterations, int tournamentSize, 269 DataAnalysisProblemData problemData, int samplesStart, int samplesEnd, double relativeNumberOfEvaluatedRows, 240 270 ISymbolicExpressionTreeInterpreter interpreter, ISymbolicRegressionEvaluator evaluator, bool maximization, 241 271 double lowerEstimationLimit, double upperEstimationLimit, 242 272 double maxPruningRatio, double qualityGainWeight) { 243 IEnumerable<int> rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart); 273 274 IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(samplesStart, samplesEnd, (int)Math.Ceiling((samplesEnd - samplesStart) * relativeNumberOfEvaluatedRows)); 244 275 int originalSize = tree.Size; 245 double originalQuality = evaluator.Evaluate(interpreter, tree,246 lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, rows);247 276 248 277 int minPrunedSize = (int)(originalSize * (1 - maxPruningRatio)); 249 250 278 // tree for branch evaluation 251 279 SymbolicExpressionTree templateTree = (SymbolicExpressionTree)tree.Clone(); … … 253 281 254 282 
SymbolicExpressionTree prunedTree = tree; 283 double currentQuality = quality.Value; 255 284 for (int iteration = 0; iteration < iterations; iteration++) { 256 285 SymbolicExpressionTree iterationBestTree = prunedTree; … … 261 290 var clonedTree = (SymbolicExpressionTree)prunedTree.Clone(); 262 291 int clonedTreeSize = clonedTree.Size; 263 var prunePoints = (from node in clonedTree. IterateNodesPostfix()292 var prunePoints = (from node in clonedTree.Root.SubTrees[0].IterateNodesPostfix() 264 293 from subTree in node.SubTrees 265 294 let subTreeSize = subTree.GetSize() … … 280 309 281 310 double prunedQuality = evaluator.Evaluate(interpreter, clonedTree, 282 lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, Enumerable.Range(samplesStart, samplesEnd - samplesStart));311 lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, rows); 283 312 double prunedSize = clonedTree.Size; 284 313 // deteriation in quality: … … 289 318 // R² : newR² < origR² (deteriation) => prefer smaller deteriation 290 319 // R² : minimize: origR² / newR² 291 double qualityDeteriation = maximization ? originalQuality / prunedQuality : prunedQuality / originalQuality;320 double qualityDeteriation = maximization ? quality.Value / prunedQuality : prunedQuality / quality.Value; 292 321 // size of the pruned tree is always smaller than the size of the original tree 293 322 // same change in quality => prefer pruning operation that removes a larger tree … … 297 326 bestGain = gain; 298 327 iterationBestTree = clonedTree; 328 currentQuality = prunedQuality; 299 329 } 300 330 } … … 302 332 prunedTree = iterationBestTree; 303 333 } 334 335 quality.Value = currentQuality; 304 336 tree.Root = prunedTree.Root; 305 337 }
Note: See TracChangeset for help on using the changeset viewer.