Changeset 4297 for branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionTournamentPruning.cs
- Timestamp: 08/23/10 18:54:35 (14 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionTournamentPruning.cs
r4195 r4297 31 31 using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols; 32 32 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 33 using System; 33 34 34 35 namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers { … … 62 63 get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; } 63 64 } 65 public ScopeTreeLookupParameter<DoubleValue> QualityParameter { 66 get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters["Quality"]; } 67 } 64 68 public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter { 65 69 get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; } … … 80 84 get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; } 81 85 } 86 public IValueLookupParameter<PercentValue> RelativeNumberOfEvaluatedRowsParameters { 87 get { return (IValueLookupParameter<PercentValue>)Parameters["RelativeNumberOfEvaluatedRows"]; } 88 } 82 89 public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter { 83 90 get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; } … … 115 122 public ILookupParameter<ResultCollection> ResultsParameter { 116 123 get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; } 124 } 125 public IValueLookupParameter<BoolValue> ApplyPruningParameter { 126 get { return (IValueLookupParameter<BoolValue>)Parameters["ApplyPruning"]; } 117 127 } 118 128 #endregion … … 176 186 } 177 187 #endregion 188 [StorableConstructor] 178 189 protected SymbolicRegressionTournamentPruning(bool deserializing) : base(deserializing) { } 179 190 public SymbolicRegressionTournamentPruning() … … 181 192 Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "A random number generator.")); 182 193 Parameters.Add(new 
ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to prune.")); 194 Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("Quality")); 183 195 Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The data analysis problem data to use for branch impact evaluation.")); 184 196 Parameters.Add(new LookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter to use for node impact evaluation")); … … 187 199 Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator that should be used to determine which branches are not relevant.")); 188 200 Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization.")); 201 Parameters.Add(new ValueLookupParameter<BoolValue>("ApplyPruning")); 189 202 Parameters.Add(new ValueLookupParameter<DoubleValue>(MaxPruningRatioParameterName, "The maximal relative size of the pruned branch.", new DoubleValue(0.5))); 190 203 Parameters.Add(new ValueLookupParameter<IntValue>(TournamentSizeParameterName, "The number of branches to compare for pruning", new IntValue(10))); … … 199 212 Parameters.Add(new LookupParameter<IntValue>(GenerationParameterName, "The current generation.")); 200 213 Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The results collection.")); 214 Parameters.Add(new ValueLookupParameter<PercentValue>("RelativeNumberOfEvaluatedRows", new PercentValue(1.0))); 201 215 } 202 216 … … 210 224 Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization.")); 211 225 } 226 if (!Parameters.ContainsKey("ApplyPruning")) { 227 Parameters.Add(new ValueLookupParameter<BoolValue>("ApplyPruning")); 228 } 229 if (!Parameters.ContainsKey("Quality")) { 230 Parameters.Add(new 
ScopeTreeLookupParameter<DoubleValue>("Quality")); 231 } 232 if (!Parameters.ContainsKey("RelativeNumberOfEvaluatedRows")) { 233 Parameters.Add(new ValueLookupParameter<PercentValue>("RelativeNumberOfEvaluatedRows", new PercentValue(1.0))); 234 } 235 212 236 #endregion 213 237 } … … 215 239 public override IOperation Apply() { 216 240 bool pruningCondition = 241 (ApplyPruningParameter.ActualValue.Value) && 217 242 (Generation.Value >= FirstPruningGeneration.Value) && 218 243 ((Generation.Value - FirstPruningGeneration.Value) % PruningFrequency.Value == 0); … … 222 247 double percentileEnd = PopulationPercentileEnd.Value; 223 248 // for each tree in the given percentile 224 var trees = SymbolicExpressionTree 225 .Skip((int)(n * percentileStart)) 226 .Take((int)(n * (percentileEnd - percentileStart))); 227 foreach (var tree in trees) { 228 Prune(Random, tree, Iterations.Value, TournamentSize.Value, 229 DataAnalysisProblemData, SamplesStart.Value, SamplesEnd.Value, 249 ItemArray<SymbolicExpressionTree> trees = SymbolicExpressionTree; 250 ItemArray<DoubleValue> quality = QualityParameter.ActualValue; 251 bool maximization = Maximization.Value; 252 var selectedTrees = (from index in Enumerable.Range(0, n) 253 orderby maximization ? 
-quality[index].Value : quality[index].Value 254 select new { Tree = trees[index], Quality = quality[index] }) 255 .Skip((int)(n * percentileStart)) 256 .Take((int)(n * (percentileEnd - percentileStart))); 257 foreach (var pair in selectedTrees) { 258 Prune(Random, pair.Tree, pair.Quality, Iterations.Value, TournamentSize.Value, 259 DataAnalysisProblemData, SamplesStart.Value, SamplesEnd.Value, RelativeNumberOfEvaluatedRowsParameters.ActualValue.Value, 230 260 SymbolicExpressionTreeInterpreter, Evaluator, Maximization.Value, 231 261 LowerEstimationLimit.Value, UpperEstimationLimit.Value, … … 236 266 } 237 267 238 public static void Prune(IRandom random, SymbolicExpressionTree tree, int iterations, int tournamentSize,239 DataAnalysisProblemData problemData, int samplesStart, int samplesEnd, 268 public static void Prune(IRandom random, SymbolicExpressionTree tree, DoubleValue quality, int iterations, int tournamentSize, 269 DataAnalysisProblemData problemData, int samplesStart, int samplesEnd, double relativeNumberOfEvaluatedRows, 240 270 ISymbolicExpressionTreeInterpreter interpreter, ISymbolicRegressionEvaluator evaluator, bool maximization, 241 271 double lowerEstimationLimit, double upperEstimationLimit, 242 272 double maxPruningRatio, double qualityGainWeight) { 243 IEnumerable<int> rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart); 273 274 IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(samplesStart, samplesEnd, (int)Math.Ceiling((samplesEnd - samplesStart) * relativeNumberOfEvaluatedRows)); 244 275 int originalSize = tree.Size; 245 double originalQuality = evaluator.Evaluate(interpreter, tree,246 lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, rows);247 276 248 277 int minPrunedSize = (int)(originalSize * (1 - maxPruningRatio)); 249 250 278 // tree for branch evaluation 251 279 SymbolicExpressionTree templateTree = (SymbolicExpressionTree)tree.Clone(); … … 253 281 254 282 
SymbolicExpressionTree prunedTree = tree; 283 double currentQuality = quality.Value; 255 284 for (int iteration = 0; iteration < iterations; iteration++) { 256 285 SymbolicExpressionTree iterationBestTree = prunedTree; … … 261 290 var clonedTree = (SymbolicExpressionTree)prunedTree.Clone(); 262 291 int clonedTreeSize = clonedTree.Size; 263 var prunePoints = (from node in clonedTree. IterateNodesPostfix()292 var prunePoints = (from node in clonedTree.Root.SubTrees[0].IterateNodesPostfix() 264 293 from subTree in node.SubTrees 265 294 let subTreeSize = subTree.GetSize() … … 280 309 281 310 double prunedQuality = evaluator.Evaluate(interpreter, clonedTree, 282 lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, Enumerable.Range(samplesStart, samplesEnd - samplesStart));311 lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, rows); 283 312 double prunedSize = clonedTree.Size; 284 313 // deteriation in quality: … … 289 318 // R² : newR² < origR² (deteriation) => prefer smaller deteriation 290 319 // R² : minimize: origR² / newR² 291 double qualityDeteriation = maximization ? originalQuality / prunedQuality : prunedQuality / originalQuality;320 double qualityDeteriation = maximization ? quality.Value / prunedQuality : prunedQuality / quality.Value; 292 321 // size of the pruned tree is always smaller than the size of the original tree 293 322 // same change in quality => prefer pruning operation that removes a larger tree … … 297 326 bestGain = gain; 298 327 iterationBestTree = clonedTree; 328 currentQuality = prunedQuality; 299 329 } 300 330 } … … 302 332 prunedTree = iterationBestTree; 303 333 } 334 335 quality.Value = currentQuality; 304 336 tree.Root = prunedTree.Root; 305 337 }
Note: See TracChangeset for help on using the changeset viewer.