Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/23/10 18:54:35 (14 years ago)
Author:
gkronber
Message:

Added output parameter for validation quality to validation analyzer, added input parameter for validation quality to overfitting analyzer, and fixed bugs in pruning operator. #1142

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionTournamentPruning.cs

    r4195 r4297  
    3131using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
    3232using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     33using System;
    3334
    3435namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
     
    6263      get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    6364    }
     65    public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
     66      get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters["Quality"]; }
     67    }
    6468    public ILookupParameter<DataAnalysisProblemData> DataAnalysisProblemDataParameter {
    6569      get { return (ILookupParameter<DataAnalysisProblemData>)Parameters[DataAnalysisProblemDataParameterName]; }
     
    8084      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; }
    8185    }
     86    public IValueLookupParameter<PercentValue> RelativeNumberOfEvaluatedRowsParameters {
     87      get { return (IValueLookupParameter<PercentValue>)Parameters["RelativeNumberOfEvaluatedRows"]; }
     88    }
    8289    public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
    8390      get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
     
    115122    public ILookupParameter<ResultCollection> ResultsParameter {
    116123      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
     124    }
     125    public IValueLookupParameter<BoolValue> ApplyPruningParameter {
     126      get { return (IValueLookupParameter<BoolValue>)Parameters["ApplyPruning"]; }
    117127    }
    118128    #endregion
     
    176186    }
    177187    #endregion
     188    [StorableConstructor]
    178189    protected SymbolicRegressionTournamentPruning(bool deserializing) : base(deserializing) { }
    179190    public SymbolicRegressionTournamentPruning()
     
    181192      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "A random number generator."));
    182193      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to prune."));
     194      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("Quality"));
    183195      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The data analysis problem data to use for branch impact evaluation."));
    184196      Parameters.Add(new LookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter to use for node impact evaluation"));
     
    187199      Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator that should be used to determine which branches are not relevant."));
    188200      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
     201      Parameters.Add(new ValueLookupParameter<BoolValue>("ApplyPruning"));
    189202      Parameters.Add(new ValueLookupParameter<DoubleValue>(MaxPruningRatioParameterName, "The maximal relative size of the pruned branch.", new DoubleValue(0.5)));
    190203      Parameters.Add(new ValueLookupParameter<IntValue>(TournamentSizeParameterName, "The number of branches to compare for pruning", new IntValue(10)));
     
    199212      Parameters.Add(new LookupParameter<IntValue>(GenerationParameterName, "The current generation."));
    200213      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The results collection."));
     214      Parameters.Add(new ValueLookupParameter<PercentValue>("RelativeNumberOfEvaluatedRows", new PercentValue(1.0)));
    201215    }
    202216
     
    210224        Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
    211225      }
     226      if (!Parameters.ContainsKey("ApplyPruning")) {
     227        Parameters.Add(new ValueLookupParameter<BoolValue>("ApplyPruning"));
     228      }
     229      if (!Parameters.ContainsKey("Quality")) {
     230        Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("Quality"));
     231      }
     232      if (!Parameters.ContainsKey("RelativeNumberOfEvaluatedRows")) {
     233        Parameters.Add(new ValueLookupParameter<PercentValue>("RelativeNumberOfEvaluatedRows", new PercentValue(1.0)));
     234      }
     235
    212236      #endregion
    213237    }
     
    215239    public override IOperation Apply() {
    216240      bool pruningCondition =
     241        (ApplyPruningParameter.ActualValue.Value) &&
    217242        (Generation.Value >= FirstPruningGeneration.Value) &&
    218243        ((Generation.Value - FirstPruningGeneration.Value) % PruningFrequency.Value == 0);
     
    222247        double percentileEnd = PopulationPercentileEnd.Value;
    223248        // for each tree in the given percentile
    224         var trees = SymbolicExpressionTree
    225           .Skip((int)(n * percentileStart))
    226           .Take((int)(n * (percentileEnd - percentileStart)));
    227         foreach (var tree in trees) {
    228           Prune(Random, tree, Iterations.Value, TournamentSize.Value,
    229             DataAnalysisProblemData, SamplesStart.Value, SamplesEnd.Value,
     249        ItemArray<SymbolicExpressionTree> trees = SymbolicExpressionTree;
     250        ItemArray<DoubleValue> quality = QualityParameter.ActualValue;
     251        bool maximization = Maximization.Value;
     252        var selectedTrees = (from index in Enumerable.Range(0, n)
     253                             orderby maximization ? -quality[index].Value : quality[index].Value
     254                             select new { Tree = trees[index], Quality = quality[index] })
     255                                                            .Skip((int)(n * percentileStart))
     256                                                            .Take((int)(n * (percentileEnd - percentileStart)));
     257        foreach (var pair in selectedTrees) {
     258          Prune(Random, pair.Tree, pair.Quality, Iterations.Value, TournamentSize.Value,
     259            DataAnalysisProblemData, SamplesStart.Value, SamplesEnd.Value, RelativeNumberOfEvaluatedRowsParameters.ActualValue.Value,
    230260            SymbolicExpressionTreeInterpreter, Evaluator, Maximization.Value,
    231261            LowerEstimationLimit.Value, UpperEstimationLimit.Value,
     
    236266    }
    237267
    238     public static void Prune(IRandom random, SymbolicExpressionTree tree, int iterations, int tournamentSize,
    239       DataAnalysisProblemData problemData, int samplesStart, int samplesEnd,
     268    public static void Prune(IRandom random, SymbolicExpressionTree tree, DoubleValue quality, int iterations, int tournamentSize,
     269      DataAnalysisProblemData problemData, int samplesStart, int samplesEnd, double relativeNumberOfEvaluatedRows,
    240270      ISymbolicExpressionTreeInterpreter interpreter, ISymbolicRegressionEvaluator evaluator, bool maximization,
    241271      double lowerEstimationLimit, double upperEstimationLimit,
    242272      double maxPruningRatio, double qualityGainWeight) {
    243       IEnumerable<int> rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart);
     273
     274      IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(samplesStart, samplesEnd, (int)Math.Ceiling((samplesEnd - samplesStart) * relativeNumberOfEvaluatedRows));
    244275      int originalSize = tree.Size;
    245       double originalQuality = evaluator.Evaluate(interpreter, tree,
    246         lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, rows);
    247276
    248277      int minPrunedSize = (int)(originalSize * (1 - maxPruningRatio));
    249 
    250278      // tree for branch evaluation
    251279      SymbolicExpressionTree templateTree = (SymbolicExpressionTree)tree.Clone();
     
    253281
    254282      SymbolicExpressionTree prunedTree = tree;
     283      double currentQuality = quality.Value;
    255284      for (int iteration = 0; iteration < iterations; iteration++) {
    256285        SymbolicExpressionTree iterationBestTree = prunedTree;
     
    261290          var clonedTree = (SymbolicExpressionTree)prunedTree.Clone();
    262291          int clonedTreeSize = clonedTree.Size;
    263           var prunePoints = (from node in clonedTree.IterateNodesPostfix()
     292          var prunePoints = (from node in clonedTree.Root.SubTrees[0].IterateNodesPostfix()
    264293                             from subTree in node.SubTrees
    265294                             let subTreeSize = subTree.GetSize()
     
    280309
    281310            double prunedQuality = evaluator.Evaluate(interpreter, clonedTree,
    282         lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, Enumerable.Range(samplesStart, samplesEnd - samplesStart));
     311        lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, rows);
    283312            double prunedSize = clonedTree.Size;
    284313            // deterioration in quality:
     
    289318            //      R²  : newR² < origR²   (deterioration) => prefer smaller deterioration
    290319            //      R²  : minimize: origR² / newR²
    291             double qualityDeteriation = maximization ? originalQuality / prunedQuality : prunedQuality / originalQuality;
     320            double qualityDeteriation = maximization ? quality.Value / prunedQuality : prunedQuality / quality.Value;
    292321            // size of the pruned tree is always smaller than the size of the original tree
    293322            // same change in quality => prefer pruning operation that removes a larger tree
     
    297326              bestGain = gain;
    298327              iterationBestTree = clonedTree;
     328              currentQuality = prunedQuality;
    299329            }
    300330          }
     
    302332        prunedTree = iterationBestTree;
    303333      }
     334
     335      quality.Value = currentQuality;
    304336      tree.Root = prunedTree.Root;
    305337    }
Note: See TracChangeset for help on using the changeset viewer.