Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/11/10 12:00:53 (12 years ago)
Author:
gkronber
Message:

Changed validation best solution analyzer and tournament pruning operator to use the evaluator specified in the problem parameters. #1117

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionTournamentPruning.cs

    r4068 r4191  
    3030using HeuristicLab.Problems.DataAnalysis.Symbolic;
    3131using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
     32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3233
    3334namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
     
    3839    private const string SamplesStartParameterName = "SamplesStart";
    3940    private const string SamplesEndParameterName = "SamplesEnd";
     41    private const string EvaluatorParameterName = "Evaluator";
     42    private const string MaximizationParameterName = "Maximization";
    4043    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    4144    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
     
    7780      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; }
    7881    }
     82    public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
     83      get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
     84    }
     85    public ILookupParameter<BoolValue> MaximizationParameter {
     86      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
     87    }
    7988    public IValueLookupParameter<DoubleValue> MaxPruningRatioParameter {
    8089      get { return (IValueLookupParameter<DoubleValue>)Parameters[MaxPruningRatioParameterName]; }
     
    133142      get { return SamplesEndParameter.ActualValue; }
    134143    }
     144    public ISymbolicRegressionEvaluator Evaluator {
     145      get { return EvaluatorParameter.ActualValue; }
     146    }
     147    public BoolValue Maximization {
     148      get { return MaximizationParameter.ActualValue; }
     149    }
    135150    public DoubleValue MaxPruningRatio {
    136151      get { return MaxPruningRatioParameter.ActualValue; }
     
    161176    }
    162177    #endregion
     178    protected SymbolicRegressionTournamentPruning(bool deserializing) : base(deserializing) { }
    163179    public SymbolicRegressionTournamentPruning()
    164180      : base() {
     
    169185      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesStartParameterName, "The first row index of the dataset partition to use for branch impact evaluation."));
    170186      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesEndParameterName, "The last row index of the dataset partition to use for branch impact evaluation."));
     187      Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator that should be used to determine which branches are not relevant."));
     188      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
    171189      Parameters.Add(new ValueLookupParameter<DoubleValue>(MaxPruningRatioParameterName, "The maximal relative size of the pruned branch.", new DoubleValue(0.5)));
    172190      Parameters.Add(new ValueLookupParameter<IntValue>(TournamentSizeParameterName, "The number of branches to compare for pruning", new IntValue(10)));
     
    181199      Parameters.Add(new LookupParameter<IntValue>(GenerationParameterName, "The current generation."));
    182200      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The results collection."));
     201    }
     202
     203    [StorableHook(HookType.AfterDeserialization)]
     204    private void AfterDeserialization() {
     205      #region compatibility remove before releasing 3.3.1
     206      if (!Parameters.ContainsKey(EvaluatorParameterName)) {
     207        Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
     208      }
     209      if (!Parameters.ContainsKey(MaximizationParameterName)) {
     210        Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
     211      }
     212      #endregion
    183213    }
    184214
     
    198228          Prune(Random, tree, Iterations.Value, TournamentSize.Value,
    199229            DataAnalysisProblemData, SamplesStart.Value, SamplesEnd.Value,
    200             SymbolicExpressionTreeInterpreter,
     230            SymbolicExpressionTreeInterpreter, Evaluator, Maximization.Value,
    201231            LowerEstimationLimit.Value, UpperEstimationLimit.Value,
    202232            MaxPruningRatio.Value, QualityGainWeight.Value);
     
    208238    public static void Prune(IRandom random, SymbolicExpressionTree tree, int iterations, int tournamentSize,
    209239      DataAnalysisProblemData problemData, int samplesStart, int samplesEnd,
    210       ISymbolicExpressionTreeInterpreter interpreter,
     240      ISymbolicExpressionTreeInterpreter interpreter, ISymbolicRegressionEvaluator evaluator, bool maximization,
    211241      double lowerEstimationLimit, double upperEstimationLimit,
    212242      double maxPruningRatio, double qualityGainWeight) {
    213243      IEnumerable<int> rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart);
    214244      int originalSize = tree.Size;
    215       double originalMse = SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(interpreter, tree,
    216         lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, Enumerable.Range(samplesStart, samplesEnd - samplesStart));
     245      double originalQuality = evaluator.Evaluate(interpreter, tree,
     246        lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, rows);
    217247
    218248      int minPrunedSize = (int)(originalSize * (1 - maxPruningRatio));
     
    249279            selectedPrunePoint.Parent.InsertSubTree(selectedPrunePoint.SubTreeIndex, constNode);
    250280
    251             double prunedMse = SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(interpreter, clonedTree,
     281            double prunedQuality = evaluator.Evaluate(interpreter, clonedTree,
    252282        lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, Enumerable.Range(samplesStart, samplesEnd - samplesStart));
    253283            double prunedSize = clonedTree.Size;
    254             // MSE of the pruned tree is larger than the original tree in most cases
     284            // deterioration in quality:
     285            // e.g. MSE : newMse < origMse (improvement) => prefer the larger improvement
     286            //      MSE : newMse > origMse (deterioration) => prefer the smaller deterioration
     287            //      MSE : minimize: newMse / origMse
     288            //      R²  : newR² > origR²   (improvement) => prefer the larger improvement
     289            //      R²  : newR² < origR²   (deterioration) => prefer the smaller deterioration
     290            //      R²  : minimize: origR² / newR²
     291            double qualityDeteriation = maximization ? originalQuality / prunedQuality : prunedQuality / originalQuality;
    255292            // size of the pruned tree is always smaller than the size of the original tree
    256293            // same change in quality => prefer pruning operation that removes a larger tree
    257             double gain = ((prunedMse / originalMse) * qualityGainWeight) /
     294            double gain = (qualityDeteriation * qualityGainWeight) /
    258295                           (originalSize / prunedSize);
    259296            if (gain < bestGain) {
Note: See TracChangeset for help on using the changeset viewer.