Changeset 4191


Ignore:
Timestamp:
08/11/10 12:00:53 (9 years ago)
Author:
gkronber
Message:

Changed validation best solution analyzer and tournament pruning operator to use the evaluator specified in the problem parameters. #1117

Location:
trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs

    r4127 r4191  
    4646    private const string ValidationSamplesStartParameterName = "SamplesStart";
    4747    private const string ValidationSamplesEndParameterName = "SamplesEnd";
    48     private const string QualityParameterName = "Quality";
     48    // private const string QualityParameterName = "Quality";
    4949    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    5050    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
     51    private const string EvaluatorParameterName = "Evaluator";
     52    private const string MaximizationParameterName = "Maximization";
    5153    private const string BestSolutionParameterName = "Best solution (validation)";
    5254    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
     
    109111      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    110112    }
     113    public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
     114      get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
     115    }
     116    public ILookupParameter<BoolValue> MaximizationParameter {
     117      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
     118    }
    111119    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
    112120      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
     
    158166      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    159167    }
     168    public ISymbolicRegressionEvaluator Evaluator {
     169      get { return EvaluatorParameter.ActualValue; }
     170    }
     171    public BoolValue Maximization {
     172      get { return MaximizationParameter.ActualValue; }
     173    }
    160174    public DataAnalysisProblemData ProblemData {
    161175      get { return ProblemDataParameter.ActualValue; }
     
    185199    public IntValue Generations {
    186200      get { return GenerationsParameter.ActualValue; }
     201    }
     202    public DoubleValue BestSolutionQuality {
     203      get { return BestSolutionQualityParameter.ActualValue; }
    187204    }
    188205
     
    192209      : base() {
    193210      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
     211      Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
    194212      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
    195       Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic expression trees to analyze."));
     213      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
    196214      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
    197215      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
     
    212230    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base() { }
    213231
     232    [StorableHook(HookType.AfterDeserialization)]
     233    private void AfterDeserialization() {
     234      #region compatibility remove before releasing 3.3.1
     235      if (!Parameters.ContainsKey(EvaluatorParameterName)) {
     236        Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
     237      }
     238      if (!Parameters.ContainsKey(MaximizationParameterName)) {
     239        Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
     240      }
     241      #endregion
     242    }
     243
    214244    public override IOperation Apply() {
    215245      var trees = SymbolicExpressionTree;
     
    228258      double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;
    229259
    230       double bestValidationRSquared = -1.0;
     260      double bestQuality = Maximization.Value ? double.NegativeInfinity : double.PositiveInfinity;
    231261      SymbolicExpressionTree bestTree = null;
    232262
    233263      foreach (var tree in trees) {
    234         double validationRSquared = SymbolicRegressionPearsonsRSquaredEvaluator.Calculate(SymbolicExpressionTreeInterpreter, tree,
     264        double quality = Evaluator.Evaluate(SymbolicExpressionTreeInterpreter, tree,
    235265          lowerEstimationLimit, upperEstimationLimit,
    236266          ProblemData.Dataset, targetVariable,
    237267         rows);
    238268
    239         if (validationRSquared > bestValidationRSquared) {
    240           bestValidationRSquared = validationRSquared;
     269        if ((Maximization.Value && quality > bestQuality) ||
     270            (!Maximization.Value && quality < bestQuality)) {
     271          bestQuality = quality;
    241272          bestTree = tree;
    242273        }
    243274      }
    244275
    245 
    246276      // if the best validation tree is better than the current best solution => update
    247       if (BestSolutionQualityParameter.ActualValue == null || BestSolutionQualityParameter.ActualValue.Value < bestValidationRSquared) {
    248         // calculate scaling parameters and validation MSE only for the best tree
    249         // scale tree for solution
     277      bool newBest =
     278        BestSolutionQuality == null ||
     279        (Maximization.Value && bestQuality > BestSolutionQuality.Value) ||
     280        (!Maximization.Value && bestQuality < BestSolutionQuality.Value);
     281      if (newBest) {
     282        // calculate the scaling parameters only for the best tree, using the full training set
    250283        double alpha, beta;
    251         double validationMSE = SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, bestTree,
     284        int trainingStart = ProblemData.TrainingSamplesStart.Value;
     285        int trainingEnd = ProblemData.TrainingSamplesEnd.Value;
     286        IEnumerable<int> trainingRows = Enumerable.Range(trainingStart, trainingEnd - trainingStart);
     287        SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, bestTree,
    252288          lowerEstimationLimit, upperEstimationLimit,
    253289          ProblemData.Dataset, targetVariable,
    254           rows, out beta, out alpha);
    255 
     290          trainingRows, out beta, out alpha);
     291
     292        // scale tree for solution
    256293        var scaledTree = SymbolicRegressionSolutionLinearScaler.Scale(bestTree, alpha, beta);
    257294        var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(),
     
    262299
    263300        BestSolutionParameter.ActualValue = solution;
    264         BestSolutionQualityParameter.ActualValue = new DoubleValue(bestValidationRSquared);
     301        BestSolutionQualityParameter.ActualValue = new DoubleValue(bestQuality);
    265302
    266303        BestSymbolicRegressionSolutionAnalyzer.UpdateBestSolutionResults(solution, ProblemData, Results, Generations, VariableFrequencies);
    267304      }
     305
    268306
    269307      if (!Results.ContainsKey(BestSolutionQualityValuesParameterName)) {
     
    273311      }
    274312      Results[BestSolutionQualityParameterName].Value = new DoubleValue(BestSolutionQualityParameter.ActualValue.Value);
    275       Results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestValidationRSquared);
     313      Results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestQuality);
    276314
    277315      DataTable validationValues = (DataTable)Results[BestSolutionQualityValuesParameterName].Value;
    278316      AddValue(validationValues, BestSolutionQualityParameter.ActualValue.Value, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
    279       AddValue(validationValues, bestValidationRSquared, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
     317      AddValue(validationValues, bestQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
    280318      return base.Apply();
    281319    }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionTournamentPruning.cs

    r4068 r4191  
    3030using HeuristicLab.Problems.DataAnalysis.Symbolic;
    3131using HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols;
     32using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3233
    3334namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
     
    3839    private const string SamplesStartParameterName = "SamplesStart";
    3940    private const string SamplesEndParameterName = "SamplesEnd";
     41    private const string EvaluatorParameterName = "Evaluator";
     42    private const string MaximizationParameterName = "Maximization";
    4043    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    4144    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
     
    7780      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; }
    7881    }
     82    public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
     83      get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
     84    }
     85    public ILookupParameter<BoolValue> MaximizationParameter {
     86      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
     87    }
    7988    public IValueLookupParameter<DoubleValue> MaxPruningRatioParameter {
    8089      get { return (IValueLookupParameter<DoubleValue>)Parameters[MaxPruningRatioParameterName]; }
     
    133142      get { return SamplesEndParameter.ActualValue; }
    134143    }
     144    public ISymbolicRegressionEvaluator Evaluator {
     145      get { return EvaluatorParameter.ActualValue; }
     146    }
     147    public BoolValue Maximization {
     148      get { return MaximizationParameter.ActualValue; }
     149    }
    135150    public DoubleValue MaxPruningRatio {
    136151      get { return MaxPruningRatioParameter.ActualValue; }
     
    161176    }
    162177    #endregion
     178    protected SymbolicRegressionTournamentPruning(bool deserializing) : base(deserializing) { }
    163179    public SymbolicRegressionTournamentPruning()
    164180      : base() {
     
    169185      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesStartParameterName, "The first row index of the dataset partition to use for branch impact evaluation."));
    170186      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesEndParameterName, "The last row index of the dataset partition to use for branch impact evaluation."));
     187      Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator that should be used to determine which branches are not relevant."));
     188      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
    171189      Parameters.Add(new ValueLookupParameter<DoubleValue>(MaxPruningRatioParameterName, "The maximal relative size of the pruned branch.", new DoubleValue(0.5)));
    172190      Parameters.Add(new ValueLookupParameter<IntValue>(TournamentSizeParameterName, "The number of branches to compare for pruning", new IntValue(10)));
     
    181199      Parameters.Add(new LookupParameter<IntValue>(GenerationParameterName, "The current generation."));
    182200      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The results collection."));
     201    }
     202
     203    [StorableHook(HookType.AfterDeserialization)]
     204    private void AfterDeserialization() {
     205      #region compatibility remove before releasing 3.3.1
     206      if (!Parameters.ContainsKey(EvaluatorParameterName)) {
     207        Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
     208      }
     209      if (!Parameters.ContainsKey(MaximizationParameterName)) {
     210        Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
     211      }
     212      #endregion
    183213    }
    184214
     
    198228          Prune(Random, tree, Iterations.Value, TournamentSize.Value,
    199229            DataAnalysisProblemData, SamplesStart.Value, SamplesEnd.Value,
    200             SymbolicExpressionTreeInterpreter,
     230            SymbolicExpressionTreeInterpreter, Evaluator, Maximization.Value,
    201231            LowerEstimationLimit.Value, UpperEstimationLimit.Value,
    202232            MaxPruningRatio.Value, QualityGainWeight.Value);
     
    208238    public static void Prune(IRandom random, SymbolicExpressionTree tree, int iterations, int tournamentSize,
    209239      DataAnalysisProblemData problemData, int samplesStart, int samplesEnd,
    210       ISymbolicExpressionTreeInterpreter interpreter,
     240      ISymbolicExpressionTreeInterpreter interpreter, ISymbolicRegressionEvaluator evaluator, bool maximization,
    211241      double lowerEstimationLimit, double upperEstimationLimit,
    212242      double maxPruningRatio, double qualityGainWeight) {
    213243      IEnumerable<int> rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart);
    214244      int originalSize = tree.Size;
    215       double originalMse = SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(interpreter, tree,
    216         lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, Enumerable.Range(samplesStart, samplesEnd - samplesStart));
     245      double originalQuality = evaluator.Evaluate(interpreter, tree,
     246        lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, rows);
    217247
    218248      int minPrunedSize = (int)(originalSize * (1 - maxPruningRatio));
     
    249279            selectedPrunePoint.Parent.InsertSubTree(selectedPrunePoint.SubTreeIndex, constNode);
    250280
    251             double prunedMse = SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(interpreter, clonedTree,
     281            double prunedQuality = evaluator.Evaluate(interpreter, clonedTree,
    252282        lowerEstimationLimit, upperEstimationLimit, problemData.Dataset, problemData.TargetVariable.Value, Enumerable.Range(samplesStart, samplesEnd - samplesStart));
    253283            double prunedSize = clonedTree.Size;
    254             // MSE of the pruned tree is larger than the original tree in most cases
     284            // deterioration in quality:
     285            // e.g.: MSE : newMse < origMse (improvement)   => prefer the larger improvement
     286            //       MSE : newMse > origMse (deterioration) => prefer the smaller deterioration
     287            //       MSE : minimize: newMse / origMse
     288            //       R²  : newR² > origR²   (improvement)   => prefer the larger improvement
     289            //       R²  : newR² < origR²   (deterioration) => prefer the smaller deterioration
     290            //       R²  : minimize: origR² / newR²
     291            double qualityDeteriation = maximization ? originalQuality / prunedQuality : prunedQuality / originalQuality;
    255292            // size of the pruned tree is always smaller than the size of the original tree
    256293            // same change in quality => prefer pruning operation that removes a larger tree
    257             double gain = ((prunedMse / originalMse) * qualityGainWeight) /
     294            double gain = (qualityDeteriation * qualityGainWeight) /
    258295                           (originalSize / prunedSize);
    259296            if (gain < bestGain) {
Note: See TracChangeset for help on using the changeset viewer.