Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/11/10 12:00:53 (12 years ago)
Author:
gkronber
Message:

Changed the validation best solution analyzer and the tournament pruning operator to use the evaluator specified in the problem parameters. #1117

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs

    r4127 r4191  
    4646    private const string ValidationSamplesStartParameterName = "SamplesStart";
    4747    private const string ValidationSamplesEndParameterName = "SamplesEnd";
    48     private const string QualityParameterName = "Quality";
     48    // private const string QualityParameterName = "Quality";
    4949    private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    5050    private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
     51    private const string EvaluatorParameterName = "Evaluator";
     52    private const string MaximizationParameterName = "Maximization";
    5153    private const string BestSolutionParameterName = "Best solution (validation)";
    5254    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
     
    109111      get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    110112    }
     113    public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
     114      get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
     115    }
     116    public ILookupParameter<BoolValue> MaximizationParameter {
     117      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
     118    }
    111119    public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
    112120      get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
     
    158166      get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    159167    }
     168    public ISymbolicRegressionEvaluator Evaluator {
     169      get { return EvaluatorParameter.ActualValue; }
     170    }
     171    public BoolValue Maximization {
     172      get { return MaximizationParameter.ActualValue; }
     173    }
    160174    public DataAnalysisProblemData ProblemData {
    161175      get { return ProblemDataParameter.ActualValue; }
     
    185199    public IntValue Generations {
    186200      get { return GenerationsParameter.ActualValue; }
     201    }
     202    public DoubleValue BestSolutionQuality {
     203      get { return BestSolutionQualityParameter.ActualValue; }
    187204    }
    188205
     
    192209      : base() {
    193210      Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
     211      Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
    194212      Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
    195       Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic expression trees to analyze."));
     213      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
    196214      Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
    197215      Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
     
    212230    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base() { }
    213231
     232    [StorableHook(HookType.AfterDeserialization)]
     233    private void AfterDeserialization() {
     234      #region compatibility remove before releasing 3.3.1
     235      if (!Parameters.ContainsKey(EvaluatorParameterName)) {
     236        Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
     237      }
     238      if (!Parameters.ContainsKey(MaximizationParameterName)) {
     239        Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
     240      }
     241      #endregion
     242    }
     243
    214244    public override IOperation Apply() {
    215245      var trees = SymbolicExpressionTree;
     
    228258      double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;
    229259
    230       double bestValidationRSquared = -1.0;
     260      double bestQuality = Maximization.Value ? double.NegativeInfinity : double.PositiveInfinity;
    231261      SymbolicExpressionTree bestTree = null;
    232262
    233263      foreach (var tree in trees) {
    234         double validationRSquared = SymbolicRegressionPearsonsRSquaredEvaluator.Calculate(SymbolicExpressionTreeInterpreter, tree,
     264        double quality = Evaluator.Evaluate(SymbolicExpressionTreeInterpreter, tree,
    235265          lowerEstimationLimit, upperEstimationLimit,
    236266          ProblemData.Dataset, targetVariable,
    237267         rows);
    238268
    239         if (validationRSquared > bestValidationRSquared) {
    240           bestValidationRSquared = validationRSquared;
     269        if ((Maximization.Value && quality > bestQuality) ||
     270            (!Maximization.Value && quality < bestQuality)) {
     271          bestQuality = quality;
    241272          bestTree = tree;
    242273        }
    243274      }
    244275
    245 
    246276      // if the best validation tree is better than the current best solution => update
    247       if (BestSolutionQualityParameter.ActualValue == null || BestSolutionQualityParameter.ActualValue.Value < bestValidationRSquared) {
    248         // calculate scaling parameters and validation MSE only for the best tree
    249         // scale tree for solution
     277      bool newBest =
     278        BestSolutionQuality == null ||
     279        (Maximization.Value && bestQuality > BestSolutionQuality.Value) ||
     280        (!Maximization.Value && bestQuality < BestSolutionQuality.Value);
     281      if (newBest) {
     282        // calculate scaling parameters only for the best tree, using the full training set
    250283        double alpha, beta;
    251         double validationMSE = SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, bestTree,
     284        int trainingStart = ProblemData.TrainingSamplesStart.Value;
     285        int trainingEnd = ProblemData.TrainingSamplesEnd.Value;
     286        IEnumerable<int> trainingRows = Enumerable.Range(trainingStart, trainingEnd - trainingStart);
     287        SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, bestTree,
    252288          lowerEstimationLimit, upperEstimationLimit,
    253289          ProblemData.Dataset, targetVariable,
    254           rows, out beta, out alpha);
    255 
     290          trainingRows, out beta, out alpha);
     291
     292        // scale tree for solution
    256293        var scaledTree = SymbolicRegressionSolutionLinearScaler.Scale(bestTree, alpha, beta);
    257294        var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(),
     
    262299
    263300        BestSolutionParameter.ActualValue = solution;
    264         BestSolutionQualityParameter.ActualValue = new DoubleValue(bestValidationRSquared);
     301        BestSolutionQualityParameter.ActualValue = new DoubleValue(bestQuality);
    265302
    266303        BestSymbolicRegressionSolutionAnalyzer.UpdateBestSolutionResults(solution, ProblemData, Results, Generations, VariableFrequencies);
    267304      }
     305
    268306
    269307      if (!Results.ContainsKey(BestSolutionQualityValuesParameterName)) {
     
    273311      }
    274312      Results[BestSolutionQualityParameterName].Value = new DoubleValue(BestSolutionQualityParameter.ActualValue.Value);
    275       Results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestValidationRSquared);
     313      Results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestQuality);
    276314
    277315      DataTable validationValues = (DataTable)Results[BestSolutionQualityValuesParameterName].Value;
    278316      AddValue(validationValues, BestSolutionQualityParameter.ActualValue.Value, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
    279       AddValue(validationValues, bestValidationRSquared, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
     317      AddValue(validationValues, bestQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
    280318      return base.Apply();
    281319    }
Note: See TracChangeset for help on using the changeset viewer.