Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/20/10 17:42:27 (14 years ago)
Author:
gkronber
Message:

Worked on overfitting analyzer and CPP. #1142

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs

    r4271 r4272  
    140140      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestSolutionParameterName]; }
    141141    }
     142    public ILookupParameter<SymbolicRegressionSolution> BestTrainingSolutionParameter {
     143      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters["BestTrainingSolution"]; }
     144    }
     145    public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
     146      get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters["Quality"]; }
     147    }
     148
    142149    public ILookupParameter<IntValue> GenerationsParameter {
    143150      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
     
    228235      Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
    229236      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestSolutionParameterName, "The best symbolic regression solution."));
     237      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>("BestTrainingSolution"));
     238      Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("Quality"));
    230239      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
    231240      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
     
    252261        Parameters.Add(new LookupParameter<DataTable>(BestSolutionQualityValuesParameterName));
    253262      }
     263      if (!Parameters.ContainsKey("BestTrainingSolution")) {
     264        Parameters.Add(new LookupParameter<SymbolicRegressionSolution>("BestTrainingSolution"));
     265      }
     266      if (!Parameters.ContainsKey("Quality")) {
     267        Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("Quality"));
     268      }
    254269      #endregion
    255270    }
    256271
    257272    public override IOperation Apply() {
    258       var trees = SymbolicExpressionTree;
     273      ItemArray<SymbolicExpressionTree> trees = SymbolicExpressionTree;
     274      ItemArray<DoubleValue> qualities = QualityParameter.ActualValue;
    259275
    260276      string targetVariable = ProblemData.TargetVariable.Value;
     
    273289      double bestQuality = Maximization.Value ? double.NegativeInfinity : double.PositiveInfinity;
    274290      SymbolicExpressionTree bestTree = null;
    275 
    276       foreach (var tree in trees) {
     291      SymbolicExpressionTree bestTrainingTree = trees[0];
     292      double bestTrainingQuality = qualities[0].Value;
     293      for (int i = 0; i < trees.Length; i++) {
     294        SymbolicExpressionTree tree = trees[i];
    277295        double quality = Evaluator.Evaluate(SymbolicExpressionTreeInterpreter, tree,
    278296          lowerEstimationLimit, upperEstimationLimit,
     
    285303          bestTree = tree;
    286304        }
    287       }
     305        if ((Maximization.Value && qualities[i].Value > bestTrainingQuality) ||
     306            (!Maximization.Value && qualities[i].Value < bestTrainingQuality)) {
     307          bestTrainingQuality = qualities[i].Value;
     308          bestTrainingTree = tree;
     309        }
     310      }
     311
     312      var scaledBestTrainingTree = GetScaledTree(bestTrainingTree);
     313
     314      SymbolicRegressionSolution bestTrainingSolution = new SymbolicRegressionSolution(ProblemData,
     315        new SymbolicRegressionModel(SymbolicExpressionTreeInterpreter, scaledBestTrainingTree),
     316        lowerEstimationLimit, upperEstimationLimit);
     317      bestTrainingSolution.Name = "Best solution (training)";
     318      bestTrainingSolution.Description = "The solution of the population with the highest fitness";
    288319
    289320      // if the best validation tree is better than the current best solution => update
     
    293324        (!Maximization.Value && bestQuality < BestSolutionQuality.Value);
    294325      if (newBest) {
    295         // calculate scaling parameters and only for the best tree using the full training set
    296         double alpha, beta;
    297         int trainingStart = ProblemData.TrainingSamplesStart.Value;
    298         int trainingEnd = ProblemData.TrainingSamplesEnd.Value;
    299         IEnumerable<int> trainingRows = Enumerable.Range(trainingStart, trainingEnd - trainingStart);
    300         IEnumerable<double> originalValues = ProblemData.Dataset.GetEnumeratedVariableValues(targetVariable, trainingRows);
    301         IEnumerable<double> estimatedValues = SymbolicExpressionTreeInterpreter.GetSymbolicExpressionTreeValues(bestTree, ProblemData.Dataset, trainingRows);
    302 
    303         SymbolicRegressionScaledMeanSquaredErrorEvaluator.CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha);
    304 
    305         // scale tree for solution
    306         var scaledTree = SymbolicRegressionSolutionLinearScaler.Scale(bestTree, alpha, beta);
     326        var scaledTree = GetScaledTree(bestTree);
    307327        var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(),
    308328          scaledTree);
     
    323343        Results.Add(new Result(BestSolutionQualityParameterName, new DoubleValue()));
    324344        Results.Add(new Result(CurrentBestValidationQualityParameterName, new DoubleValue()));
     345        Results.Add(new Result("Best solution (training)", bestTrainingSolution));
    325346      }
    326347      Results[BestSolutionQualityParameterName].Value = new DoubleValue(BestSolutionQualityParameter.ActualValue.Value);
    327348      Results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestQuality);
     349      Results["Best solution (training)"].Value = bestTrainingSolution;
    328350
    329351      DataTable validationValues = (DataTable)Results[BestSolutionQualityValuesParameterName].Value;
     
    332354
    333355      BestSolutionQualityValuesParameter.ActualValue = validationValues;
    334      
     356
    335357      return base.Apply();
     358    }
     359
     360    private SymbolicExpressionTree GetScaledTree(SymbolicExpressionTree tree) {
     361      // calculate the scaling parameters for the given tree only, using the full training set
     362      double alpha, beta;
     363      int trainingStart = ProblemData.TrainingSamplesStart.Value;
     364      int trainingEnd = ProblemData.TrainingSamplesEnd.Value;
     365      IEnumerable<int> trainingRows = Enumerable.Range(trainingStart, trainingEnd - trainingStart);
     366      IEnumerable<double> originalValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable.Value, trainingRows);
     367      IEnumerable<double> estimatedValues = SymbolicExpressionTreeInterpreter.GetSymbolicExpressionTreeValues(tree, ProblemData.Dataset, trainingRows);
     368
     369      SymbolicRegressionScaledMeanSquaredErrorEvaluator.CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha);
     370
     371      // scale tree for solution
     372      return SymbolicRegressionSolutionLinearScaler.Scale(tree, alpha, beta);
    336373    }
    337374
Note: See TracChangeset for help on using the changeset viewer.