Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/11/11 15:03:46 (13 years ago)
Author:
gkronber
Message:

Merged changes from trunk into the data analysis exploration branch and added a fractional distance metric evaluator. (Ticket #1142)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs

    r4484 r5275  
    2323using System.Linq;
    2424using HeuristicLab.Analysis;
     25using HeuristicLab.Common;
    2526using HeuristicLab.Core;
    2627using HeuristicLab.Data;
     
    3031using HeuristicLab.Parameters;
    3132using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    32 using HeuristicLab.Problems.DataAnalysis.Evaluators;
    3333using HeuristicLab.Problems.DataAnalysis.Symbolic;
    34 using System;
    3534
    3635namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
     
    4039  [Item("FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer", "An operator that analyzes the validation best scaled symbolic regression solution.")]
    4140  [StorableClass]
    42   public sealed class FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer : SingleSuccessorOperator, ISymbolicRegressionAnalyzer {
    43     private const string RandomParameterName = "Random";
    44     private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
    45     private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    46     private const string ProblemDataParameterName = "ProblemData";
    47     private const string ValidationSamplesStartParameterName = "SamplesStart";
    48     private const string ValidationSamplesEndParameterName = "SamplesEnd";
    49     // private const string QualityParameterName = "Quality";
    50     private const string UpperEstimationLimitParameterName = "UpperEstimationLimit";
    51     private const string LowerEstimationLimitParameterName = "LowerEstimationLimit";
    52     private const string EvaluatorParameterName = "Evaluator";
     41  public sealed class FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer : SymbolicRegressionValidationAnalyzer, ISymbolicRegressionAnalyzer {
    5342    private const string MaximizationParameterName = "Maximization";
     43    private const string CalculateSolutionComplexityParameterName = "CalculateSolutionComplexity";
    5444    private const string BestSolutionParameterName = "Best solution (validation)";
    5545    private const string BestSolutionQualityParameterName = "Best solution quality (validation)";
     46    private const string BestSolutionLengthParameterName = "Best solution length (validation)";
     47    private const string BestSolutionHeightParameterName = "Best solution height (validiation)";
    5648    private const string CurrentBestValidationQualityParameterName = "Current best validation quality";
    5749    private const string BestSolutionQualityValuesParameterName = "Validation Quality";
     
    6052    private const string BestKnownQualityParameterName = "BestKnownQuality";
    6153    private const string GenerationsParameterName = "Generations";
    62     private const string RelativeNumberOfEvaluatedSamplesParameterName = "RelativeNumberOfEvaluatedSamples";
    63 
    64     private const string TrainingMeanSquaredErrorQualityParameterName = "Mean squared error (training)";
    65     private const string MinTrainingMeanSquaredErrorQualityParameterName = "Min mean squared error (training)";
    66     private const string MaxTrainingMeanSquaredErrorQualityParameterName = "Max mean squared error (training)";
    67     private const string AverageTrainingMeanSquaredErrorQualityParameterName = "Average mean squared error (training)";
    68     private const string BestTrainingMeanSquaredErrorQualityParameterName = "Best mean squared error (training)";
    69 
    70     private const string TrainingAverageRelativeErrorQualityParameterName = "Average relative error (training)";
    71     private const string MinTrainingAverageRelativeErrorQualityParameterName = "Min average relative error (training)";
    72     private const string MaxTrainingAverageRelativeErrorQualityParameterName = "Max average relative error (training)";
    73     private const string AverageTrainingAverageRelativeErrorQualityParameterName = "Average average relative error (training)";
    74     private const string BestTrainingAverageRelativeErrorQualityParameterName = "Best average relative error (training)";
    75 
    76     private const string TrainingRSquaredQualityParameterName = "R² (training)";
    77     private const string MinTrainingRSquaredQualityParameterName = "Min R² (training)";
    78     private const string MaxTrainingRSquaredQualityParameterName = "Max R² (training)";
    79     private const string AverageTrainingRSquaredQualityParameterName = "Average R² (training)";
    80     private const string BestTrainingRSquaredQualityParameterName = "Best R² (training)";
    81 
    82     private const string TestMeanSquaredErrorQualityParameterName = "Mean squared error (test)";
    83     private const string MinTestMeanSquaredErrorQualityParameterName = "Min mean squared error (test)";
    84     private const string MaxTestMeanSquaredErrorQualityParameterName = "Max mean squared error (test)";
    85     private const string AverageTestMeanSquaredErrorQualityParameterName = "Average mean squared error (test)";
    86     private const string BestTestMeanSquaredErrorQualityParameterName = "Best mean squared error (test)";
    87 
    88     private const string TestAverageRelativeErrorQualityParameterName = "Average relative error (test)";
    89     private const string MinTestAverageRelativeErrorQualityParameterName = "Min average relative error (test)";
    90     private const string MaxTestAverageRelativeErrorQualityParameterName = "Max average relative error (test)";
    91     private const string AverageTestAverageRelativeErrorQualityParameterName = "Average average relative error (test)";
    92     private const string BestTestAverageRelativeErrorQualityParameterName = "Best average relative error (test)";
    93 
    94     private const string TestRSquaredQualityParameterName = "R² (test)";
    95     private const string MinTestRSquaredQualityParameterName = "Min R² (test)";
    96     private const string MaxTestRSquaredQualityParameterName = "Max R² (test)";
    97     private const string AverageTestRSquaredQualityParameterName = "Average R² (test)";
    98     private const string BestTestRSquaredQualityParameterName = "Best R² (test)";
    99 
    100     private const string RSquaredValuesParameterName = "R²";
    101     private const string MeanSquaredErrorValuesParameterName = "Mean squared error";
    102     private const string RelativeErrorValuesParameterName = "Average relative error";
    10354
    10455    #region parameter properties
    105     public ILookupParameter<IRandom> RandomParameter {
    106       get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
    107     }
    108     public ScopeTreeLookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
    109       get { return (ScopeTreeLookupParameter<SymbolicExpressionTree>)Parameters[SymbolicExpressionTreeParameterName]; }
    110     }
    111     public IValueLookupParameter<ISymbolicExpressionTreeInterpreter> SymbolicExpressionTreeInterpreterParameter {
    112       get { return (IValueLookupParameter<ISymbolicExpressionTreeInterpreter>)Parameters[SymbolicExpressionTreeInterpreterParameterName]; }
    113     }
    114     public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
    115       get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
    116     }
    11756    public ILookupParameter<BoolValue> MaximizationParameter {
    11857      get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
    11958    }
    120     public IValueLookupParameter<DataAnalysisProblemData> ProblemDataParameter {
    121       get { return (IValueLookupParameter<DataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
    122     }
    123     public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
    124       get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
    125     }
    126     public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
    127       get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
    128     }
    129     public IValueParameter<PercentValue> RelativeNumberOfEvaluatedSamplesParameter {
    130       get { return (IValueParameter<PercentValue>)Parameters[RelativeNumberOfEvaluatedSamplesParameterName]; }
    131     }
    132 
    133     public IValueLookupParameter<DoubleValue> UpperEstimationLimitParameter {
    134       get { return (IValueLookupParameter<DoubleValue>)Parameters[UpperEstimationLimitParameterName]; }
    135     }
    136     public IValueLookupParameter<DoubleValue> LowerEstimationLimitParameter {
    137       get { return (IValueLookupParameter<DoubleValue>)Parameters[LowerEstimationLimitParameterName]; }
     59    public IValueParameter<BoolValue> CalculateSolutionComplexityParameter {
     60      get { return (IValueParameter<BoolValue>)Parameters[CalculateSolutionComplexityParameterName]; }
    13861    }
    13962    public ILookupParameter<SymbolicRegressionSolution> BestSolutionParameter {
    14063      get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters[BestSolutionParameterName]; }
    14164    }
    142     public ILookupParameter<SymbolicRegressionSolution> BestTrainingSolutionParameter {
    143       get { return (ILookupParameter<SymbolicRegressionSolution>)Parameters["BestTrainingSolution"]; }
    144     }
    145     public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
    146       get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters["Quality"]; }
    147     }
    148     public ScopeTreeLookupParameter<DoubleValue> ValidationQualityParameter {
    149       get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters["ValidationQuality"]; }
    150     }
    151 
    15265    public ILookupParameter<IntValue> GenerationsParameter {
    15366      get { return (ILookupParameter<IntValue>)Parameters[GenerationsParameterName]; }
     
    15669      get { return (ILookupParameter<DoubleValue>)Parameters[BestSolutionQualityParameterName]; }
    15770    }
    158     public ILookupParameter<DataTable> BestSolutionQualityValuesParameter {
    159       get { return (ILookupParameter<DataTable>)Parameters[BestSolutionQualityValuesParameterName]; }
     71    public ILookupParameter<IntValue> BestSolutionLengthParameter {
     72      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionLengthParameterName]; }
     73    }
     74    public ILookupParameter<IntValue> BestSolutionHeightParameter {
     75      get { return (ILookupParameter<IntValue>)Parameters[BestSolutionHeightParameterName]; }
    16076    }
    16177    public ILookupParameter<ResultCollection> ResultsParameter {
     
    16581      get { return (ILookupParameter<DoubleValue>)Parameters[BestKnownQualityParameterName]; }
    16682    }
    167     public ILookupParameter<DoubleValue> CurrentBestValidationQualityParameter {
    168       get { return (ILookupParameter<DoubleValue>)Parameters[CurrentBestValidationQualityParameterName]; }
    169     }
    170 
    17183    public ILookupParameter<DataTable> VariableFrequenciesParameter {
    17284      get { return (ILookupParameter<DataTable>)Parameters[VariableFrequenciesParameterName]; }
     
    17587    #endregion
    17688    #region properties
    177     public IRandom Random {
    178       get { return RandomParameter.ActualValue; }
    179     }
    180     public ItemArray<SymbolicExpressionTree> SymbolicExpressionTree {
    181       get { return SymbolicExpressionTreeParameter.ActualValue; }
    182     }
    183     public ISymbolicExpressionTreeInterpreter SymbolicExpressionTreeInterpreter {
    184       get { return SymbolicExpressionTreeInterpreterParameter.ActualValue; }
    185     }
    186     public ISymbolicRegressionEvaluator Evaluator {
    187       get { return EvaluatorParameter.ActualValue; }
    188     }
    18989    public BoolValue Maximization {
    19090      get { return MaximizationParameter.ActualValue; }
    19191    }
    192     public DataAnalysisProblemData ProblemData {
    193       get { return ProblemDataParameter.ActualValue; }
    194     }
    195     public IntValue ValidationSamplesStart {
    196       get { return ValidationSamplesStartParameter.ActualValue; }
    197     }
    198     public IntValue ValidationSamplesEnd {
    199       get { return ValidationSamplesEndParameter.ActualValue; }
    200     }
    201     public PercentValue RelativeNumberOfEvaluatedSamples {
    202       get { return RelativeNumberOfEvaluatedSamplesParameter.Value; }
    203     }
    204 
    205     public DoubleValue UpperEstimationLimit {
    206       get { return UpperEstimationLimitParameter.ActualValue; }
    207     }
    208     public DoubleValue LowerEstimationLimit {
    209       get { return LowerEstimationLimitParameter.ActualValue; }
     92    public BoolValue CalculateSolutionComplexity {
     93      get { return CalculateSolutionComplexityParameter.Value; }
     94      set { CalculateSolutionComplexityParameter.Value = value; }
    21095    }
    21196    public ResultCollection Results {
     
    221106      get { return BestSolutionQualityParameter.ActualValue; }
    222107    }
     108    public IntValue BestSolutionLength {
     109      get { return BestSolutionLengthParameter.ActualValue; }
     110      set { BestSolutionLengthParameter.ActualValue = value; }
     111    }
     112    public IntValue BestSolutionHeight {
     113      get { return BestSolutionHeightParameter.ActualValue; }
     114      set { BestSolutionHeightParameter.ActualValue = value; }
     115    }
    223116
    224117    #endregion
    225118
     119    [StorableConstructor]
     120    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base(deserializing) { }
     121    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer original, Cloner cloner) : base(original, cloner) { }
    226122    public FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer()
    227123      : base() {
    228       Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The random generator to use."));
    229       Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
    230       Parameters.Add(new ScopeTreeLookupParameter<SymbolicExpressionTree>(SymbolicExpressionTreeParameterName, "The symbolic expression trees to analyze."));
    231124      Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
    232       Parameters.Add(new ValueLookupParameter<ISymbolicExpressionTreeInterpreter>(SymbolicExpressionTreeInterpreterParameterName, "The interpreter that should be used for the analysis of symbolic expression trees."));
    233       Parameters.Add(new ValueLookupParameter<DataAnalysisProblemData>(ProblemDataParameterName, "The problem data for which the symbolic expression tree is a solution."));
    234       Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The first index of the validation partition of the data set."));
    235       Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The last index of the validation partition of the data set."));
    236       Parameters.Add(new ValueParameter<PercentValue>(RelativeNumberOfEvaluatedSamplesParameterName, "The relative number of samples of the dataset partition, which should be randomly chosen for evaluation between the start and end index.", new PercentValue(1)));
    237       Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperEstimationLimitParameterName, "The upper estimation limit that was set for the evaluation of the symbolic expression trees."));
    238       Parameters.Add(new ValueLookupParameter<DoubleValue>(LowerEstimationLimitParameterName, "The lower estimation limit that was set for the evaluation of the symbolic expression trees."));
     125      Parameters.Add(new ValueParameter<BoolValue>(CalculateSolutionComplexityParameterName, "Determines if the length and height of the validation best solution should be calculated.", new BoolValue(false)));
    239126      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestSolutionParameterName, "The best symbolic regression solution."));
    240       Parameters.Add(new LookupParameter<SymbolicRegressionSolution>("BestTrainingSolution"));
    241       Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("Quality"));
    242       Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("ValidationQuality"));
    243127      Parameters.Add(new LookupParameter<IntValue>(GenerationsParameterName, "The number of generations calculated so far."));
    244128      Parameters.Add(new LookupParameter<DoubleValue>(BestSolutionQualityParameterName, "The quality of the best symbolic regression solution."));
     129      Parameters.Add(new LookupParameter<IntValue>(BestSolutionLengthParameterName, "The length of the best symbolic regression solution."));
     130      Parameters.Add(new LookupParameter<IntValue>(BestSolutionHeightParameterName, "The height of the best symbolic regression solution."));
    245131      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
    246132      Parameters.Add(new LookupParameter<DoubleValue>(BestKnownQualityParameterName, "The best known (validation) quality achieved on the data set."));
    247       Parameters.Add(new LookupParameter<DoubleValue>(CurrentBestValidationQualityParameterName, "The quality of the best solution (on the validation set) of the current generation."));
    248       Parameters.Add(new LookupParameter<DataTable>(BestSolutionQualityValuesParameterName));
    249133      Parameters.Add(new LookupParameter<DataTable>(VariableFrequenciesParameterName, "The variable frequencies table to use for the calculation of variable impacts"));
    250134    }
    251135
    252     [StorableConstructor]
    253     private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base(deserializing) { }
     136    public override IDeepCloneable Clone(Cloner cloner) {
     137      return new FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(this, cloner);
     138    }
    254139
    255140    [StorableHook(HookType.AfterDeserialization)]
    256141    private void AfterDeserialization() {
    257       #region compatibility remove before releasing 3.3.1
    258       if (!Parameters.ContainsKey(EvaluatorParameterName)) {
    259         Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>(EvaluatorParameterName, "The evaluator which should be used to evaluate the solution on the validation set."));
     142      #region compatibility remove before releasing 3.4
     143      if (!Parameters.ContainsKey("Evaluator")) {
     144        Parameters.Add(new LookupParameter<ISymbolicRegressionEvaluator>("Evaluator", "The evaluator which should be used to evaluate the solution on the validation set."));
    260145      }
    261146      if (!Parameters.ContainsKey(MaximizationParameterName)) {
    262147        Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
    263148      }
    264       if (!Parameters.ContainsKey(BestSolutionQualityValuesParameterName)) {
    265         Parameters.Add(new LookupParameter<DataTable>(BestSolutionQualityValuesParameterName));
    266       }
    267       if (!Parameters.ContainsKey("BestTrainingSolution")) {
    268         Parameters.Add(new LookupParameter<SymbolicRegressionSolution>("BestTrainingSolution"));
    269       }
    270       if (!Parameters.ContainsKey("Quality")) {
    271         Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("Quality"));
    272       }
    273       if (!Parameters.ContainsKey("ValidationQuality")) {
    274         Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("ValidationQuality"));
     149      if (!Parameters.ContainsKey(CalculateSolutionComplexityParameterName)) {
     150        Parameters.Add(new ValueParameter<BoolValue>(CalculateSolutionComplexityParameterName, "Determines if the length and height of the validation best solution should be calculated.", new BoolValue(false)));
     151      }
     152      if (!Parameters.ContainsKey(BestSolutionLengthParameterName)) {
     153        Parameters.Add(new LookupParameter<IntValue>(BestSolutionLengthParameterName, "The length of the best symbolic regression solution."));
     154      }
     155      if (!Parameters.ContainsKey(BestSolutionHeightParameterName)) {
     156        Parameters.Add(new LookupParameter<IntValue>(BestSolutionHeightParameterName, "The height of the best symbolic regression solution."));
    275157      }
    276158      #endregion
    277159    }
    278160
    279     public override IOperation Apply() {
    280       ItemArray<SymbolicExpressionTree> trees = SymbolicExpressionTree;
    281       ItemArray<DoubleValue> qualities = QualityParameter.ActualValue;
    282 
    283       string targetVariable = ProblemData.TargetVariable.Value;
    284 
    285       // select a random subset of rows in the validation set
    286       int validationStart = ValidationSamplesStart.Value;
    287       int validationEnd = ValidationSamplesEnd.Value;
    288       int seed = Random.Next();
    289       int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
    290       if (count == 0) count = 1;
    291       IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count);
    292 
    293       double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
    294       double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;
    295 
     161    protected override void Analyze(SymbolicExpressionTree[] trees, double[] validationQuality) {
    296162      double bestQuality = Maximization.Value ? double.NegativeInfinity : double.PositiveInfinity;
    297163      SymbolicExpressionTree bestTree = null;
    298       SymbolicExpressionTree bestTrainingTree = trees[0];
    299       double bestTrainingQuality = qualities[0].Value;
    300       ItemArray<DoubleValue> validationQualites = new ItemArray<DoubleValue>(qualities.Length);
     164
    301165      for (int i = 0; i < trees.Length; i++) {
    302         SymbolicExpressionTree tree = trees[i];
    303         double quality = Evaluator.Evaluate(SymbolicExpressionTreeInterpreter, tree,
    304           lowerEstimationLimit, upperEstimationLimit,
    305           ProblemData.Dataset, targetVariable,
    306          rows);
    307         validationQualites[i] = new DoubleValue(quality);
     166        double quality = validationQuality[i];
    308167        if ((Maximization.Value && quality > bestQuality) ||
    309168            (!Maximization.Value && quality < bestQuality)) {
    310169          bestQuality = quality;
    311           bestTree = tree;
     170          bestTree = trees[i];
    312171        }
    313         if ((Maximization.Value && qualities[i].Value > bestTrainingQuality) ||
    314             (!Maximization.Value && qualities[i].Value < bestTrainingQuality)) {
    315           bestTrainingQuality = qualities[i].Value;
    316           bestTrainingTree = tree;
    317         }
    318       }
    319       ValidationQualityParameter.ActualValue = validationQualites;
    320 
    321       var scaledBestTrainingTree = GetScaledTree(bestTrainingTree);
    322 
    323       SymbolicRegressionSolution bestTrainingSolution = new SymbolicRegressionSolution((DataAnalysisProblemData)ProblemData.Clone(),
    324         new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(), scaledBestTrainingTree),
    325         lowerEstimationLimit, upperEstimationLimit);
    326       bestTrainingSolution.Name = "Best solution (training)";
    327       bestTrainingSolution.Description = "The solution of the population with the highest fitness";
     172      }
    328173
    329174      // if the best validation tree is better than the current best solution => update
     
    333178        (!Maximization.Value && bestQuality < BestSolutionQuality.Value);
    334179      if (newBest) {
    335         var scaledTree = GetScaledTree(bestTree);
     180        double lowerEstimationLimit = LowerEstimationLimit.Value;
     181        double upperEstimationLimit = UpperEstimationLimit.Value;
     182        string targetVariable = ProblemData.TargetVariable.Value;
     183
     184        // calculate scaling parameters and only for the best tree using the full training set
     185        double alpha, beta;
     186        SymbolicRegressionScaledMeanSquaredErrorEvaluator.Calculate(SymbolicExpressionTreeInterpreter, bestTree,
     187          lowerEstimationLimit, upperEstimationLimit,
     188          ProblemData.Dataset, targetVariable,
     189          ProblemData.TrainingIndizes, out beta, out alpha);
     190
     191        // scale tree for solution
     192        var scaledTree = SymbolicRegressionSolutionLinearScaler.Scale(bestTree, alpha, beta);
    336193        var model = new SymbolicRegressionModel((ISymbolicExpressionTreeInterpreter)SymbolicExpressionTreeInterpreter.Clone(),
    337194          scaledTree);
     
    343200        BestSolutionQualityParameter.ActualValue = new DoubleValue(bestQuality);
    344201
     202        if (CalculateSolutionComplexity.Value) {
     203          BestSolutionLength = new IntValue(solution.Model.SymbolicExpressionTree.Size);
     204          BestSolutionHeight = new IntValue(solution.Model.SymbolicExpressionTree.Height);
     205          if (!Results.ContainsKey(BestSolutionLengthParameterName)) {
     206            Results.Add(new Result(BestSolutionLengthParameterName, "Length of the best solution on the validation set", new IntValue()));
     207            Results.Add(new Result(BestSolutionHeightParameterName, "Height of the best solution on the validation set", new IntValue()));
     208          }
     209          Results[BestSolutionLengthParameterName].Value = BestSolutionLength;
     210          Results[BestSolutionHeightParameterName].Value = BestSolutionHeight;
     211        }
     212
    345213        BestSymbolicRegressionSolutionAnalyzer.UpdateBestSolutionResults(solution, ProblemData, Results, Generations, VariableFrequencies);
    346214      }
    347 
    348       CurrentBestValidationQualityParameter.ActualValue = new DoubleValue(bestQuality);
    349215
    350216      if (!Results.ContainsKey(BestSolutionQualityValuesParameterName)) {
     
    352218        Results.Add(new Result(BestSolutionQualityParameterName, new DoubleValue()));
    353219        Results.Add(new Result(CurrentBestValidationQualityParameterName, new DoubleValue()));
    354         Results.Add(new Result("Best solution (training)", bestTrainingSolution));
    355220      }
    356221      Results[BestSolutionQualityParameterName].Value = new DoubleValue(BestSolutionQualityParameter.ActualValue.Value);
    357222      Results[CurrentBestValidationQualityParameterName].Value = new DoubleValue(bestQuality);
    358       Results["Best solution (training)"].Value = bestTrainingSolution;
    359223
    360224      DataTable validationValues = (DataTable)Results[BestSolutionQualityValuesParameterName].Value;
    361225      AddValue(validationValues, BestSolutionQualityParameter.ActualValue.Value, BestSolutionQualityParameterName, BestSolutionQualityParameterName);
    362226      AddValue(validationValues, bestQuality, CurrentBestValidationQualityParameterName, CurrentBestValidationQualityParameterName);
    363 
    364       BestSolutionQualityValuesParameter.ActualValue = validationValues;
    365 
    366       return base.Apply();
    367     }
    368 
    369     private SymbolicExpressionTree GetScaledTree(SymbolicExpressionTree tree) {
    370       // calculate scaling parameters and only for the best tree using the full training set
    371       double alpha, beta;
    372       int trainingStart = ProblemData.TrainingSamplesStart.Value;
    373       int trainingEnd = ProblemData.TrainingSamplesEnd.Value;
    374       IEnumerable<int> trainingRows = Enumerable.Range(trainingStart, trainingEnd - trainingStart);
    375       IEnumerable<double> originalValues = ProblemData.Dataset.GetEnumeratedVariableValues(ProblemData.TargetVariable.Value, trainingRows);
    376       IEnumerable<double> estimatedValues = SymbolicExpressionTreeInterpreter.GetSymbolicExpressionTreeValues(tree, ProblemData.Dataset, trainingRows);
    377 
    378       SymbolicRegressionScaledMeanSquaredErrorEvaluator.CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha);
    379 
    380       // scale tree for solution
    381       return SymbolicRegressionSolutionLinearScaler.Scale(tree, alpha, beta);
    382     }
    383 
    384     [StorableHook(HookType.AfterDeserialization)]
    385     private void Initialize() { }
     227    }
    386228
    387229    private static void AddValue(DataTable table, double data, string name, string description) {
Note: See TracChangeset for help on using the changeset viewer.