Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
03/18/11 10:01:00 (14 years ago)
Author:
gkronber
Message:

#1418 renamed bounded evaluator, added base classes for single objective and multi objective validation analyzers, added overfitting analyzers for symbolic regression and classification.

Location:
branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4
Files:
7 edited
1 copied
1 moved

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification-3.4.csproj

    r5717 r5747  
    110110    <Compile Include="Interfaces\ISymbolicDiscriminantFunctionClassificationModel.cs" />
    111111    <Compile Include="MultiObjective\SymbolicClassificationMultiObjectiveValidationBestSolutionAnalyzer.cs" />
     112    <Compile Include="SingleObjective\SymbolicClassificationSingleObjectiveBoundedMeanSquaredErrorEvaluator.cs" />
     113    <Compile Include="SingleObjective\SymbolicClassificationSingleObjectiveOverfittingAnalyzer.cs" />
    112114    <Compile Include="SingleObjective\SymbolicClassificationSingleObjectiveValidationBestSolutionAnalyzer.cs" />
    113115    <Compile Include="SymbolicDiscriminantFunctionClassificationModel.cs" />
     
    119121    <Compile Include="MultiObjective\SymbolicClassificationMultiObjectiveProblem.cs" />
    120122    <Compile Include="SingleObjective\SymbolicClassificationSingleObjectiveEvaluator.cs" />
    121     <Compile Include="SingleObjective\SymbolicClassificationSingleObjectiveBoundeMeanSquaredErrorEvaluator.cs" />
    122123    <Compile Include="SingleObjective\SymbolicClassificationSingleObjectiveMeanSquaredErrorEvaluator.cs" />
    123124    <Compile Include="SingleObjective\SymbolicClassificationSingleObjectivePearsonRSquaredEvaluator.cs" />
  • branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/MultiObjective/SymbolicClassificationMultiObjectiveMeanSquaredErrorTreeSizeEvaluator.cs

    r5722 r5747  
    2626using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    2727using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     28using System;
    2829
    2930namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Classification {
  • branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/MultiObjective/SymbolicClassificationMultiObjectivePearsonRSquaredTreeSizeEvaluator.cs

    r5722 r5747  
    55using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    66using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     7using System;
    78
    89namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Classification {
     
    3334      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
    3435      IEnumerable<double> originalValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable, rows);
    35       double r2 = OnlinePearsonsRSquaredEvaluator.Calculate(originalValues, estimatedValues);
    36       return new double[2] { r2, solution.Length };
     36      try {
     37        double r2 = OnlinePearsonsRSquaredEvaluator.Calculate(originalValues, estimatedValues);
     38        return new double[2] { r2, solution.Length };
     39      }
     40      catch (ArgumentException) {
     41        // if R² cannot be calculated because of infinity or NaN values => return worst possible fitness value
     42        return new double[2] { 0.0, solution.Length };
     43      }
    3744    }
    3845
  • branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/MultiObjective/SymbolicClassificationMultiObjectiveTrainingBestSolutionAnalyzer.cs

    r5736 r5747  
    3838  [StorableClass]
    3939  public sealed class SymbolicClassificationMultiObjectiveTrainingBestSolutionAnalyzer : SymbolicDataAnalysisMultiObjectiveTrainingBestSolutionAnalyzer<ISymbolicClassificationSolution>,
    40     ISymbolicDataAnalysisInterpreterOperator {
     40    ISymbolicDataAnalysisInterpreterOperator, ISymbolicDataAnalysisBoundedOperator {
    4141    private const string ProblemDataParameterName = "ProblemData";
    4242    private const string SymbolicDataAnalysisTreeInterpreterParameterName = "SymbolicDataAnalysisTreeInterpreter";
  • branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveBoundedMeanSquaredErrorEvaluator.cs

    r5735 r5747  
    3232  [Item("Bounded Mean squared error Evaluator", "Calculates the bounded mean squared error of a symbolic classification solution (estimations above or below the class values are only penaltilized linearly.")]
    3333  [StorableClass]
    34   public class SymbolicClassificationSingleObjectiveBoundeMeanSquaredErrorEvaluator : SymbolicClassificationSingleObjectiveEvaluator {
     34  public class SymbolicClassificationSingleObjectiveBoundedMeanSquaredErrorEvaluator : SymbolicClassificationSingleObjectiveEvaluator {
    3535
    3636    [StorableConstructor]
    37     protected SymbolicClassificationSingleObjectiveBoundeMeanSquaredErrorEvaluator(bool deserializing) : base(deserializing) { }
    38     protected SymbolicClassificationSingleObjectiveBoundeMeanSquaredErrorEvaluator(SymbolicClassificationSingleObjectiveBoundeMeanSquaredErrorEvaluator original, Cloner cloner) : base(original, cloner) { }
     37    protected SymbolicClassificationSingleObjectiveBoundedMeanSquaredErrorEvaluator(bool deserializing) : base(deserializing) { }
     38    protected SymbolicClassificationSingleObjectiveBoundedMeanSquaredErrorEvaluator(SymbolicClassificationSingleObjectiveBoundedMeanSquaredErrorEvaluator original, Cloner cloner) : base(original, cloner) { }
    3939    public override IDeepCloneable Clone(Cloner cloner) {
    40       return new SymbolicClassificationSingleObjectiveBoundeMeanSquaredErrorEvaluator(this, cloner);
     40      return new SymbolicClassificationSingleObjectiveBoundedMeanSquaredErrorEvaluator(this, cloner);
    4141    }
    4242
    43     public SymbolicClassificationSingleObjectiveBoundeMeanSquaredErrorEvaluator() : base() { }
     43    public SymbolicClassificationSingleObjectiveBoundedMeanSquaredErrorEvaluator() : base() { }
    4444
    4545    public override bool Maximization { get { return false; } }
  • branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveMeanSquaredErrorEvaluator.cs

    r5722 r5747  
    2626using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    2727using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     28using System;
    2829
    2930namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Classification {
     
    6263      LowerEstimationLimitParameter.ExecutionContext = context;
    6364      UpperEstimationLimitParameter.ExecutionContext = context;
    64      
     65
    6566      double mse = Calculate(SymbolicDataAnalysisTreeInterpreter, tree, LowerEstimationLimit.Value, UpperEstimationLimit.Value, problemData, rows);
    6667
  • branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveOverfittingAnalyzer.cs

    r5735 r5747  
    3131using HeuristicLab.Parameters;
    3232using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    33 using HeuristicLab.Problems.DataAnalysis.Evaluators;
    3433using HeuristicLab.Problems.DataAnalysis.Symbolic;
    3534using System;
    3635
    37 namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic.Analyzers {
    38   [Item("SymbolicRegressionOverfittingAnalyzer", "Calculates and tracks correlation of training and validation fitness of symbolic regression models.")]
     36namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Classification {
     37  [Item("SymbolicClassificationSingleObjectiveOverfittingAnalyzer", "Calculates and tracks correlation of training and validation fitness of symbolic classification models.")]
    3938  [StorableClass]
    40   public sealed class SymbolicRegressionOverfittingAnalyzer : SymbolicRegressionValidationAnalyzer, ISymbolicRegressionAnalyzer {
    41     private const string MaximizationParameterName = "Maximization";
    42     private const string QualityParameterName = "Quality";
    43     private const string TrainingValidationCorrelationParameterName = "TrainingValidationCorrelation";
    44     private const string TrainingValidationCorrelationTableParameterName = "TrainingValidationCorrelationTable";
     39  public sealed class SymbolicClassificationSingleObjectiveOverfittingAnalyzer : SymbolicDataAnalysisSingleObjectiveValidationAnalyzer<ISymbolicClassificationSingleObjectiveEvaluator, IClassificationProblemData> {
     40    private const string TrainingValidationCorrelationParameterName = "Training and validation fitness correlation";
     41    private const string TrainingValidationCorrelationTableParameterName = "Training and validation fitness correlation table";
    4542    private const string LowerCorrelationThresholdParameterName = "LowerCorrelationThreshold";
    4643    private const string UpperCorrelationThresholdParameterName = "UpperCorrelationThreshold";
    4744    private const string OverfittingParameterName = "IsOverfitting";
    48     private const string ResultsParameterName = "Results";
    4945
    5046    #region parameter properties
    51     public ScopeTreeLookupParameter<DoubleValue> QualityParameter {
    52       get { return (ScopeTreeLookupParameter<DoubleValue>)Parameters[QualityParameterName]; }
    53     }
    54     public ILookupParameter<BoolValue> MaximizationParameter {
    55       get { return (ILookupParameter<BoolValue>)Parameters[MaximizationParameterName]; }
    56     }
    5747    public ILookupParameter<DoubleValue> TrainingValidationQualityCorrelationParameter {
    5848      get { return (ILookupParameter<DoubleValue>)Parameters[TrainingValidationCorrelationParameterName]; }
     
    7060      get { return (ILookupParameter<BoolValue>)Parameters[OverfittingParameterName]; }
    7161    }
    72     public ILookupParameter<ResultCollection> ResultsParameter {
    73       get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
    74     }
    75     #endregion
    76     #region properties
    77     public BoolValue Maximization {
    78       get { return MaximizationParameter.ActualValue; }
    79     }
    8062    #endregion
    8163
    8264    [StorableConstructor]
    83     private SymbolicRegressionOverfittingAnalyzer(bool deserializing) : base(deserializing) { }
    84     private SymbolicRegressionOverfittingAnalyzer(SymbolicRegressionOverfittingAnalyzer original, Cloner cloner) : base(original, cloner) { }
    85     public SymbolicRegressionOverfittingAnalyzer()
     65    private SymbolicClassificationSingleObjectiveOverfittingAnalyzer(bool deserializing) : base(deserializing) { }
     66    private SymbolicClassificationSingleObjectiveOverfittingAnalyzer(SymbolicClassificationSingleObjectiveOverfittingAnalyzer original, Cloner cloner) : base(original, cloner) { }
     67    public SymbolicClassificationSingleObjectiveOverfittingAnalyzer()
    8668      : base() {
    87       Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>(QualityParameterName, "Training fitness"));
    88       Parameters.Add(new LookupParameter<BoolValue>(MaximizationParameterName, "The direction of optimization."));
    8969      Parameters.Add(new LookupParameter<DoubleValue>(TrainingValidationCorrelationParameterName, "Correlation of training and validation fitnesses"));
    9070      Parameters.Add(new LookupParameter<DataTable>(TrainingValidationCorrelationTableParameterName, "Data table of training and validation fitness correlation values over the whole run."));
     
    9272      Parameters.Add(new ValueLookupParameter<DoubleValue>(UpperCorrelationThresholdParameterName, "Upper threshold for correlation value that marks the boundary from overfitting to non-overfitting.", new DoubleValue(0.75)));
    9373      Parameters.Add(new LookupParameter<BoolValue>(OverfittingParameterName, "Boolean indicator for overfitting."));
    94       Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The results collection."));
    95     }
    96 
    97     [StorableHook(HookType.AfterDeserialization)]
    98     private void AfterDeserialization() {
    9974    }
    10075
    10176    public override IDeepCloneable Clone(Cloner cloner) {
    102       return new SymbolicRegressionOverfittingAnalyzer(this, cloner);
     77      return new SymbolicClassificationSingleObjectiveOverfittingAnalyzer(this, cloner);
    10378    }
    10479
    105     protected override void Analyze(SymbolicExpressionTree[] trees, double[] validationQuality) {
     80    public override IOperation Apply() {
    10681      double[] trainingQuality = QualityParameter.ActualValue.Select(x => x.Value).ToArray();
    107 
     82      // evaluate on validation partition
     83      int start = ValidationSamplesStart.Value;
     84      int end = ValidationSamplesEnd.Value;
     85      var rows = Enumerable.Range(start, end - start);
     86      IExecutionContext childContext = (IExecutionContext)ExecutionContext.CreateChildOperation(Evaluator);
     87      double[] validationQuality = (from tree in SymbolicExpressionTrees
     88                                    select Evaluator.Evaluate(childContext, tree, ProblemData, rows))
     89                                   .ToArray();
    10890      double r = alglib.spearmancorr2(trainingQuality, validationQuality);
    10991
     
    11193
    11294      if (TrainingValidationQualityCorrelationTableParameter.ActualValue == null) {
    113         var dataTable = new DataTable("Training and validation fitness correlation table", "Data table of training and validation fitness correlation values over the whole run.");
    114         dataTable.Rows.Add(new DataRow("Training and validation fitness correlation", "Training and validation fitness correlation values"));
     95        var dataTable = new DataTable(TrainingValidationQualityCorrelationTableParameter.Name, TrainingValidationQualityCorrelationTableParameter.Description);
     96        dataTable.Rows.Add(new DataRow(TrainingValidationQualityCorrelationParameter.Name, TrainingValidationQualityCorrelationParameter.Description));
    11597        TrainingValidationQualityCorrelationTableParameter.ActualValue = dataTable;
    116         ResultsParameter.ActualValue.Add(new Result(TrainingValidationCorrelationTableParameterName, dataTable));
     98        ResultCollectionParameter.ActualValue.Add(new Result(TrainingValidationQualityCorrelationTableParameter.Name, dataTable));
    11799      }
    118100
    119       TrainingValidationQualityCorrelationTableParameter.ActualValue.Rows["Training and validation fitness correlation"].Values.Add(r);
     101      TrainingValidationQualityCorrelationTableParameter.ActualValue.Rows[TrainingValidationQualityCorrelationParameter.Name].Values.Add(r);
    120102
    121103      if (OverfittingParameter.ActualValue != null && OverfittingParameter.ActualValue.Value) {
     
    128110        OverfittingParameter.ActualValue = new BoolValue(r < LowerCorrelationThresholdParameter.ActualValue.Value);
    129111      }
     112
     113      return base.Apply();
    130114    }
    131115  }
  • branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectivePearsonRSquaredEvaluator.cs

    r5722 r5747  
    2626using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    2727using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     28using System;
    2829
    2930namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Classification {
     
    5455      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
    5556      IEnumerable<double> originalValues = problemData.Dataset.GetEnumeratedVariableValues(problemData.TargetVariable, rows);
    56       return OnlinePearsonsRSquaredEvaluator.Calculate(originalValues, estimatedValues);
     57      try {
     58        return OnlinePearsonsRSquaredEvaluator.Calculate(originalValues, estimatedValues);
     59      }
     60      catch (ArgumentException) {
     61        // if R² cannot be calculated because of NaN or infinity elements => return worst possible fitness value
     62        return 0.0;
     63      }
    5764    }
    5865
  • branches/DataAnalysis Refactoring/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SingleObjective/SymbolicClassificationSingleObjectiveProblem.cs

    r5733 r5747  
    7676      Operators.Add(new SymbolicClassificationSingleObjectiveTrainingBestSolutionAnalyzer());
    7777      Operators.Add(new SymbolicClassificationSingleObjectiveValidationBestSolutionAnalyzer());
     78      Operators.Add(new SymbolicClassificationSingleObjectiveOverfittingAnalyzer());
    7879      ParameterizeOperators();
    7980    }
Note: See TracChangeset for help on using the changeset viewer.