Changeset 4271


Ignore:
Timestamp:
08/20/10 12:45:57 (12 years ago)
Author:
gkronber
Message:

Added correlation-based overfitting detection analyzer. #1142

Location:
branches/DataAnalysis
Files:
1 added
3 edited

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/HeuristicLab.Problems.DataAnalysis.Regression-3.3.csproj

    r4166 r4271  
    119119    <Compile Include="SupportVectorRegression\SupportVectorRegressionSolution.cs" />
    120120    <Compile Include="Symbolic\Analyzers\BestSymbolicRegressionSolutionAnalyzer.cs" />
     121    <Compile Include="Symbolic\Analyzers\OverfittingAnalyzer.cs" />
    121122    <Compile Include="Symbolic\Analyzers\FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs" />
    122123    <Compile Include="Symbolic\Analyzers\ISymbolicRegressionAnalyzer.cs" />
  • branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer.cs

    r4255 r4271  
    238238
    239239    [StorableConstructor]
    240     private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base() { }
     240    private FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer(bool deserializing) : base(deserializing) { }
    241241
    242242    [StorableHook(HookType.AfterDeserialization)]
  • branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.3/Operators/CovariantParsimonyPressure.cs

    r4255 r4271  
    6060      get { return (IValueLookupParameter<IntValue>)Parameters["FirstGenerationParameter"]; }
    6161    }
    62     public IValueLookupParameter<BoolValue> AntiOverfitParameter {
    63       get { return (IValueLookupParameter<BoolValue>)Parameters["AntiOverfit"]; }
     62    public IValueLookupParameter<BoolValue> ApplyParsimonyPressureParameter {
     63      get { return (IValueLookupParameter<BoolValue>)Parameters["ApplyParsimonyPressure"]; }
    6464    }
    6565    public ILookupParameter<DataTable> ValidationQualityParameter {
    6666      get { return (ILookupParameter<DataTable>)Parameters["Validation Quality"]; }
    6767    }
    68     public ILookupParameter<DoubleValue> CurrentBestValidationQualityParameter {
    69       get { return (ILookupParameter<DoubleValue>)Parameters["Current best validation quality"]; }
    70     }
    71     public ILookupParameter<DoubleValue> BestValidationQualityParameter {
    72       get { return (ILookupParameter<DoubleValue>)Parameters["Best solution quality (validation)"]; }
    73     }
     68    //public ILookupParameter<DoubleValue> CurrentBestValidationQualityParameter {
     69    //  get { return (ILookupParameter<DoubleValue>)Parameters["Current best validation quality"]; }
     70    //}
     71    //public ILookupParameter<DoubleValue> BestValidationQualityParameter {
     72    //  get { return (ILookupParameter<DoubleValue>)Parameters["Best solution quality (validation)"]; }
     73    //}
    7474    public ILookupParameter<DoubleValue> LengthCorrelationParameter {
    7575      get { return (ILookupParameter<DoubleValue>)Parameters["Correlation(Length, AdjustedFitness)"]; }
     
    7878      get { return (ILookupParameter<DoubleValue>)Parameters["Correlation(Fitness, AdjustedFitness)"]; }
    7979    }
    80     public IValueLookupParameter<IntValue> GenerationSpanParameter {
    81       get { return (IValueLookupParameter<IntValue>)Parameters["GenerationSpan"]; }
    82     }
    83     public IValueLookupParameter<PercentValue> OverfittingLimitParameter {
    84       get { return (IValueLookupParameter<PercentValue>)Parameters["OverfittingLimit"]; }
    85     }
     80    //public IValueLookupParameter<IntValue> GenerationSpanParameter {
     81    //  get { return (IValueLookupParameter<IntValue>)Parameters["GenerationSpan"]; }
     82    //}
     83    //public IValueLookupParameter<PercentValue> OverfittingLimitParameter {
     84    //  get { return (IValueLookupParameter<PercentValue>)Parameters["OverfittingLimit"]; }
     85    //}
    8686    public IValueLookupParameter<PercentValue> ComplexityAdaptionParameter {
    8787      get { return (IValueLookupParameter<PercentValue>)Parameters["ComplexityAdaption"]; }
     
    101101      Parameters.Add(new LookupParameter<IntValue>("Generations"));
    102102      Parameters.Add(new ValueLookupParameter<IntValue>("FirstGenerationParameter", new IntValue(5)));
    103       Parameters.Add(new ValueLookupParameter<BoolValue>("AntiOverfit", new BoolValue(false)));
     103      Parameters.Add(new ValueLookupParameter<BoolValue>("ApplyParsimonyPressure"));
    104104      //Parameters.Add(new LookupParameter<DoubleValue>("Current best validation quality"));
    105105      //Parameters.Add(new LookupParameter<DoubleValue>("Best solution quality (validation)"));
    106106      Parameters.Add(new LookupParameter<DataTable>("Validation Quality"));
    107107      Parameters.Add(new LookupParameter<DataTable>("Qualities"));
    108       Parameters.Add(new ValueLookupParameter<IntValue>("GenerationSpan", new IntValue(5)));
    109       Parameters.Add(new ValueLookupParameter<PercentValue>("OverfittingLimit", new PercentValue(5)));
     108      //Parameters.Add(new ValueLookupParameter<IntValue>("GenerationSpan", new IntValue(5)));
     109      //Parameters.Add(new ValueLookupParameter<PercentValue>("OverfittingLimit", new PercentValue(5)));
    110110      Parameters.Add(new ValueLookupParameter<PercentValue>("ComplexityAdaption", new PercentValue(-5)));
    111111      Parameters.Add(new LookupParameter<DoubleValue>("Correlation(Length, AdjustedFitness)"));
     
    128128        Parameters.Add(new ValueLookupParameter<IntValue>("FirstGenerationParameter", new IntValue(5)));
    129129      }
    130       if (!Parameters.ContainsKey("AntiOverfit")) {
    131         Parameters.Add(new ValueLookupParameter<BoolValue>("AntiOverfit", new BoolValue(false)));
     130      if (!Parameters.ContainsKey("ApplyParsimonyPressure")) {
     131        Parameters.Add(new ValueLookupParameter<BoolValue>("ApplyParsimonyPressure"));
    132132      }
    133133      //if (!Parameters.ContainsKey("Current best validation quality")) {
     
    149149        Parameters.Add(new LookupParameter<DataTable>("Qualities"));
    150150      }
    151       if (!Parameters.ContainsKey("GenerationSpan")) {
    152         Parameters.Add(new ValueLookupParameter<IntValue>("GenerationSpan", new IntValue(5)));
    153       }
    154       if (!Parameters.ContainsKey("OverfittingLimit")) {
    155         Parameters.Add(new ValueLookupParameter<PercentValue>("OverfittingLimit", new PercentValue(5)));
    156       }
    157151      if (!Parameters.ContainsKey("ComplexityAdaption")) {
    158152        Parameters.Add(new ValueLookupParameter<PercentValue>("ComplexityAdaption", new PercentValue(-5)));
     
    163157      ItemArray<SymbolicExpressionTree> trees = SymbolicExpressionTreeParameter.ActualValue;
    164158      ItemArray<DoubleValue> qualities = QualityParameter.ActualValue;
    165       // always apply Parsimony pressure if anti-overfit is false
     159      // always apply Parsimony pressure if overfitting has been detected
    166160      // otherwise apply PP only when we are currently overfitting
    167161      if (GenerationsParameter.ActualValue != null && GenerationsParameter.ActualValue.Value >= FirstGenerationParameter.ActualValue.Value &&
    168          (AntiOverfitParameter.ActualValue.Value == false || IsOverfitting())) {
     162           ApplyParsimonyPressureParameter.ActualValue.Value == true) {
    169163        var lengths = from tree in trees
    170164                      select tree.Size;
     
    241235      return base.Apply();
    242236    }
    243 
    244     private bool IsOverfitting() {
    245       bool maximization = MaximizationParameter.ActualValue.Value;
    246       DataTable trainingQualities = QualitiesParameter.ActualValue;
    247       DataTable validationQualities = ValidationQualityParameter.ActualValue;
    248       int genSpan = GenerationSpanParameter.ActualValue.Value;
    249       if (validationQualities == null || trainingQualities == null) return false;
    250       if (validationQualities.Rows["Best solution quality (validation)"].Values.Count < genSpan) return false;
    251 
    252       IEnumerable<double> bestTrainingQualities = trainingQualities.Rows["CurrentBestQuality"].Values;
    253       IEnumerable<double> bestValidationQualities = validationQualities.Rows["Current best validation quality"].Values;
    254 
    255       double trainingAvg = bestTrainingQualities.Reverse().Take(genSpan).Average();
    256       double validationAvg = bestValidationQualities.Reverse().Take(genSpan).Average();
    257 
    258       double maxPercentDiff = OverfittingLimitParameter.ActualValue.Value;
    259 
    260       double percentDiff = maximization ? trainingAvg / validationAvg - 1 : validationAvg / trainingAvg - 1;
    261       return percentDiff > maxPercentDiff;
    262     }
    263237  }
    264238}
Note: See TracChangeset for help on using the changeset viewer.