Changeset 4309


Ignore:
Timestamp:
08/24/10 19:25:11 (12 years ago)
Author:
gkronber
Message:

Exploring overfitting countermeasures. #1142

Location:
branches/DataAnalysis
Files:
1 added
3 edited

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/OverfittingAnalyzer.cs

    r4297 r4309  
    218218      ItemArray<DoubleValue> validationQualities = ValidationQualityParameter.ActualValue;
    219219
    220       string targetVariable = ProblemData.TargetVariable.Value;
    221 
    222       // select a random subset of rows in the validation set
    223       int validationStart = ValidiationSamplesStart.Value;
    224       int validationEnd = ValidationSamplesEnd.Value;
    225       int seed = Random.Next();
    226       int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
    227       if (count == 0) count = 1;
    228       IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count);
    229 
    230       double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
    231       double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;
     220      //string targetVariable = ProblemData.TargetVariable.Value;
     221
     222      //// select a random subset of rows in the validation set
     223      //int validationStart = ValidiationSamplesStart.Value;
     224      //int validationEnd = ValidationSamplesEnd.Value;
     225      //int seed = Random.Next();
     226      //int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
     227      //if (count == 0) count = 1;
     228      //IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count);
     229
     230      //double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
     231      //double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;
    232232
    233233      //double bestQuality = Maximization.Value ? double.NegativeInfinity : double.PositiveInfinity;
     
    250250      //if (RelativeValidationQualityParameter.ActualValue == null) {
    251251      // first call initialize the relative quality using the difference between average training and validation quality
    252       double avgTrainingQuality = qualities.Select(x => x.Value).Median();
    253       double avgValidationQuality = validationQualities.Select(x => x.Value).Median();
     252      double avgTrainingQuality = qualities.Select(x => x.Value).Average();
     253      double avgValidationQuality = validationQualities.Select(x => x.Value).Average();
    254254
    255255      if (Maximization.Value)
     
    284284      bool overfitting =
    285285        avgTrainingQuality > InitialTrainingQualityParameter.ActualValue.Value &&  // better on training than in initial generation
     286        // RelativeValidationQualityParameter.ActualValue.Value < 0.0 && // validation quality is worse than training quality
    286287        r < CorrelationLimitParameter.ActualValue.Value;  // low correlation between training and validation quality
    287288
    288       //// if validation quality is within a certain margin of percentage deviation (default -5% .. 5%) then there is no overfitting
    289       //// correlation is also bad when underfitting but validation quality cannot be a lot larger than training quality if overfitting
    290       //(RelativeValidationQualityParameter.ActualValue.Value > RelativeValidationQualityUpperLimitParameter.ActualValue.Value || // better on training than on validation
    291       // RelativeValidationQualityParameter.ActualValue.Value < RelativeValidationQualityLowerLimitParameter.ActualValue.Value); // better on training than on validation
    292289
    293290      OverfittingParameter.ActualValue = new BoolValue(overfitting);
  • branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.3/HeuristicLab.Problems.DataAnalysis-3.3.csproj

    r4255 r4309  
    133133    <Compile Include="Interfaces\IOnlineEvaluator.cs" />
    134134    <Compile Include="MatrixExtensions.cs" />
     135    <Compile Include="Operators\CovariantParsimonyPressureAdder.cs" />
    135136    <Compile Include="Operators\CovariantParsimonyPressure.cs" />
    136137    <Compile Include="Operators\DynamicDepthLimitInitializer.cs" />
  • branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.3/Operators/CovariantParsimonyPressure.cs

    r4272 r4309  
    4747      get { return (IScopeTreeLookupParameter<DoubleValue>)Parameters["AdjustedQuality"]; }
    4848    }
    49 
    5049    public ILookupParameter<BoolValue> MaximizationParameter {
    5150      get { return (ILookupParameter<BoolValue>)Parameters["Maximization"]; }
     
    5453      get { return (IValueLookupParameter<DoubleValue>)Parameters["K"]; }
    5554    }
     55    public ILookupParameter<DoubleValue> CParameter {
     56      get { return (ILookupParameter<DoubleValue>)Parameters["C"]; }
     57    }
    5658    public ILookupParameter<IntValue> GenerationsParameter {
    5759      get { return (ILookupParameter<IntValue>)Parameters["Generations"]; }
     
    6365      get { return (IValueLookupParameter<BoolValue>)Parameters["ApplyParsimonyPressure"]; }
    6466    }
    65     public ILookupParameter<DataTable> ValidationQualityParameter {
    66       get { return (ILookupParameter<DataTable>)Parameters["Validation Quality"]; }
    67     }
    68     //public ILookupParameter<DoubleValue> CurrentBestValidationQualityParameter {
    69     //  get { return (ILookupParameter<DoubleValue>)Parameters["Current best validation quality"]; }
    70     //}
    71     //public ILookupParameter<DoubleValue> BestValidationQualityParameter {
    72     //  get { return (ILookupParameter<DoubleValue>)Parameters["Best solution quality (validation)"]; }
    73     //}
    7467    public ILookupParameter<DoubleValue> LengthCorrelationParameter {
    7568      get { return (ILookupParameter<DoubleValue>)Parameters["Correlation(Length, AdjustedFitness)"]; }
     
    7871      get { return (ILookupParameter<DoubleValue>)Parameters["Correlation(Fitness, AdjustedFitness)"]; }
    7972    }
    80     //public IValueLookupParameter<IntValue> GenerationSpanParameter {
    81     //  get { return (IValueLookupParameter<IntValue>)Parameters["GenerationSpan"]; }
    82     //}
    83     //public IValueLookupParameter<PercentValue> OverfittingLimitParameter {
    84     //  get { return (IValueLookupParameter<PercentValue>)Parameters["OverfittingLimit"]; }
    85     //}
    8673    public IValueLookupParameter<PercentValue> ComplexityAdaptionParameter {
    8774      get { return (IValueLookupParameter<PercentValue>)Parameters["ComplexityAdaption"]; }
    88     }
    89     public ILookupParameter<DataTable> QualitiesParameter {
    90       get { return (ILookupParameter<DataTable>)Parameters["Qualities"]; }
    9175    }
    9276    public IValueLookupParameter<DoubleValue> MinAverageSizeParameter {
     
    10387      Parameters.Add(new ValueLookupParameter<DoubleValue>("K", new DoubleValue(1.0)));
    10488      Parameters.Add(new LookupParameter<IntValue>("Generations"));
    105       Parameters.Add(new ValueLookupParameter<IntValue>("FirstGenerationParameter", new IntValue(5)));
     89      Parameters.Add(new ValueLookupParameter<IntValue>("FirstGenerationParameter", new IntValue(1)));
    10690      Parameters.Add(new ValueLookupParameter<BoolValue>("ApplyParsimonyPressure"));
    107       //Parameters.Add(new LookupParameter<DoubleValue>("Current best validation quality"));
    108       //Parameters.Add(new LookupParameter<DoubleValue>("Best solution quality (validation)"));
    109       Parameters.Add(new LookupParameter<DataTable>("Validation Quality"));
    110       Parameters.Add(new LookupParameter<DataTable>("Qualities"));
    111       //Parameters.Add(new ValueLookupParameter<IntValue>("GenerationSpan", new IntValue(5)));
    112       //Parameters.Add(new ValueLookupParameter<PercentValue>("OverfittingLimit", new PercentValue(5)));
    113       Parameters.Add(new ValueLookupParameter<PercentValue>("ComplexityAdaption", new PercentValue(-5)));
     91      Parameters.Add(new ValueLookupParameter<PercentValue>("ComplexityAdaption", new PercentValue(-0.01)));
    11492      Parameters.Add(new LookupParameter<DoubleValue>("Correlation(Length, AdjustedFitness)"));
    11593      Parameters.Add(new LookupParameter<DoubleValue>("Correlation(Fitness, AdjustedFitness)"));
    11694      Parameters.Add(new ValueLookupParameter<DoubleValue>("MinAverageSize", new DoubleValue(15)));
     95      Parameters.Add(new LookupParameter<DoubleValue>("C"));
    11796    }
    11897
     
    130109      }
    131110      if (!Parameters.ContainsKey("FirstGenerationParameter")) {
    132         Parameters.Add(new ValueLookupParameter<IntValue>("FirstGenerationParameter", new IntValue(5)));
     111        Parameters.Add(new ValueLookupParameter<IntValue>("FirstGenerationParameter", new IntValue(1)));
    133112      }
    134113      if (!Parameters.ContainsKey("ApplyParsimonyPressure")) {
    135114        Parameters.Add(new ValueLookupParameter<BoolValue>("ApplyParsimonyPressure"));
    136115      }
    137       //if (!Parameters.ContainsKey("Current best validation quality")) {
    138       //  Parameters.Add(new LookupParameter<DoubleValue>("Current best validation quality"));
    139       //}
    140       //if (!Parameters.ContainsKey("Best solution quality (validation)")) {
    141       //  Parameters.Add(new LookupParameter<DoubleValue>("Best solution quality (validation)"));
    142       //}
    143       if (!Parameters.ContainsKey("Correlation(Length, AdjustedFitness)")) {
    144         Parameters.Add(new LookupParameter<DoubleValue>("Correlation(Length, AdjustedFitness)"));
    145       }
    146       if (!Parameters.ContainsKey("Correlation(Fitness, AdjustedFitness)")) {
    147         Parameters.Add(new LookupParameter<DoubleValue>("Correlation(Fitness, AdjustedFitness)"));
    148       }
    149       if (!Parameters.ContainsKey("Validation Quality")) {
    150         Parameters.Add(new LookupParameter<DataTable>("Validation Quality"));
    151       }
    152       if (!Parameters.ContainsKey("Qualities")) {
    153         Parameters.Add(new LookupParameter<DataTable>("Qualities"));
    154       }
    155116      if (!Parameters.ContainsKey("ComplexityAdaption")) {
    156         Parameters.Add(new ValueLookupParameter<PercentValue>("ComplexityAdaption", new PercentValue(-5)));
     117        Parameters.Add(new ValueLookupParameter<PercentValue>("ComplexityAdaption", new PercentValue(-0.01)));
    157118      }
    158119      if (!Parameters.ContainsKey("MinAverageSize")) {
    159120        Parameters.Add(new ValueLookupParameter<DoubleValue>("MinAverageSize", new DoubleValue(15)));
     121      }
     122      if (!Parameters.ContainsKey("C")) {
     123        Parameters.Add(new LookupParameter<DoubleValue>("C"));
    160124      }
    161125    }
     
    194158
    195159        double sizeAdaption = lengthMeanCalculator.Mean * ComplexityAdaptionParameter.ActualValue.Value;
    196         if (sizeAdaption < 0) sizeAdaption = Math.Floor(sizeAdaption);
    197         else sizeAdaption = Math.Ceiling(sizeAdaption);
    198         double g = lengthMeanCalculator.Mean + sizeAdaption;
    199         if (g < MinAverageSizeParameter.ActualValue.Value)
    200           g = MinAverageSizeParameter.ActualValue.Value;
     160        if (lengthMeanCalculator.Mean + sizeAdaption < MinAverageSizeParameter.ActualValue.Value)
     161          sizeAdaption = 0.0;
    201162
    202163        //            cov(l, f) - (g(t+1) - mu(t)) avgF
    203164        // c(t) =  --------------------------------------------
    204165        //           cov(l, l^k) - (g(t+1) - mu(t)) E[l^k]
    205         double c = lengthFitnessCovEvaluator.Covariance - (g - lengthMeanCalculator.Mean) * fitnessMeanCalculator.Mean;
    206         c /= lengthAdjLengthCovEvaluator.Covariance - (g - lengthMeanCalculator.Mean) * adjLengthMeanCalculator.Mean;
     166        double c = lengthFitnessCovEvaluator.Covariance - sizeAdaption * fitnessMeanCalculator.Mean;
     167        c /= lengthAdjLengthCovEvaluator.Covariance - sizeAdaption * adjLengthMeanCalculator.Mean;
     168
     169        CParameter.ActualValue = new DoubleValue(c);
    207170
    208171        // adjust fitness
     
    230193
    231194      } else {
     195        CParameter.ActualValue = new DoubleValue(0.0);
    232196        // adjusted fitness is equal to fitness
    233197        AdjustedQualityParameter.ActualValue = (ItemArray<DoubleValue>)QualityParameter.ActualValue.Clone();
Note: See TracChangeset for help on using the changeset viewer.