Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/24/10 19:25:11 (14 years ago)
Author:
gkronber
Message:

Exploring overfitting countermeasures. #1142

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/OverfittingAnalyzer.cs

    r4297 r4309  
    218 218      ItemArray<DoubleValue> validationQualities = ValidationQualityParameter.ActualValue;
    219 219
    220       string targetVariable = ProblemData.TargetVariable.Value;
    221 
    222       // select a random subset of rows in the validation set
    223       int validationStart = ValidiationSamplesStart.Value;
    224       int validationEnd = ValidationSamplesEnd.Value;
    225       int seed = Random.Next();
    226       int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
    227       if (count == 0) count = 1;
    228       IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count);
    229 
    230       double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
    231       double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;
     220      //string targetVariable = ProblemData.TargetVariable.Value;
     221
     222      //// select a random subset of rows in the validation set
     223      //int validationStart = ValidiationSamplesStart.Value;
     224      //int validationEnd = ValidationSamplesEnd.Value;
     225      //int seed = Random.Next();
     226      //int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
     227      //if (count == 0) count = 1;
     228      //IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count);
     229
     230      //double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
     231      //double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;
    232 232
    233 233      //double bestQuality = Maximization.Value ? double.NegativeInfinity : double.PositiveInfinity;
     
    250 250      //if (RelativeValidationQualityParameter.ActualValue == null) {
    251 251      // first call initialize the relative quality using the difference between average training and validation quality
    252       double avgTrainingQuality = qualities.Select(x => x.Value).Median();
    253       double avgValidationQuality = validationQualities.Select(x => x.Value).Median();
     252      double avgTrainingQuality = qualities.Select(x => x.Value).Average();
     253      double avgValidationQuality = validationQualities.Select(x => x.Value).Average();
    254 254
    255 255      if (Maximization.Value)
     
    284 284      bool overfitting =
    285 285        avgTrainingQuality > InitialTrainingQualityParameter.ActualValue.Value &&  // better on training than in initial generation
     286        // RelativeValidationQualityParameter.ActualValue.Value < 0.0 && // validation quality is worse than training quality
    286 287        r < CorrelationLimitParameter.ActualValue.Value;  // low correlation between training and validation quality
    287 288
    288       //// if validation quality is within a certain margin of percentage deviation (default -5% .. 5%) then there is no overfitting
    289       //// correlation is also bad when underfitting but validation quality cannot be a lot larger than training quality if overfitting
    290       //(RelativeValidationQualityParameter.ActualValue.Value > RelativeValidationQualityUpperLimitParameter.ActualValue.Value || // better on training than on validation
    291       // RelativeValidationQualityParameter.ActualValue.Value < RelativeValidationQualityLowerLimitParameter.ActualValue.Value); // better on training than on validation
    292 289
    293 290      OverfittingParameter.ActualValue = new BoolValue(overfitting);
Note: See TracChangeset for help on using the changeset viewer.