Changeset 4326


Ignore:
Timestamp:
08/26/10 12:43:02 (12 years ago)
Author:
gkronber
Message:

Changed OverfittingAnalyzer to make overfitting boundaries more fuzzy through upper and lower limits for correlations instead of a hard limit. #1142

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/OverfittingAnalyzer.cs

    r4309 r4326  
    104104      get { return (ILookupParameter<DoubleValue>)Parameters["TrainingValidationCorrelation"]; }
    105105    }
    106     public IValueLookupParameter<DoubleValue> CorrelationLimitParameter {
    107       get { return (IValueLookupParameter<DoubleValue>)Parameters["CorrelationLimit"]; }
     106    public IValueLookupParameter<DoubleValue> LowerCorrelationLimitParameter {
     107      get { return (IValueLookupParameter<DoubleValue>)Parameters["LowerCorrelationLimit"]; }
     108    }
     109    public IValueLookupParameter<DoubleValue> UpperCorrelationLimitParameter {
     110      get { return (IValueLookupParameter<DoubleValue>)Parameters["UpperCorrelationLimit"]; }
    108111    }
    109112    public ILookupParameter<BoolValue> OverfittingParameter {
     
    179182      //Parameters.Add(new ValueLookupParameter<PercentValue>("RelativeValidationQualityLowerLimit", new PercentValue(-0.05)));
    180183      Parameters.Add(new LookupParameter<DoubleValue>("TrainingValidationCorrelation"));
    181       Parameters.Add(new ValueLookupParameter<DoubleValue>("CorrelationLimit", new DoubleValue(0.65)));
     184      Parameters.Add(new ValueLookupParameter<DoubleValue>("LowerCorrelationLimit", new DoubleValue(0.65)));
     185      Parameters.Add(new ValueLookupParameter<DoubleValue>("UpperCorrelationLimit", new DoubleValue(0.75)));
    182186      Parameters.Add(new LookupParameter<BoolValue>("Overfitting"));
    183187      Parameters.Add(new LookupParameter<ResultCollection>("Results"));
     
    211215        Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("ValidationQuality"));
    212216      }
     217      if (!Parameters.ContainsKey("LowerCorrelationLimit")) {
     218        Parameters.Add(new ValueLookupParameter<DoubleValue>("LowerCorrelationLimit", new DoubleValue(0.65)));
     219      }
     220      if (!Parameters.ContainsKey("UpperCorrelationLimit")) {
     221        Parameters.Add(new ValueLookupParameter<DoubleValue>("UpperCorrelationLimit", new DoubleValue(0.75)));
     222      }
     223
    213224    }
    214225
     
    218229      ItemArray<DoubleValue> validationQualities = ValidationQualityParameter.ActualValue;
    219230
     231      double correlationLimit;
     232      if (OverfittingParameter.ActualValue != null && OverfittingParameter.ActualValue.Value) {
     233        // if already overfitting, the correlation has to reach the upper limit to switch back to the non-overfitting state
     234        correlationLimit = UpperCorrelationLimitParameter.ActualValue.Value;
     235      } else {
     236        // if currently in the non-overfitting state, the correlation has to drop below the lower limit to switch to the overfitting state
     237        correlationLimit = LowerCorrelationLimitParameter.ActualValue.Value;
     238      }
    220239      //string targetVariable = ProblemData.TargetVariable.Value;
    221240
     
    262281      // best first (only for maximization)
    263282      var orderedDistinctPairs = (from index in Enumerable.Range(0, qualities.Length)
     283                                  where qualities[index].Value > 0.0
    264284                                  select new { Training = qualities[index].Value, Validation = validationQualities[index].Value })
    265285                                 .OrderBy(x => -x.Training)
     
    285305        avgTrainingQuality > InitialTrainingQualityParameter.ActualValue.Value &&  // better on training than in initial generation
    286306        // RelativeValidationQualityParameter.ActualValue.Value < 0.0 && // validation quality is worse than training quality
    287         r < CorrelationLimitParameter.ActualValue.Value;  // low correlation between training and validation quality
     307        r < correlationLimit;
    288308
    289309
Note: See TracChangeset for help on using the changeset viewer.