Timestamp: 08/24/10 19:25:11 (14 years ago)
File: 1 edited
branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/OverfittingAnalyzer.cs
{{{
--- OverfittingAnalyzer.cs (r4297)
+++ OverfittingAnalyzer.cs (r4309)
@@ -218,16 +218,16 @@
       ItemArray<DoubleValue> validationQualities = ValidationQualityParameter.ActualValue;

-      string targetVariable = ProblemData.TargetVariable.Value;
-
-      // select a random subset of rows in the validation set
-      int validationStart = ValidiationSamplesStart.Value;
-      int validationEnd = ValidationSamplesEnd.Value;
-      int seed = Random.Next();
-      int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
-      if (count == 0) count = 1;
-      IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count);
-
-      double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
-      double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;
+      //string targetVariable = ProblemData.TargetVariable.Value;
+
+      //// select a random subset of rows in the validation set
+      //int validationStart = ValidiationSamplesStart.Value;
+      //int validationEnd = ValidationSamplesEnd.Value;
+      //int seed = Random.Next();
+      //int count = (int)((validationEnd - validationStart) * RelativeNumberOfEvaluatedSamples.Value);
+      //if (count == 0) count = 1;
+      //IEnumerable<int> rows = RandomEnumerable.SampleRandomNumbers(seed, validationStart, validationEnd, count);
+
+      //double upperEstimationLimit = UpperEstimationLimit != null ? UpperEstimationLimit.Value : double.PositiveInfinity;
+      //double lowerEstimationLimit = LowerEstimationLimit != null ? LowerEstimationLimit.Value : double.NegativeInfinity;

       //double bestQuality = Maximization.Value ? double.NegativeInfinity : double.PositiveInfinity;
@@ -250,6 +250,6 @@
       //if (RelativeValidationQualityParameter.ActualValue == null) {
       // first call initialize the relative quality using the difference between average training and validation quality
-      double avgTrainingQuality = qualities.Select(x => x.Value).Median();
-      double avgValidationQuality = validationQualities.Select(x => x.Value).Median();
+      double avgTrainingQuality = qualities.Select(x => x.Value).Average();
+      double avgValidationQuality = validationQualities.Select(x => x.Value).Average();

       if (Maximization.Value)
@@ -284,10 +284,7 @@
       bool overfitting =
         avgTrainingQuality > InitialTrainingQualityParameter.ActualValue.Value && // better on training than in initial generation
+        // RelativeValidationQualityParameter.ActualValue.Value < 0.0 && // validation quality is worse than training quality
         r < CorrelationLimitParameter.ActualValue.Value; // low correlation between training and validation quality

-      //// if validation quality is within a certain margin of percentage deviation (default -5% .. 5%) then there is no overfitting
-      //// correlation is also bad when underfitting but validation quality cannot be a lot larger than training quality if overfitting
-      //(RelativeValidationQualityParameter.ActualValue.Value > RelativeValidationQualityUpperLimitParameter.ActualValue.Value || // better on training than on validation
-      // RelativeValidationQualityParameter.ActualValue.Value < RelativeValidationQualityLowerLimitParameter.ActualValue.Value); // better on training than on validation

       OverfittingParameter.ActualValue = new BoolValue(overfitting);
}}}
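Net effect of the changeset: the per-call sampling of validation rows and the estimation limits are commented out, the population aggregate switches from Median() to Average(), and the overfitting test reduces to two conditions: average training quality improved over the initial generation, and low correlation between training and validation qualities. The sketch below restates that heuristic as standalone C#; the names (OverfittingHeuristic, DetectOverfitting, PearsonR) and the explicit minimization branch are illustrative assumptions, not the HeuristicLab API.

{{{
// Minimal sketch of the r4309 overfitting heuristic, outside HeuristicLab.
using System;
using System.Linq;

static class OverfittingHeuristic {
  // Pearson product-moment correlation of two equally long samples.
  static double PearsonR(double[] x, double[] y) {
    double meanX = x.Average(), meanY = y.Average();
    double cov = 0, varX = 0, varY = 0;
    for (int i = 0; i < x.Length; i++) {
      cov  += (x[i] - meanX) * (y[i] - meanY);
      varX += (x[i] - meanX) * (x[i] - meanX);
      varY += (y[i] - meanY) * (y[i] - meanY);
    }
    return cov / Math.Sqrt(varX * varY);
  }

  // Flags overfitting per the r4309 logic: training quality beat the initial
  // generation while training and validation qualities decorrelated.
  // The changeset itself compares with '>' (maximization is handled elsewhere
  // in elided code); the minimization branch here is an assumption.
  static bool DetectOverfitting(double[] trainingQualities,
                                double[] validationQualities,
                                double initialTrainingQuality,
                                double correlationLimit,
                                bool maximization) {
    double avgTraining = trainingQualities.Average(); // r4309 uses Average(), r4297 used Median()
    double r = PearsonR(trainingQualities, validationQualities);
    bool improvedOnTraining = maximization
      ? avgTraining > initialTrainingQuality
      : avgTraining < initialTrainingQuality;
    return improvedOnTraining && r < correlationLimit;
  }

  static void Main() {
    // A population whose training quality rose while validation quality
    // no longer tracks it: correlation is low, so the run is flagged.
    double[] train = { 0.91, 0.93, 0.95, 0.97, 0.99 };
    double[] valid = { 0.80, 0.78, 0.81, 0.77, 0.79 };
    Console.WriteLine(DetectOverfitting(train, valid,
      initialTrainingQuality: 0.85, correlationLimit: 0.9,
      maximization: true)); // prints: True
  }
}
}}}

With the relative-quality margin check removed (the old lines 288-291), the correlation term carries the detection: when validation quality stops tracking training quality across the population, r drops below the limit and the run is flagged even though training quality keeps improving.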