Changeset 4275


Ignore:
Timestamp:
08/22/10 19:06:32 (12 years ago)
Author:
gkronber
Message:

Improved overfitting analyzer #1142

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/OverfittingAnalyzer.cs

    r4272 r4275  
    112112    public ILookupParameter<DoubleValue> InitialTrainingQualityParameter {
    113113      get { return (ILookupParameter<DoubleValue>)Parameters["InitialTrainingQuality"]; }
     114    }
     115    public ILookupParameter<DoubleMatrix> TrainingAndValidationQualitiesParameter {
     116      get { return (ILookupParameter<DoubleMatrix>)Parameters["TrainingAndValidationQualities"]; }
     117    }
     118    public IValueLookupParameter<DoubleValue> PercentileParameter {
     119      get { return (IValueLookupParameter<DoubleValue>)Parameters["Percentile"]; }
    114120    }
    115121    #endregion
     
    173179      Parameters.Add(new LookupParameter<ResultCollection>("Results"));
    174180      Parameters.Add(new LookupParameter<DoubleValue>("InitialTrainingQuality"));
     181      Parameters.Add(new LookupParameter<DoubleMatrix>("TrainingAndValidationQualities"));
     182      Parameters.Add(new ValueLookupParameter<DoubleValue>("Percentile", new DoubleValue(0.1)));
     183
    175184    }
    176185
     
    189198      //  Parameters.Add(new ValueLookupParameter<PercentValue>("RelativeValidationQualityLowerLimit", new PercentValue(-0.05)));
    190199      //}
     200      if (!Parameters.ContainsKey("TrainingAndValidationQualities")) {
     201        Parameters.Add(new LookupParameter<DoubleMatrix>("TrainingAndValidationQualities"));
     202      }
     203      if (!Parameters.ContainsKey("Percentile")) {
     204        Parameters.Add(new ValueLookupParameter<DoubleValue>("Percentile", new DoubleValue(0.1)));
     205      }
    191206    }
    192207
     
    237252      //}
    238253
    239       // cut away 0.0 values to make the correlation stronger
    240       // necessary because R² values of 0.0 are strong outliers
    241       //int percentile = (int)Math.Round(0.1 * validationQualities.Count);
    242       //double validationCutOffValue = validationQualities.OrderBy(x => x).ElementAt(percentile);
    243       //double trainingCutOffValue = qualities.Select(x => x.Value).OrderBy(x => x).ElementAt(percentile);
    244       double validationCutOffValue = 0.05;
    245       double trainingCutOffValue = validationCutOffValue;
    246 
    247       double[] validationArr = new double[validationQualities.Count];
    248       double[] trainingArr = new double[validationQualities.Count];
    249       int arrIndex = 0;
    250       for (int i = 0; i < validationQualities.Count; i++) {
    251         if (validationQualities[i] > validationCutOffValue &&
    252             qualities[i].Value > trainingCutOffValue) {
    253           validationArr[arrIndex] = validationQualities[i];
    254           trainingArr[arrIndex] = qualities[i].Value;
    255           arrIndex++;
    256         }
    257       }
    258       double r = alglib.correlation.spearmanrankcorrelation(trainingArr, validationArr, arrIndex);
     254      // best first (only for maximization)
     255      var orderedDistinctPairs = (from index in Enumerable.Range(0, qualities.Length)
     256                                  select new { Training = qualities[index].Value, Validation = validationQualities[index] })
     257                                 .Distinct()
     258                                 .OrderBy(x => -x.Training)
     259                                 .ToList();
     260
     261      int n = (int)Math.Round(PercentileParameter.ActualValue.Value * orderedDistinctPairs.Count);
     262
     263      double[] validationArr = new double[n];
     264      double[] trainingArr = new double[n];
     265      //double[,] qualitiesArr = new double[n, 2];
     266      for (int i = 0; i < n; i++) {
     267        validationArr[i] = orderedDistinctPairs[i].Validation;
     268        trainingArr[i] = orderedDistinctPairs[i].Training;
     269
     270        //qualitiesArr[i, 0] = trainingArr[i];
     271        //qualitiesArr[i, 1] = validationArr[i];
     272      }
     273      double r = alglib.correlation.spearmanrankcorrelation(trainingArr, validationArr, n);
    259274      TrainingValidationQualityCorrelationParameter.ActualValue = new DoubleValue(r);
    260275      if (InitialTrainingQualityParameter.ActualValue == null)
     
    270285
    271286      OverfittingParameter.ActualValue = new BoolValue(overfitting);
     287      //TrainingAndValidationQualitiesParameter.ActualValue = new DoubleMatrix(qualitiesArr);
    272288      return base.Apply();
    273289    }
Note: See TracChangeset for help on using the changeset viewer.