Free cookie consent management tool by TermsFeed Policy Generator

Changeset 4224 for branches/DataAnalysis


Ignore:
Timestamp:
08/16/10 10:02:12 (14 years ago)
Author:
gkronber
Message:

Added experimental anti-overfitting code to the dynamic operator equalizer to prevent code growth when the current validation quality is worse than the best validation quality so far (#1142).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysis/HeuristicLab.Problems.DataAnalysis/3.3/Operators/DynOpEqComparator.cs

    r4193 r4224  
    6868    public ILookupParameter<ItemList<IntValue>> TotalCountsParameter {
    6969      get { return (ILookupParameter<ItemList<IntValue>>)Parameters["TotalCounts"]; }
     70    }
     71    public IValueLookupParameter<BoolValue> AntiOverfitParameter {
     72      get { return (IValueLookupParameter<BoolValue>)Parameters["AntiOverfit"]; }
     73    }
     74    public ILookupParameter<DoubleValue> CurrentBestValidationQualityParameter {
     75      get { return (ILookupParameter<DoubleValue>)Parameters["Current best validation quality"]; }
     76    }
     77    public ILookupParameter<DoubleValue> BestValidationQualityParameter {
     78      get { return (ILookupParameter<DoubleValue>)Parameters["Best solution quality (validation)"]; }
    7079    }
    7180
     
    8392      Parameters.Add(new LookupParameter<ItemList<IntValue>>("AcceptedCounts"));
    8493      Parameters.Add(new LookupParameter<ItemList<IntValue>>("TotalCounts"));
     94      Parameters.Add(new ValueLookupParameter<BoolValue>("AntiOverfit", new BoolValue(false)));
     95      Parameters.Add(new LookupParameter<DoubleValue>("Current best validation quality"));
     96      Parameters.Add(new LookupParameter<DoubleValue>("Best solution quality (validation)"));
    8597    }
    8698
     
    91103        int bin = GetBinIndexForSize(size);
    92104        if (LeftSideParameter.ActualValue == null) {
     105          // not yet evaluated
     106          #region debugging
    93107          ItemList<IntValue> totalCounts = TotalCountsParameter.ActualValue;
    94108          while (bin >= totalCounts.Count) totalCounts.Add(new IntValue(0));
    95109          totalCounts[bin].Value = totalCounts[bin].Value + 1;
    96 
    97           if (!Exists(bin)) ResultParameter.ActualValue = new BoolValue(true);
    98           else ResultParameter.ActualValue = new BoolValue(IsNotFull(bin));
     110          #endregion
     111          if (!Exists(bin)) {
     112
     113            if (AntiOverfitParameter.ActualValue.Value) {
     114              // reject more complex solutions if the current validation quality is worse than the best so far
     115              ResultParameter.ActualValue = new BoolValue(!IsOverfitting());
     116            } else {
     117              // new bin -> evaluate and check later
     118              ResultParameter.ActualValue = new BoolValue(true);
     119            }
     120          } else {
     121            // bin exists:
     122            // if bin is full -> reject
     123            // otherwise -> evaluate and check success criterion
     124            ResultParameter.ActualValue = new BoolValue(IsNotFull(bin));
     125          }
    99126        } else {
    100127          double leftQuality = LeftSideParameter.ActualValue.Value;
    101 
    102 
    103128          ResultParameter.ActualValue = new BoolValue(Accept(size, bin, leftQuality));
    104129        }
     
    107132    }
    108133
     134    private bool IsOverfitting() {
     135      bool maximization = MaximizationParameter.ActualValue.Value;
     136      if (CurrentBestValidationQualityParameter.ActualValue != null && BestValidationQualityParameter.ActualValue != null) {
     137        double currentValidationQuality = CurrentBestValidationQualityParameter.ActualValue.Value;
     138        double bestValidationQuality = BestValidationQualityParameter.ActualValue.Value;
     139        return maximization ? currentValidationQuality < bestValidationQuality : currentValidationQuality > bestValidationQuality;
     140      } else
     141        return false;
     142    }
     143
    109144    private int GetBinIndexForSize(int size) {
    110145      return (int)Math.Floor((size - 3.0) / BinSizeParameter.ActualValue.Value);
     
    114149      bool accept = false;
    115150      if (Exists(binIndex)) {
    116         if (IsNotFull(binIndex) ||
    117           NewBestOfBin(solutionQuality, binIndex)) {
    118           AddToBin(solutionQuality, binIndex);
     151        //if (IsNotFull(binIndex) /*||
     152          //NewBestOfBin(solutionQuality, binIndex)*/) {       
     153        AddToBin(solutionQuality, binIndex);
    119154          accept = true;
    120         }
     155        //}
    121156      } else if (NewBestOfRun(solutionQuality)) {
    122157        CreateNewBin(binIndex);
     
    130165      ItemList<DoubleValue> acceptedBinQualities = AcceptedBinQualitiesParameter.ActualValue[binIndex];
    131166      ItemList<IntValue> acceptedCounts = AcceptedCountsParameter.ActualValue;
    132       if (acceptedBinQualities.Count == 0) {
    133         acceptedBinQualities.Add(new DoubleValue(solutionQuality));
    134         acceptedCounts[binIndex].Value = acceptedCounts[binIndex].Value + 1;
    135       } else {
    136         // insert at beginning if it is a new best or add at end
    137         bool maximization = MaximizationParameter.ActualValue.Value;
    138         if ((maximization && solutionQuality > acceptedBinQualities[0].Value) ||
    139           (!maximization && solutionQuality < acceptedBinQualities[0].Value)) {
    140           acceptedBinQualities.Insert(0, new DoubleValue(solutionQuality));
    141         } else {
    142           acceptedBinQualities.Add(new DoubleValue(solutionQuality));
    143         }
    144         acceptedCounts[binIndex].Value = acceptedCounts[binIndex].Value + 1;
    145       }
     167      acceptedBinQualities.Add(new DoubleValue(solutionQuality));
     168      acceptedCounts[binIndex].Value = acceptedCounts[binIndex].Value + 1;
    146169    }
    147170
     
    165188    }
    166189
    167     private bool NewBestOfBin(double solutionQuality, int binIndex) {
    168       ItemList<ItemList<DoubleValue>> acceptedQualities = AcceptedBinQualitiesParameter.ActualValue;
    169       if (acceptedQualities[binIndex].Count == 0) return true;
    170       bool maximization = MaximizationParameter.ActualValue.Value;
    171       IEnumerable<double> binQualities = acceptedQualities[binIndex].Select(x => x.Value);
    172       // binQualities are always sorted so that the best is in bin 0
    173       return maximization ? solutionQuality > binQualities.First() :
    174         solutionQuality < binQualities.First();
    175     }
     190    //private bool NewBestOfBin(double solutionQuality, int binIndex) {
     191    //  ItemList<ItemList<DoubleValue>> acceptedQualities = AcceptedBinQualitiesParameter.ActualValue;
     192    //  if (acceptedQualities[binIndex].Count == 0) return true;
     193    //  bool maximization = MaximizationParameter.ActualValue.Value;
     194    //  IEnumerable<double> binQualities = acceptedQualities[binIndex].Select(x => x.Value);
     195    //  // binQualities are always sorted so that the best is in bin 0
     196    //  return maximization ? solutionQuality > binQualities.First() :
     197    //    solutionQuality < binQualities.First();
     198    //}
    176199
    177200    private bool IsNotFull(int binIndex) {
Note: See TracChangeset for help on using the changeset viewer.