Ignore:
Timestamp:
09/13/16 16:43:31 (5 years ago)
Author:
bburlacu
Message:

#2635: Implement an alternative way of assessing which offspring should be rejected early: the child is evaluated on part of the training data and the remaining rows are assumed to be perfectly correlated with the target. If the offspring selection criterion is not fulfilled, we can reject the child early. Otherwise the child is evaluated on an additional slice of the training data, and so on.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator/SymbolicRegressionSingleObjectiveOSGAEvaluator.cs

    r14231 r14279  
    3939    private const string ResultCollectionParameterName = "Results";
    4040    private const string AggregateStatisticsParameterName = "AggregateStatistics";
     41    private const string ActualSelectionPressureParameterName = "SelectionPressure";
     42    private const string UseAdaptiveQualityThresholdParameterName = "UseAdaptiveQualityThreshold";
     43    private const string UseFixedEvaluationIntervalsParameterName = "UseFixedEvaluationIntervals";
    4144
    4245    #region parameters
     46    public IFixedValueParameter<BoolValue> UseFixedEvaluationIntervalsParameter {
     47      get { return (IFixedValueParameter<BoolValue>)Parameters[UseFixedEvaluationIntervalsParameterName]; }
     48    }
     49    public IFixedValueParameter<BoolValue> UseAdaptiveQualityThresholdParameter {
     50      get { return (IFixedValueParameter<BoolValue>)Parameters[UseAdaptiveQualityThresholdParameterName]; }
     51    }
     52    public ILookupParameter<DoubleValue> ActualSelectionPressureParameter {
     53      get { return (ILookupParameter<DoubleValue>)Parameters[ActualSelectionPressureParameterName]; }
     54    }
    4355    public ILookupParameter<ResultCollection> ResultCollectionParameter {
    4456      get { return (ILookupParameter<ResultCollection>)Parameters[ResultCollectionParameterName]; }
     
    6678
    6779    #region parameter properties
     80    public bool UseFixedEvaluationIntervals {
     81      get { return UseFixedEvaluationIntervalsParameter.Value.Value; }
     82      set { UseFixedEvaluationIntervalsParameter.Value.Value = value; }
     83    }
     84    public bool UseAdaptiveQualityThreshold {
     85      get { return UseAdaptiveQualityThresholdParameter.Value.Value; }
     86      set { UseAdaptiveQualityThresholdParameter.Value.Value = value; }
     87    }
    6888    public double RelativeParentChildQualityThreshold {
    6989      get { return RelativeParentChildQualityThresholdParameter.Value.Value; }
     
    100120      Parameters.Add(new ValueParameter<IntMatrix>("TotalStats", new IntMatrix()));
    101121      Parameters.Add(new ValueParameter<BoolValue>(AggregateStatisticsParameterName, new BoolValue(false)));
     122      Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));
     123      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));
     124      Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
    102125    }
    103126
    104127    [StorableHook(HookType.AfterDeserialization)]
    105128    private void AfterDeserialization() {
    106       if (!Parameters.ContainsKey(ResultCollectionParameterName))
    107         Parameters.Add(new LookupParameter<ResultCollection>(ResultCollectionParameterName));
    108 
    109       if (!Parameters.ContainsKey("ParentQualities"))
    110         Parameters.Add(new ScopeTreeLookupParameter<DoubleValue>("ParentQualities") { ActualName = "Quality" });
    111 
    112       if (!Parameters.ContainsKey("RejectedStats"))
    113         Parameters.Add(new ValueParameter<IntMatrix>("RejectedStats", new IntMatrix()));
    114 
    115       if (!Parameters.ContainsKey("TotalStats"))
    116         Parameters.Add(new ValueParameter<IntMatrix>("TotalStats", new IntMatrix()));
     129      if (!Parameters.ContainsKey(ActualSelectionPressureParameterName))
     130        Parameters.Add(new LookupParameter<DoubleValue>(ActualSelectionPressureParameterName));
     131
     132      if (!Parameters.ContainsKey(UseAdaptiveQualityThresholdParameterName))
     133        Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));
     134
     135      if (!Parameters.ContainsKey(UseFixedEvaluationIntervalsParameterName))
     136        Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
    117137    }
    118138
     
    146166      // parent subscopes are not present during evaluation of the initial population
    147167      if (parentQualities.Length > 0) {
    148         quality = Calculate(interpreter, solution, estimationLimits, problemData, rows, applyLinearScaling);
     168        quality = Calculate(interpreter, solution, estimationLimits, problemData, rows);
    149169      } else {
    150170        quality = Calculate(interpreter, solution, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);
     
    174194    }
    175195
    176     private double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, DoubleLimit estimationLimits, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
     196    private double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, DoubleLimit estimationLimits, IRegressionProblemData problemData, IEnumerable<int> rows) {
    177197      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows).LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
    178       var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
     198      var targetValues = problemData.Dataset.GetReadOnlyDoubleValues(problemData.TargetVariable);
    179199
    180200      var parentQualities = ParentQualitiesParameter.ActualValue.Select(x => x.Value);
     
    183203      var comparisonFactor = ComparisonFactorParameter.ActualValue.Value;
    184204      var parentQuality = minQuality + (maxQuality - minQuality) * comparisonFactor;
    185       var threshold = parentQuality * RelativeParentChildQualityThreshold;
    186 
    187       var pearsonRCalculator = new OnlinePearsonsRCalculator();
    188       var targetValuesEnumerator = targetValues.GetEnumerator();
    189       var estimatedValuesEnumerator = estimatedValues.GetEnumerator();
    190       var trainingPartitionSize = problemData.TrainingPartition.Size;
    191       var interval = (int)Math.Floor(trainingPartitionSize * RelativeFitnessEvaluationIntervalSize);
    192 
    193       var aggregateStatistics = AggregateStatisticsParameter.Value.Value;
    194       var i = 0;
    195       if (aggregateStatistics) {
    196         var trainingEnd = problemData.TrainingPartition.End;
    197         var qualityPerInterval = new List<double>();
    198         while (targetValuesEnumerator.MoveNext() && estimatedValuesEnumerator.MoveNext()) {
    199           pearsonRCalculator.Add(targetValuesEnumerator.Current, estimatedValuesEnumerator.Current);
    200           ++i;
    201           if (i % interval == 0 || i == trainingPartitionSize) {
    202             var q = pearsonRCalculator.ErrorState != OnlineCalculatorError.None ? double.NaN : pearsonRCalculator.R;
    203             qualityPerInterval.Add(q * q);
    204           }
     205
     206      var e = estimatedValues.GetEnumerator();
     207
     208      if (UseFixedEvaluationIntervals) {
     209        double threshold = parentQuality * RelativeParentChildQualityThreshold;
     210        if (UseAdaptiveQualityThreshold) {
     211          var actualSelectionPressure = ActualSelectionPressureParameter.ActualValue;
     212          if (actualSelectionPressure != null)
     213            threshold = parentQuality * (1 - actualSelectionPressure.Value / 100.0);
    205214        }
    206         var r = pearsonRCalculator.ErrorState != OnlineCalculatorError.None ? double.NaN : pearsonRCalculator.R;
    207         var actualQuality = r * r;
    208 
    209         bool predictedRejected = false;
    210 
    211         i = 0;
    212         double quality = actualQuality;
    213         foreach (var q in qualityPerInterval) {
    214           if (double.IsNaN(q) || !(q > threshold)) {
    215             predictedRejected = true;
    216             quality = q;
     215
     216        var pearsonRCalculator = new OnlinePearsonsRCalculator();
     217        var targetValuesEnumerator = targetValues.GetEnumerator();
     218        var trainingPartitionSize = problemData.TrainingPartition.Size;
     219        var interval = (int)Math.Floor(trainingPartitionSize * RelativeFitnessEvaluationIntervalSize);
     220
     221        var aggregateStatistics = AggregateStatisticsParameter.Value.Value;
     222        var i = 0;
     223        if (aggregateStatistics) {
     224          var trainingEnd = problemData.TrainingPartition.End;
     225          var qualityPerInterval = new List<double>();
     226          while (targetValuesEnumerator.MoveNext() && e.MoveNext()) {
     227            pearsonRCalculator.Add(targetValuesEnumerator.Current, e.Current);
     228            ++i;
     229            if (i % interval == 0 || i == trainingPartitionSize) {
     230              var q = pearsonRCalculator.ErrorState != OnlineCalculatorError.None ? double.NaN : pearsonRCalculator.R;
     231              qualityPerInterval.Add(q * q);
     232            }
     233          }
     234          var r = pearsonRCalculator.ErrorState != OnlineCalculatorError.None ? double.NaN : pearsonRCalculator.R;
     235          var actualQuality = r * r;
     236
     237          bool predictedRejected = false;
     238
     239          i = 0;
     240          double quality = actualQuality;
     241          foreach (var q in qualityPerInterval) {
     242            if (double.IsNaN(q) || !(q > threshold)) {
     243              predictedRejected = true;
     244              quality = q;
     245              break;
     246            }
     247            ++i;
     248          }
     249
     250          var actuallyRejected = !(actualQuality > parentQuality);
     251
     252          if (RejectedStats.Rows == 0 || TotalStats.Rows == 0) {
     253            RejectedStats = new IntMatrix(2, qualityPerInterval.Count);
     254            RejectedStats.RowNames = new[] { "Predicted", "Actual" };
     255            RejectedStats.ColumnNames = Enumerable.Range(1, RejectedStats.Columns).Select(x => string.Format("0-{0}", Math.Min(trainingEnd, x * interval)));
     256            TotalStats = new IntMatrix(2, 2);
     257            TotalStats.RowNames = new[] { "Predicted", "Actual" };
     258            TotalStats.ColumnNames = new[] { "Rejected", "Not Rejected" };
     259          }
     260          // gather some statistics
     261          if (predictedRejected) {
     262            RejectedStats[0, i]++;
     263            TotalStats[0, 0]++;
     264          } else {
     265            TotalStats[0, 1]++;
     266          }
     267          if (actuallyRejected) {
     268            TotalStats[1, 0]++;
     269          } else {
     270            TotalStats[1, 1]++;
     271          }
     272          if (predictedRejected && actuallyRejected) {
     273            RejectedStats[1, i]++;
     274          }
     275          return quality;
     276        } else {
     277          while (targetValuesEnumerator.MoveNext() && e.MoveNext()) {
     278            pearsonRCalculator.Add(targetValuesEnumerator.Current, e.Current);
     279            ++i;
     280            if (i % interval == 0 || i == trainingPartitionSize) {
     281              var q = pearsonRCalculator.ErrorState != OnlineCalculatorError.None ? double.NaN : pearsonRCalculator.R;
     282              var quality = q * q;
     283              if (!(quality > threshold))
     284                return quality;
     285            }
     286          }
     287          var r = pearsonRCalculator.ErrorState != OnlineCalculatorError.None ? double.NaN : pearsonRCalculator.R;
     288          var actualQuality = r * r;
     289          return actualQuality;
     290        }
     291      } else {
     292        var calculator = new OnlinePearsonsRCalculator();
     293        var trainingPartitionSize = problemData.TrainingPartition.Size;
     294        var interval = (int)Math.Floor(trainingPartitionSize * RelativeFitnessEvaluationIntervalSize);
     295        double quality = double.NaN;
     296        var estimated = new List<double>(); // save estimated values in a list so we don't re-evaluate
     297        // use the actual estimated values for the first i * interval rows of the training partition and assume the remaining rows are perfectly correlated
     298        // if the quality of the individual still falls below the parent quality, then we can reject it sooner, otherwise as i increases the whole estimated series will be used
     299        for (int i = 0; i < trainingPartitionSize; i += interval) {
     300          int j = i;
     301          int end = Math.Min(trainingPartitionSize, i + interval);
     302          while (j < end && e.MoveNext()) {
     303            estimated.Add(e.Current);
     304            j++;
     305          }
     306
     307          var start = problemData.TrainingPartition.Start;
     308          calculator.Reset();
     309          // add (estimated, target) pairs to the calculator
     310          for (j = 0; j < end; ++j)
     311            calculator.Add(estimated[j], targetValues[j + start]);
     312          // add (target, target) pairs to the calculator (simulate perfect correlation on the remaining rows)
     313          for (; j < trainingPartitionSize; ++j) {
     314            var index = j + start;
     315            calculator.Add(targetValues[index], targetValues[index]);
     316          }
     317          var r = calculator.ErrorState == OnlineCalculatorError.None ? calculator.R : double.NaN;
     318          quality = r * r;
     319          if (!(quality > parentQuality))
    217320            break;
    218           }
    219           ++i;
    220         }
    221 
    222         var actuallyRejected = !(actualQuality > parentQuality);
    223 
    224         if (RejectedStats.Rows == 0 || TotalStats.Rows == 0) {
    225           RejectedStats = new IntMatrix(2, qualityPerInterval.Count);
    226           RejectedStats.RowNames = new[] { "Predicted", "Actual" };
    227           RejectedStats.ColumnNames = Enumerable.Range(1, RejectedStats.Columns).Select(x => string.Format("0-{0}", Math.Min(trainingEnd, x * interval)));
    228           TotalStats = new IntMatrix(2, 2);
    229           TotalStats.RowNames = new[] { "Predicted", "Actual" };
    230           TotalStats.ColumnNames = new[] { "Rejected", "Not Rejected" };
    231         }
    232         // gather some statistics
    233         if (predictedRejected) {
    234           RejectedStats[0, i]++;
    235           TotalStats[0, 0]++;
    236         } else {
    237           TotalStats[0, 1]++;
    238         }
    239         if (actuallyRejected) {
    240           TotalStats[1, 0]++;
    241         } else {
    242           TotalStats[1, 1]++;
    243         }
    244         if (predictedRejected && actuallyRejected) {
    245           RejectedStats[1, i]++;
    246321        }
    247322        return quality;
    248       } else {
    249         while (targetValuesEnumerator.MoveNext() && estimatedValuesEnumerator.MoveNext()) {
    250           pearsonRCalculator.Add(targetValuesEnumerator.Current, estimatedValuesEnumerator.Current);
    251           ++i;
    252           if (i % interval == 0 || i == trainingPartitionSize) {
    253             var q = pearsonRCalculator.ErrorState != OnlineCalculatorError.None ? double.NaN : pearsonRCalculator.R;
    254             var quality = q * q;
    255             if (!(quality > threshold))
    256               return quality;
    257           }
    258         }
    259         var r = pearsonRCalculator.ErrorState != OnlineCalculatorError.None ? double.NaN : pearsonRCalculator.R;
    260         var actualQuality = r * r;
    261         return actualQuality;
    262323      }
    263324    }
Note: See TracChangeset for help on using the changeset viewer.