Changeset 14428


Ignore:
Timestamp:
11/29/16 15:30:27 (4 years ago)
Author:
bburlacu
Message:

#2635: Add analyzer for counting the AdjustedEvaluatedSolutions (according to the actual number of evaluated rows). Add option to preserve compatibility with the standard evaluator. Optimize performance.

Location:
branches/HeuristicLab.OSGAEvaluator
Files:
1 added
4 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator.sln

    r14084 r14428  
    22Microsoft Visual Studio Solution File, Format Version 12.00
    33# Visual Studio 14
    4 VisualStudioVersion = 14.0.25123.0
     4VisualStudioVersion = 14.0.25420.1
    55MinimumVisualStudioVersion = 10.0.40219.1
    66Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.OSGAEvaluator", "HeuristicLab.OSGAEvaluator\HeuristicLab.OSGAEvaluator.csproj", "{4148A27C-C6F5-44BA-9A57-1460F3758A24}"
    77EndProject
    88Global
     9  GlobalSection(Performance) = preSolution
     10    HasPerformanceSessions = true
     11  EndGlobalSection
    912  GlobalSection(SolutionConfigurationPlatforms) = preSolution
    1013    Debug|Any CPU = Debug|Any CPU
  • branches/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator.csproj

    r14084 r14428  
    117117  </ItemGroup>
    118118  <ItemGroup>
     119    <Compile Include="AdjustedEvaluatedSolutionsAnalyzer.cs" />
    119120    <Compile Include="OSGAPredictionCountsAnalyzer.cs" />
    120121    <Compile Include="Plugin.cs" />
  • branches/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator/OSGAPredictionCountsAnalyzer.cs

    r14231 r14428  
    1717    private const string ResultCollectionParameterName = "Results";
    1818
    19     public ILookupParameter<SymbolicRegressionSingleObjectiveOsgaEvaluator> EvaluatorParameter {
    20       get { return (ILookupParameter<SymbolicRegressionSingleObjectiveOsgaEvaluator>)Parameters[EvaluatorParameterName]; }
     19    public ILookupParameter<SymbolicRegressionSingleObjectiveEvaluator> EvaluatorParameter {
     20      get { return (ILookupParameter<SymbolicRegressionSingleObjectiveEvaluator>)Parameters[EvaluatorParameterName]; }
    2121    }
    2222
    2323    public OSGAPredictionCountsAnalyzer() {
    24       Parameters.Add(new LookupParameter<SymbolicRegressionSingleObjectiveOsgaEvaluator>(EvaluatorParameterName));
     24      Parameters.Add(new LookupParameter<SymbolicRegressionSingleObjectiveEvaluator>(EvaluatorParameterName));
    2525    }
    2626
     
    3333
    3434    public override IOperation Apply() {
    35       var evaluator = EvaluatorParameter.ActualValue;
     35      var evaluator = EvaluatorParameter.ActualValue as SymbolicRegressionSingleObjectiveOsgaEvaluator;
    3636      if (evaluator == null)
    3737        return base.Apply();
  • branches/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator/SymbolicRegressionSingleObjectiveOSGAEvaluator.cs

    r14302 r14428  
    2222using System;
    2323using System.Collections.Generic;
    24 using System.Diagnostics;
    2524using System.Linq;
    2625using HeuristicLab.Common;
     
    4342    private const string UseAdaptiveQualityThresholdParameterName = "UseAdaptiveQualityThreshold";
    4443    private const string UseFixedEvaluationIntervalsParameterName = "UseFixedEvaluationIntervals";
     44    private const string PreserveResultCompatibilityParameterName = "PreserveEvaluationResultCompatibility";
    4545
    4646    #region parameters
     47    public IFixedValueParameter<BoolValue> PreserveResultCompatibilityParameter {
     48      get { return (IFixedValueParameter<BoolValue>)Parameters[PreserveResultCompatibilityParameterName]; }
     49    }
    4750    public IFixedValueParameter<BoolValue> UseFixedEvaluationIntervalsParameter {
    4851      get { return (IFixedValueParameter<BoolValue>)Parameters[UseFixedEvaluationIntervalsParameterName]; }
     
    7982
    8083    #region parameter properties
     84    public bool AggregateStatistics {
     85      get { return AggregateStatisticsParameter.Value.Value; }
     86      set { AggregateStatisticsParameter.Value.Value = value; }
     87    }
     88    public bool PreserveResultCompatibility {
     89      get { return PreserveResultCompatibilityParameter.Value.Value; }
     90      set { PreserveResultCompatibilityParameter.Value.Value = value; }
     91    }
    8192    public bool UseFixedEvaluationIntervals {
    8293      get { return UseFixedEvaluationIntervalsParameter.Value.Value; }
     
    91102      set { RelativeParentChildQualityThresholdParameter.Value.Value = value; }
    92103    }
    93 
    94104    public double RelativeFitnessEvaluationIntervalSize {
    95105      get { return RelativeFitnessEvaluationIntervalSizeParameter.Value.Value; }
    96106      set { RelativeFitnessEvaluationIntervalSizeParameter.Value.Value = value; }
    97107    }
    98 
    99108    public IntMatrix RejectedStats {
    100109      get { return RejectedStatsParameter.Value; }
    101110      set { RejectedStatsParameter.Value = value; }
    102111    }
    103 
    104112    public IntMatrix TotalStats {
    105113      get { return NotRejectedStatsParameter.Value; }
     
    111119      get { return true; }
    112120    }
     121
     122    public double AdjustedEvaluatedSolutions { get; set; }
    113123
    114124    public SymbolicRegressionSingleObjectiveOsgaEvaluator() {
     
    124134      Parameters.Add(new FixedValueParameter<BoolValue>(UseAdaptiveQualityThresholdParameterName, new BoolValue(false)));
    125135      Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
     136      Parameters.Add(new FixedValueParameter<BoolValue>(PreserveResultCompatibilityParameterName, new BoolValue(false)));
    126137    }
    127138
     
    136147      if (!Parameters.ContainsKey(UseFixedEvaluationIntervalsParameterName))
    137148        Parameters.Add(new FixedValueParameter<BoolValue>(UseFixedEvaluationIntervalsParameterName, new BoolValue(false)));
     149
     150      if (!Parameters.ContainsKey(PreserveResultCompatibilityParameterName))
     151        Parameters.Add(new FixedValueParameter<BoolValue>(PreserveResultCompatibilityParameterName, new BoolValue(false)));
    138152    }
    139153
     
    151165      RejectedStats = new IntMatrix();
    152166      TotalStats = new IntMatrix();
     167      AdjustedEvaluatedSolutions = 0;
    153168    }
    154169
     
    198213      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
    199214      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).ToList();
    200 
    201215      var parentQualities = ParentQualitiesParameter.ActualValue.Select(x => x.Value);
    202       var minQuality = parentQualities.Min();
    203       var maxQuality = parentQualities.Max();
     216      var minQuality = double.MaxValue;
     217      var maxQuality = double.MinValue;
     218
     219      foreach (var quality in parentQualities) {
     220        if (minQuality > quality) minQuality = quality;
     221        if (maxQuality < quality) maxQuality = quality;
     222      }
     223
    204224      var comparisonFactor = ComparisonFactorParameter.ActualValue.Value;
    205225      var parentQuality = minQuality + (maxQuality - minQuality) * comparisonFactor;
    206226
    207 
    208 
    209227      #region fixed intervals
    210228      if (UseFixedEvaluationIntervals) {
    211         var e = estimatedValues.GetEnumerator();
    212229        double threshold = parentQuality * RelativeParentChildQualityThreshold;
    213230        if (UseAdaptiveQualityThreshold) {
     
    216233            threshold = parentQuality * (1 - actualSelectionPressure.Value / 100.0);
    217234        }
    218 
    219         var pearsonRCalculator = new OnlinePearsonsRCalculator();
    220         var targetValuesEnumerator = targetValues.GetEnumerator();
     235        var estimatedEnumerator = estimatedValues.GetEnumerator();
     236        var targetEnumerator = targetValues.GetEnumerator();
     237
     238        var rcalc = new OnlinePearsonsRCalculator();
    221239        var trainingPartitionSize = problemData.TrainingPartition.Size;
    222240        var interval = (int)Math.Floor(trainingPartitionSize * RelativeFitnessEvaluationIntervalSize);
    223241
    224         var aggregateStatistics = AggregateStatisticsParameter.Value.Value;
    225         var i = 0;
    226         if (aggregateStatistics) {
     242        var calculatedRows = 0;
     243        #region aggregate statistics
     244        if (AggregateStatistics) {
    227245          var trainingEnd = problemData.TrainingPartition.End;
    228246          var qualityPerInterval = new List<double>();
    229           while (targetValuesEnumerator.MoveNext() && e.MoveNext()) {
    230             pearsonRCalculator.Add(targetValuesEnumerator.Current, e.Current);
    231             ++i;
    232             if (i % interval == 0 || i == trainingPartitionSize) {
    233               var q = pearsonRCalculator.ErrorState != OnlineCalculatorError.None ? double.NaN : pearsonRCalculator.R;
    234               qualityPerInterval.Add(q * q);
    235             }
    236           }
    237           var r = pearsonRCalculator.ErrorState != OnlineCalculatorError.None ? double.NaN : pearsonRCalculator.R;
     247          while (estimatedEnumerator.MoveNext() & targetEnumerator.MoveNext()) {
     248            var estimated = estimatedEnumerator.Current;
     249            var target = targetEnumerator.Current;
     250            rcalc.Add(estimated, target);
     251            ++calculatedRows;
     252            if (calculatedRows % interval == 0 || calculatedRows == trainingPartitionSize) {
     253              var r = rcalc.ErrorState == OnlineCalculatorError.None ? rcalc.R : 0d;
     254              qualityPerInterval.Add(r * r);
     255            }
     256          }
     257          double quality;
     258          {
     259            var r = rcalc.ErrorState != OnlineCalculatorError.None ? 0d : rcalc.R;
     260            var actualQuality = r * r;
     261            quality = actualQuality;
     262            bool predictedRejected = false;
     263
     264            calculatedRows = 0;
     265            foreach (var q in qualityPerInterval) {
     266              if (double.IsNaN(q) || !(q > threshold)) {
     267                predictedRejected = true;
     268                quality = q;
     269                break;
     270              }
     271              ++calculatedRows;
     272            }
     273
     274            var actuallyRejected = !(actualQuality > parentQuality);
     275
     276            if (RejectedStats.Rows == 0 || TotalStats.Rows == 0) {
     277              RejectedStats = new IntMatrix(2, qualityPerInterval.Count);
     278              RejectedStats.RowNames = new[] { "Predicted", "Actual" };
     279              RejectedStats.ColumnNames = Enumerable.Range(1, RejectedStats.Columns).Select(x => string.Format("0-{0}", Math.Min(trainingEnd, x * interval)));
     280              TotalStats = new IntMatrix(2, 2);
     281              TotalStats.RowNames = new[] { "Predicted", "Actual" };
     282              TotalStats.ColumnNames = new[] { "Rejected", "Not Rejected" };
     283            }
     284            // gather some statistics
     285            if (predictedRejected) {
     286              RejectedStats[0, calculatedRows]++;
     287              TotalStats[0, 0]++;
     288            } else {
     289              TotalStats[0, 1]++;
     290            }
     291            if (actuallyRejected) {
     292              TotalStats[1, 0]++;
     293            } else {
     294              TotalStats[1, 1]++;
     295            }
     296            if (predictedRejected && actuallyRejected) {
     297              RejectedStats[1, calculatedRows]++;
     298            }
     299          }
     300          return quality;
     301        }
     302        #endregion
     303        else {
     304          while (estimatedEnumerator.MoveNext() & targetEnumerator.MoveNext()) {
     305            rcalc.Add(targetEnumerator.Current, estimatedEnumerator.Current);
     306            ++calculatedRows;
     307            if (calculatedRows % interval == 0 || calculatedRows == trainingPartitionSize) {
     308              var q = rcalc.ErrorState != OnlineCalculatorError.None ? double.NaN : rcalc.R;
     309              var quality = q * q;
     310              if (!(quality > threshold)) {
     311                AdjustedEvaluatedSolutions += (double)calculatedRows / problemData.TrainingPartition.Size;
     312                return quality;
     313              }
     314            }
     315          }
     316          var r = rcalc.ErrorState != OnlineCalculatorError.None ? double.NaN : rcalc.R;
    238317          var actualQuality = r * r;
    239 
    240           bool predictedRejected = false;
    241 
    242           i = 0;
    243           double quality = actualQuality;
    244           foreach (var q in qualityPerInterval) {
    245             if (double.IsNaN(q) || !(q > threshold)) {
    246               predictedRejected = true;
    247               quality = q;
    248               break;
    249             }
    250             ++i;
    251           }
    252 
    253           var actuallyRejected = !(actualQuality > parentQuality);
    254 
    255           if (RejectedStats.Rows == 0 || TotalStats.Rows == 0) {
    256             RejectedStats = new IntMatrix(2, qualityPerInterval.Count);
    257             RejectedStats.RowNames = new[] { "Predicted", "Actual" };
    258             RejectedStats.ColumnNames = Enumerable.Range(1, RejectedStats.Columns).Select(x => string.Format("0-{0}", Math.Min(trainingEnd, x * interval)));
    259             TotalStats = new IntMatrix(2, 2);
    260             TotalStats.RowNames = new[] { "Predicted", "Actual" };
    261             TotalStats.ColumnNames = new[] { "Rejected", "Not Rejected" };
    262           }
    263           // gather some statistics
    264           if (predictedRejected) {
    265             RejectedStats[0, i]++;
    266             TotalStats[0, 0]++;
    267           } else {
    268             TotalStats[0, 1]++;
    269           }
    270           if (actuallyRejected) {
    271             TotalStats[1, 0]++;
    272           } else {
    273             TotalStats[1, 1]++;
    274           }
    275           if (predictedRejected && actuallyRejected) {
    276             RejectedStats[1, i]++;
    277           }
    278           return quality;
    279         } else {
    280           while (targetValuesEnumerator.MoveNext() && e.MoveNext()) {
    281             pearsonRCalculator.Add(targetValuesEnumerator.Current, e.Current);
    282             ++i;
    283             if (i % interval == 0 || i == trainingPartitionSize) {
    284               var q = pearsonRCalculator.ErrorState != OnlineCalculatorError.None ? double.NaN : pearsonRCalculator.R;
    285               var quality = q * q;
    286               if (!(quality > threshold))
    287                 return quality;
    288             }
    289           }
    290           var r = pearsonRCalculator.ErrorState != OnlineCalculatorError.None ? double.NaN : pearsonRCalculator.R;
    291           var actualQuality = r * r;
     318          AdjustedEvaluatedSolutions += 1d;
    292319          return actualQuality;
    293320        }
    294       #endregion
     321        #endregion
    295322      } else {
    296323        var lsc = new OnlineLinearScalingParameterCalculator();
    297324        var rcalc = new OnlinePearsonsRCalculator();
    298         var actualQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, SymbolicExpressionTreeParameter.ActualValue, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, true);
    299 
    300         var values = estimatedValues.Zip(targetValues, (es, t) => new { Estimated = es, Target = t });
    301         int calculatedRows = 0;
    302         double quality = 0.0;
    303 
    304         foreach (var value in values) {
    305           lsc.Add(value.Estimated, value.Target);
    306           rcalc.Add(value.Estimated, value.Target);
     325        var interval = (int)Math.Round(RelativeFitnessEvaluationIntervalSize * problemData.TrainingPartition.Size);
     326        var quality = 0d;
     327        var calculatedRows = 0;
     328
     329        var cache = PreserveResultCompatibility ? new List<double>(problemData.TrainingPartition.Size) : null;
     330        foreach (var target in estimatedValues.Zip(targetValues, (e, t) => new { EstimatedValue = e, ActualValue = t })) {
     331          if (cache != null)
     332            cache.Add(target.EstimatedValue);
     333
     334          lsc.Add(target.EstimatedValue, target.ActualValue);
     335          rcalc.Add(target.EstimatedValue, target.ActualValue);
     336
    307337          calculatedRows++;
    308338
    309           if (calculatedRows % 5 == 0) {
     339          if (calculatedRows % interval != 0) continue;
     340
     341          var alpha = lsc.Alpha;
     342          var beta = lsc.Beta;
     343          if (lsc.ErrorState != OnlineCalculatorError.None) {
     344            alpha = 0;
     345            beta = 1;
     346          }
     347
     348          var calc = (OnlinePearsonsRCalculator)rcalc.Clone();
     349          foreach (var t in targetValues.Skip(calculatedRows)) {
     350            var s = (t - alpha) / beta; // scaled target
     351            calc.Add(s, t); // add pair (scaled, target) to the calculator
     352          }
     353          var r = calc.ErrorState == OnlineCalculatorError.None ? calc.R : 0d;
     354          quality = r * r;
     355
     356          if (!(quality > parentQuality)) {
     357            AdjustedEvaluatedSolutions += (double)calculatedRows / problemData.TrainingPartition.Size;
     358            return quality;
     359          }
     360        }
     361        if (PreserveResultCompatibility) {
     362          // Get the quality for all the rows. To ensure reproducibility of results between this evaluator
     363          // and the standard one, we calculate the quality in an identical way (otherwise the returned
     364          // quality could be slightly off due to rounding errors in the range 1e-15 to 1e-16).
     365          var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;
     366          double r;
     367          OnlineCalculatorError calculatorError;
     368
     369          if (applyLinearScaling) {
    310370            var alpha = lsc.Alpha;
    311371            var beta = lsc.Beta;
    312 
    313             OnlinePearsonsRCalculator calc = (OnlinePearsonsRCalculator)rcalc.Clone();
    314             foreach (var t in targetValues.Skip(calculatedRows)) {
    315               var scaledTarget = (t - alpha) / beta;
    316               calc.Add(scaledTarget, t);
    317             }
    318 
    319             var r = calc.ErrorState == OnlineCalculatorError.None ? calc.R : double.NaN;
    320             quality = r * r;
    321 
    322             if (quality < parentQuality && actualQuality > parentQuality) {
    323               Debugger.Break();
    324             }
    325             if (quality < parentQuality) return quality;
    326           }
     372            if (lsc.ErrorState != OnlineCalculatorError.None) {
     373              alpha = 0;
     374              beta = 1;
     375            }
     376            var boundedEstimatedValues = cache.Select(x => x * beta + alpha).LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
     377            r = OnlinePearsonsRCalculator.Calculate(boundedEstimatedValues, targetValues, out calculatorError);
     378          } else {
     379            var boundedEstimatedValues = cache.LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
     380            r = OnlinePearsonsRCalculator.Calculate(boundedEstimatedValues, targetValues, out calculatorError);
     381          }
     382          quality = calculatorError == OnlineCalculatorError.None ? r * r : 0d;
    327383        }
    328 
    329         //calculate quality for all rows
    330         {
    331           var r = rcalc.ErrorState == OnlineCalculatorError.None ? rcalc.R : double.NaN;
    332           quality = r * r;
    333           if (quality < parentQuality && actualQuality > parentQuality) {
    334             Debugger.Break();
    335           }
    336           if (double.IsNaN(quality)) quality = 0.0;
    337           if (quality != actualQuality) Debugger.Break();
    338 
    339           //necessary due to rounding errors and diff in the range of 10E-8
    340           quality = actualQuality;
    341         }
    342 
     384        AdjustedEvaluatedSolutions += 1d;
    343385        return quality;
    344386      }
Note: See TracChangeset for help on using the changeset viewer.