Changeset 14302


Ignore:
Timestamp:
09/23/16 09:47:25 (5 years ago)
Author:
mkommend
Message:

#2635: Rewrote parts of the OSGA evaluator.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.OSGAEvaluator/HeuristicLab.OSGAEvaluator/SymbolicRegressionSingleObjectiveOSGAEvaluator.cs

    r14301 r14302  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Diagnostics;
    2425using System.Linq;
    2526using HeuristicLab.Common;
     
    195196
    196197    private double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, DoubleLimit estimationLimits, IRegressionProblemData problemData, IEnumerable<int> rows) {
    197       var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows).LimitToRange(estimationLimits.Lower, estimationLimits.Upper);
    198       var targetValues = problemData.Dataset.GetReadOnlyDoubleValues(problemData.TargetVariable);
     198      var estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
     199      var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).ToList();
    199200
    200201      var parentQualities = ParentQualitiesParameter.ActualValue.Select(x => x.Value);
     
    204205      var parentQuality = minQuality + (maxQuality - minQuality) * comparisonFactor;
    205206
    206       var e = estimatedValues.GetEnumerator();
     207
    207208
    208209      #region fixed intervals
    209210      if (UseFixedEvaluationIntervals) {
     211        var e = estimatedValues.GetEnumerator();
    210212        double threshold = parentQuality * RelativeParentChildQualityThreshold;
    211213        if (UseAdaptiveQualityThreshold) {
     
    290292          return actualQuality;
    291293        }
    292         #endregion
     294      #endregion
    293295      } else {
    294         var trainingPartitionSize = problemData.TrainingPartition.Size;
    295         var interval = (int)Math.Floor(trainingPartitionSize * RelativeFitnessEvaluationIntervalSize);
    296         double quality = double.NaN;
    297         var estimated = new List<double>(); // save estimated values in a list so we don't re-evaluate
    298         // use the actual estimated values for the first i * interval rows of the training partition and assume the remaining rows are perfectly correlated
    299         // if the quality of the individual still falls below the parent quality, then we can reject it sooner, otherwise as i increases the whole estimated series will be used
    300296        var lsc = new OnlineLinearScalingParameterCalculator();
    301297        var rcalc = new OnlinePearsonsRCalculator();
    302         var actualQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, SymbolicExpressionTreeParameter.ActualValue, estimationLimits.Lower, estimationLimits.Upper, problemData, problemData.TrainingIndices, false);
    303         for (int i = 0; i < trainingPartitionSize; i += interval) {
    304           var start = problemData.TrainingPartition.Start;
    305           int end = Math.Min(trainingPartitionSize, i + interval);
    306           // cache estimated values
    307           // scale target values to the range of the estimated values
    308           for (int j = i; j < end && e.MoveNext(); ++j) {
    309             estimated.Add(e.Current);
    310             var index = j + start;
    311             // in the context of the linear scaling calculator, the target value becomes the "original"
    312             // while the estimated value becomes the "target" (because we want to scale the target in the range of the estimated)
    313             lsc.Add(targetValues[index], e.Current);
    314           }
    315           var a = lsc.Alpha; // additive scaling term
    316           var b = lsc.Beta;  // multiplicative scaling factor
    317           // calculate the quality
    318           for (int j = i; j < end; ++j) {
    319             var index = j + start;
    320             rcalc.Add(estimated[j], targetValues[index]);
    321           }
    322           var rcalc2 = (OnlinePearsonsRCalculator)rcalc.Clone();
    323           for (int j = end; j < trainingPartitionSize; ++j) {
    324             var index = j + start;
    325             var v = targetValues[index] * b + a;
    326             rcalc2.Add(v, targetValues[index]);
    327           }
    328           var r = rcalc2.ErrorState == OnlineCalculatorError.None ? rcalc2.R : double.NaN;
     298        var actualQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, SymbolicExpressionTreeParameter.ActualValue, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, true);
     299
     300        var values = estimatedValues.Zip(targetValues, (es, t) => new { Estimated = es, Target = t });
     301        int calculatedRows = 0;
     302        double quality = 0.0;
     303
     304        foreach (var value in values) {
     305          lsc.Add(value.Estimated, value.Target);
     306          rcalc.Add(value.Estimated, value.Target);
     307          calculatedRows++;
     308
     309          if (calculatedRows % 5 == 0) {
     310            var alpha = lsc.Alpha;
     311            var beta = lsc.Beta;
     312
     313            OnlinePearsonsRCalculator calc = (OnlinePearsonsRCalculator)rcalc.Clone();
     314            foreach (var t in targetValues.Skip(calculatedRows)) {
     315              var scaledTarget = (t - alpha) / beta;
     316              calc.Add(scaledTarget, t);
     317            }
     318
     319            var r = calc.ErrorState == OnlineCalculatorError.None ? calc.R : double.NaN;
     320            quality = r * r;
     321
     322            if (quality < parentQuality && actualQuality > parentQuality) {
     323              Debugger.Break();
     324            }
     325            if (quality < parentQuality) return quality;
     326          }
     327        }
     328
     329        //calculate quality for all rows
     330        {
     331          var r = rcalc.ErrorState == OnlineCalculatorError.None ? rcalc.R : double.NaN;
    329332          quality = r * r;
    330           bool falseReject = false;
    331           if (!(quality > parentQuality)) {
    332             if (actualQuality > parentQuality)
    333               falseReject = true;
    334           }
    335           //          if (!(quality > parentQuality))
    336           //            break;
     333          if (quality < parentQuality && actualQuality > parentQuality) {
     334            Debugger.Break();
     335          }
     336          if (double.IsNaN(quality)) quality = 0.0;
     337          if (quality != actualQuality) Debugger.Break();
     338
     339          //necessary due to rounding errors and differences in the range of 10E-8
     340          quality = actualQuality;
    337341        }
    338342
Note: See TracChangeset for help on using the changeset viewer.