Free cookie consent management tool by TermsFeed Policy Generator

Changeset 3996


Ignore:
Timestamp: 07/05/10 17:14:22 (14 years ago)
Author: gkronber
Message: Improved efficiency of analyzers and evaluators for regression problems. #1074

Location: trunk/sources
Files: 5 added, 10 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Analyzers/RegressionSolutionAnalyzer.cs

    r3923 r3996  
    119119      var bestSolution = UpdateBestSolution();
    120120      if (prevBestSolutionQuality == null || prevBestSolutionQuality.Value > BestSolutionQualityParameter.ActualValue.Value) {
    121         UpdateBestSolutionResults(bestSolution);
     121        RegressionSolutionAnalyzer.UpdateBestSolutionResults(bestSolution, ProblemData, Results, GenerationsParameter.ActualValue);
    122122      }
    123123
    124124      return base.Apply();
    125125    }
    126     private void UpdateBestSolutionResults(DataAnalysisSolution bestSolution) {
     126
     127    public static void UpdateBestSolutionResults(DataAnalysisSolution bestSolution, DataAnalysisProblemData problemData, ResultCollection results, IntValue CurrentGeneration) {
    127128      var solution = bestSolution;
    128129      #region update R2,MSE, Rel Error
    129       double[] trainingValues = ProblemData.Dataset.GetVariableValues(
    130         ProblemData.TargetVariable.Value,
    131         ProblemData.TrainingSamplesStart.Value,
    132         ProblemData.TrainingSamplesEnd.Value);
    133       double[] testValues = ProblemData.Dataset.GetVariableValues(
    134         ProblemData.TargetVariable.Value,
    135         ProblemData.TestSamplesStart.Value,
    136         ProblemData.TestSamplesEnd.Value);
    137       double trainingR2 = SimpleRSquaredEvaluator.Calculate(trainingValues, solution.EstimatedTrainingValues);
    138       double testR2 = SimpleRSquaredEvaluator.Calculate(testValues, solution.EstimatedTestValues);
    139       double trainingMse = SimpleMSEEvaluator.Calculate(trainingValues, solution.EstimatedTrainingValues);
    140       double testMse = SimpleMSEEvaluator.Calculate(testValues, solution.EstimatedTestValues);
    141       double trainingRelError = SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(trainingValues, solution.EstimatedTrainingValues);
    142       double testRelError = SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(testValues, solution.EstimatedTestValues);
    143       if (Results.ContainsKey(BestSolutionResultName)) {
    144         Results[BestSolutionResultName].Value = solution;
    145         Results[BestSolutionTrainingRSquared].Value = new DoubleValue(trainingR2);
    146         Results[BestSolutionTestRSquared].Value = new DoubleValue(testR2);
    147         Results[BestSolutionTrainingMse].Value = new DoubleValue(trainingMse);
    148         Results[BestSolutionTestMse].Value = new DoubleValue(testMse);
    149         Results[BestSolutionTrainingRelativeError].Value = new DoubleValue(trainingRelError);
    150         Results[BestSolutionTestRelativeError].Value = new DoubleValue(testRelError);
    151         if (GenerationsParameter.ActualValue != null) // this check is needed because linear regression solutions do not have a generations parameter
    152           Results[BestSolutionGeneration].Value = new IntValue(GenerationsParameter.ActualValue.Value);
     130      IEnumerable<double> trainingValues = problemData.Dataset.GetEnumeratedVariableValues(
     131        problemData.TargetVariable.Value,
     132        problemData.TrainingSamplesStart.Value,
     133        problemData.TrainingSamplesEnd.Value);
     134      IEnumerable<double> testValues = problemData.Dataset.GetEnumeratedVariableValues(
     135        problemData.TargetVariable.Value,
     136        problemData.TestSamplesStart.Value,
     137        problemData.TestSamplesEnd.Value);
     138      OnlineMeanSquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator();
     139      OnlineMeanAbsolutePercentageErrorEvaluator relErrorEvaluator = new OnlineMeanAbsolutePercentageErrorEvaluator();
     140      OnlinePearsonsRSquaredEvaluator r2Evaluator = new OnlinePearsonsRSquaredEvaluator();
     141      #region training
     142      var originalEnumerator = trainingValues.GetEnumerator();
     143      var estimatedEnumerator = solution.EstimatedTrainingValues.GetEnumerator();
     144      while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
     145        mseEvaluator.Add(originalEnumerator.Current, estimatedEnumerator.Current);
     146        r2Evaluator.Add(originalEnumerator.Current, estimatedEnumerator.Current);
     147        relErrorEvaluator.Add(originalEnumerator.Current, estimatedEnumerator.Current);
     148      }
     149      double trainingR2 = r2Evaluator.RSquared;
     150      double trainingMse = mseEvaluator.MeanSquaredError;
     151      double trainingRelError = relErrorEvaluator.MeanAbsolutePercentageError;
     152      #endregion
     153      mseEvaluator.Reset();
     154      relErrorEvaluator.Reset();
     155      r2Evaluator.Reset();
     156      #region test
     157      originalEnumerator = testValues.GetEnumerator();
     158      estimatedEnumerator = solution.EstimatedTestValues.GetEnumerator();
     159      while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
     160        mseEvaluator.Add(originalEnumerator.Current, estimatedEnumerator.Current);
     161        r2Evaluator.Add(originalEnumerator.Current, estimatedEnumerator.Current);
     162        relErrorEvaluator.Add(originalEnumerator.Current, estimatedEnumerator.Current);
     163      }
     164      double testR2 = r2Evaluator.RSquared;
     165      double testMse = mseEvaluator.MeanSquaredError;
     166      double testRelError = relErrorEvaluator.MeanAbsolutePercentageError;
     167      #endregion
     168      if (results.ContainsKey(BestSolutionResultName)) {
     169        results[BestSolutionResultName].Value = solution;
     170        results[BestSolutionTrainingRSquared].Value = new DoubleValue(trainingR2);
     171        results[BestSolutionTestRSquared].Value = new DoubleValue(testR2);
     172        results[BestSolutionTrainingMse].Value = new DoubleValue(trainingMse);
     173        results[BestSolutionTestMse].Value = new DoubleValue(testMse);
     174        results[BestSolutionTrainingRelativeError].Value = new DoubleValue(trainingRelError);
     175        results[BestSolutionTestRelativeError].Value = new DoubleValue(testRelError);
     176        if (CurrentGeneration != null) // this check is needed because linear regression solutions do not have a generations parameter
     177          results[BestSolutionGeneration].Value = new IntValue(CurrentGeneration.Value);
    153178      } else {
    154         Results.Add(new Result(BestSolutionResultName, solution));
    155         Results.Add(new Result(BestSolutionTrainingRSquared, new DoubleValue(trainingR2)));
    156         Results.Add(new Result(BestSolutionTestRSquared, new DoubleValue(testR2)));
    157         Results.Add(new Result(BestSolutionTrainingMse, new DoubleValue(trainingMse)));
    158         Results.Add(new Result(BestSolutionTestMse, new DoubleValue(testMse)));
    159         Results.Add(new Result(BestSolutionTrainingRelativeError, new DoubleValue(trainingRelError)));
    160         Results.Add(new Result(BestSolutionTestRelativeError, new DoubleValue(testRelError)));
    161         if (GenerationsParameter.ActualValue != null)
    162           Results.Add(new Result(BestSolutionGeneration, new IntValue(GenerationsParameter.ActualValue.Value)));
     179        results.Add(new Result(BestSolutionResultName, solution));
     180        results.Add(new Result(BestSolutionTrainingRSquared, new DoubleValue(trainingR2)));
     181        results.Add(new Result(BestSolutionTestRSquared, new DoubleValue(testR2)));
     182        results.Add(new Result(BestSolutionTrainingMse, new DoubleValue(trainingMse)));
     183        results.Add(new Result(BestSolutionTestMse, new DoubleValue(testMse)));
     184        results.Add(new Result(BestSolutionTrainingRelativeError, new DoubleValue(trainingRelError)));
     185        results.Add(new Result(BestSolutionTestRelativeError, new DoubleValue(testRelError)));
     186        if (CurrentGeneration != null)
     187          results.Add(new Result(BestSolutionGeneration, new IntValue(CurrentGeneration.Value)));
    163188      }
    164189      #endregion
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/BestSymbolicRegressionSolutionAnalyzer.cs

    r3925 r3996  
    9999          SymbolicExpressionTree[i]);
    100100        var solution = new SymbolicRegressionSolution(ProblemData, model, lowerEstimationLimit, upperEstimationLimit);
    101 
     101        solution.Name = BestSolutionParameterName;
     102        solution.Description = "Best solution on validation partition found over the whole run.";
    102103        BestSolutionParameter.ActualValue = solution;
    103104        BestSolutionQualityParameter.ActualValue = Quality[i];
    104 
    105         if (Results.ContainsKey(BestSolutionInputvariableCountResultName)) {
    106           Results[BestSolutionInputvariableCountResultName].Value = new IntValue(model.InputVariables.Count());
    107           Results[VariableImpactsResultName].Value = CalculateVariableImpacts();
    108         } else {
    109           Results.Add(new Result(BestSolutionInputvariableCountResultName, new IntValue(model.InputVariables.Count())));
    110           Results.Add(new Result(VariableImpactsResultName, CalculateVariableImpacts()));
    111         }
     105        BestSymbolicRegressionSolutionAnalyzer.UpdateSymbolicRegressionBestSolutionResults(solution, ProblemData, Results, VariableFrequencies);
    112106      }
    113107      return BestSolutionParameter.ActualValue;
    114108    }
    115109
    116     private DoubleMatrix CalculateVariableImpacts() {
    117       if (VariableFrequencies != null) {
    118         var impacts = new DoubleMatrix(VariableFrequencies.Rows.Count, 1, new string[] { "Impact" }, VariableFrequencies.Rows.Select(x => x.Name));
     110    public static void UpdateBestSolutionResults(SymbolicRegressionSolution bestSolution, DataAnalysisProblemData problemData, ResultCollection results, IntValue currentGeneration, DataTable variableFrequencies) {
     111      RegressionSolutionAnalyzer.UpdateBestSolutionResults(bestSolution, problemData, results, currentGeneration);
     112      UpdateSymbolicRegressionBestSolutionResults(bestSolution, problemData, results, variableFrequencies);
     113    }
     114
     115    private static void UpdateSymbolicRegressionBestSolutionResults(SymbolicRegressionSolution bestSolution, DataAnalysisProblemData problemData, ResultCollection results, DataTable variableFrequencies) {
     116      if (results.ContainsKey(BestSolutionInputvariableCountResultName)) {
     117        results[BestSolutionInputvariableCountResultName].Value = new IntValue(bestSolution.Model.InputVariables.Count());
     118        results[VariableImpactsResultName].Value = CalculateVariableImpacts(variableFrequencies);
     119      } else {
     120        results.Add(new Result(BestSolutionInputvariableCountResultName, new IntValue(bestSolution.Model.InputVariables.Count())));
     121        results.Add(new Result(VariableImpactsResultName, CalculateVariableImpacts(variableFrequencies)));
     122      }
     123    }
     124
     125
     126    private static DoubleMatrix CalculateVariableImpacts(DataTable variableFrequencies) {
     127      if (variableFrequencies != null) {
     128        var impacts = new DoubleMatrix(variableFrequencies.Rows.Count, 1, new string[] { "Impact" }, variableFrequencies.Rows.Select(x => x.Name));
    119129        impacts.SortableView = true;
    120130        int rowIndex = 0;
    121         foreach (var dataRow in VariableFrequencies.Rows) {
     131        foreach (var dataRow in variableFrequencies.Rows) {
    122132          string variableName = dataRow.Name;
    123133          double integral = 0;
     
    134144      } else return new DoubleMatrix(1, 1);
    135145    }
    136 
    137146  }
    138147}
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/Analyzers/SymbolicRegressionModelQualityAnalyzer.cs

    r3710 r3996  
    4545  [Item("SymbolicRegressionModelQualityAnalyzer", "An operator for analyzing the quality of symbolic regression solutions symbolic expression tree encoding.")]
    4646  [StorableClass]
    47   public sealed class SymbolicRegressionModelQualityAnalyzer : AlgorithmOperator, ISymbolicRegressionAnalyzer {
     47  public sealed class SymbolicRegressionModelQualityAnalyzer : SingleSuccessorOperator, ISymbolicRegressionAnalyzer {
    4848    private const string SymbolicExpressionTreeInterpreterParameterName = "SymbolicExpressionTreeInterpreter";
    4949    private const string SymbolicExpressionTreeParameterName = "SymbolicExpressionTree";
     
    130130    }
    131131    #endregion
    132 
    133     [Storable]
    134     private UniformSubScopesProcessor subScopesProcessor;
    135     [Storable]
    136     private MinAverageMaxValueAnalyzer minAvgMaxTrainingMseAnalyzer;
    137     [Storable]
    138     private MinAverageMaxValueAnalyzer minAvgMaxTestMseAnalyzer;
    139     [Storable]
    140     private MinAverageMaxValueAnalyzer minAvgMaxTrainingRSquaredAnalyzer;
    141     [Storable]
    142     private MinAverageMaxValueAnalyzer minAvgMaxTestRSquaredAnalyzer;
    143     [Storable]
    144     private MinAverageMaxValueAnalyzer minAvgMaxTrainingRelErrorAnalyzer;
    145     [Storable]
    146     private MinAverageMaxValueAnalyzer minAvgMaxTestRelErrorAnalyzer;
     132    #region properties
     133    public DoubleValue UpperEstimationLimit {
     134      get { return UpperEstimationLimitParameter.ActualValue; }
     135    }
     136    public DoubleValue LowerEstimationLimit {
     137      get { return LowerEstimationLimitParameter.ActualValue; }
     138    }
     139    #endregion
    147140
    148141    public SymbolicRegressionModelQualityAnalyzer()
     
    161154      Parameters.Add(new ValueLookupParameter<DataTable>(RelativeErrorValuesParameterName, "The data table to collect relative error values."));
    162155      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection where the best symbolic regression solution should be stored."));
    163 
    164       #region operator initialization
    165       subScopesProcessor = new UniformSubScopesProcessor();
    166       SymbolicRegressionModelQualityCalculator trainingQualityCalculator = new SymbolicRegressionModelQualityCalculator();
    167       SymbolicRegressionModelQualityCalculator testQualityCalculator = new SymbolicRegressionModelQualityCalculator();
    168       minAvgMaxTrainingMseAnalyzer = new MinAverageMaxValueAnalyzer();
    169       minAvgMaxTestMseAnalyzer = new MinAverageMaxValueAnalyzer();
    170 
    171       minAvgMaxTrainingRSquaredAnalyzer = new MinAverageMaxValueAnalyzer();
    172       minAvgMaxTestRSquaredAnalyzer = new MinAverageMaxValueAnalyzer();
    173 
    174       minAvgMaxTrainingRelErrorAnalyzer = new MinAverageMaxValueAnalyzer();
    175       minAvgMaxTestRelErrorAnalyzer = new MinAverageMaxValueAnalyzer();
    176       #endregion
    177 
    178       #region parameter wiring
    179       subScopesProcessor.Depth.Value = SymbolicExpressionTreeParameter.Depth;
    180       trainingQualityCalculator.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
    181       trainingQualityCalculator.ProblemDataParameter.ActualName = ProblemDataParameter.Name;
    182       trainingQualityCalculator.SamplesStartParameter.ActualName = TrainingSamplesStartParameter.Name;
    183       trainingQualityCalculator.SamplesEndParameter.ActualName = TrainingSamplesEndParameter.Name;
    184       trainingQualityCalculator.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
    185       trainingQualityCalculator.SymbolicExpressionTreeParameter.ActualName = SymbolicExpressionTreeParameter.Name;
    186       trainingQualityCalculator.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
    187       trainingQualityCalculator.AverageRelativeErrorQualityParameter.ActualName = TrainingAverageRelativeErrorQualityParameterName;
    188       trainingQualityCalculator.MeanSquaredErrorQualityParameter.ActualName = TrainingMeanSquaredErrorQualityParameterName;
    189       trainingQualityCalculator.RSquaredQualityParameter.ActualName = TrainingRSquaredQualityParameterName;
    190 
    191       testQualityCalculator.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
    192       testQualityCalculator.ProblemDataParameter.ActualName = ProblemDataParameter.Name;
    193       testQualityCalculator.SamplesStartParameter.ActualName = TestSamplesStartParameter.Name;
    194       testQualityCalculator.SamplesEndParameter.ActualName = TestSamplesEndParameter.Name;
    195       testQualityCalculator.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
    196       testQualityCalculator.SymbolicExpressionTreeParameter.ActualName = SymbolicExpressionTreeParameter.Name;
    197       testQualityCalculator.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
    198       testQualityCalculator.AverageRelativeErrorQualityParameter.ActualName = TestAverageRelativeErrorQualityParameterName;
    199       testQualityCalculator.MeanSquaredErrorQualityParameter.ActualName = TestMeanSquaredErrorQualityParameterName;
    200       testQualityCalculator.RSquaredQualityParameter.ActualName = TestRSquaredQualityParameterName;
    201       #region training/test MSE
    202       minAvgMaxTrainingMseAnalyzer.ValueParameter.ActualName = TrainingMeanSquaredErrorQualityParameterName;
    203       minAvgMaxTrainingMseAnalyzer.ValueParameter.Depth = SymbolicExpressionTreeParameter.Depth;
    204       minAvgMaxTrainingMseAnalyzer.AverageValueParameter.ActualName = AverageTrainingMeanSquaredErrorQualityParameterName;
    205       minAvgMaxTrainingMseAnalyzer.MaxValueParameter.ActualName = MaxTrainingMeanSquaredErrorQualityParameterName;
    206       minAvgMaxTrainingMseAnalyzer.MinValueParameter.ActualName = MinTrainingMeanSquaredErrorQualityParameterName;
    207       minAvgMaxTrainingMseAnalyzer.ValuesParameter.ActualName = MeanSquaredErrorValuesParameterName;
    208       minAvgMaxTrainingMseAnalyzer.ResultsParameter.ActualName = ResultsParameter.Name;
    209       minAvgMaxTrainingMseAnalyzer.CollectMinValueInResultsParameter.Value = new BoolValue(false);
    210       minAvgMaxTrainingMseAnalyzer.CollectAverageValueInResultsParameter.Value = new BoolValue(false);
    211       minAvgMaxTrainingMseAnalyzer.CollectMaxValueInResultsParameter.Value = new BoolValue(false);
    212 
    213       minAvgMaxTestMseAnalyzer.ValueParameter.ActualName = TestMeanSquaredErrorQualityParameterName;
    214       minAvgMaxTestMseAnalyzer.ValueParameter.Depth = SymbolicExpressionTreeParameter.Depth;
    215       minAvgMaxTestMseAnalyzer.AverageValueParameter.ActualName = AverageTestMeanSquaredErrorQualityParameterName;
    216       minAvgMaxTestMseAnalyzer.MaxValueParameter.ActualName = MaxTestMeanSquaredErrorQualityParameterName;
    217       minAvgMaxTestMseAnalyzer.MinValueParameter.ActualName = MinTestMeanSquaredErrorQualityParameterName;
    218       minAvgMaxTestMseAnalyzer.ValuesParameter.ActualName = MeanSquaredErrorValuesParameterName;
    219       minAvgMaxTestMseAnalyzer.ResultsParameter.ActualName = ResultsParameter.Name;
    220       minAvgMaxTestMseAnalyzer.CollectMinValueInResultsParameter.Value = new BoolValue(false);
    221       minAvgMaxTestMseAnalyzer.CollectAverageValueInResultsParameter.Value = new BoolValue(false);
    222       minAvgMaxTestMseAnalyzer.CollectMaxValueInResultsParameter.Value = new BoolValue(false);
    223 
    224       #endregion
    225       #region training/test R²
    226       minAvgMaxTrainingRSquaredAnalyzer.ValueParameter.ActualName = TrainingRSquaredQualityParameterName;
    227       minAvgMaxTrainingRSquaredAnalyzer.ValueParameter.Depth = SymbolicExpressionTreeParameter.Depth;
    228       minAvgMaxTrainingRSquaredAnalyzer.AverageValueParameter.ActualName = AverageTrainingRSquaredQualityParameterName;
    229       minAvgMaxTrainingRSquaredAnalyzer.MaxValueParameter.ActualName = MaxTrainingRSquaredQualityParameterName;
    230       minAvgMaxTrainingRSquaredAnalyzer.MinValueParameter.ActualName = MinTrainingRSquaredQualityParameterName;
    231       minAvgMaxTrainingRSquaredAnalyzer.ValuesParameter.ActualName = RSquaredValuesParameterName;
    232       minAvgMaxTrainingRSquaredAnalyzer.ResultsParameter.ActualName = ResultsParameter.Name;
    233       minAvgMaxTrainingRSquaredAnalyzer.CollectMinValueInResultsParameter.Value = new BoolValue(false);
    234       minAvgMaxTrainingRSquaredAnalyzer.CollectAverageValueInResultsParameter.Value = new BoolValue(false);
    235       minAvgMaxTrainingRSquaredAnalyzer.CollectMaxValueInResultsParameter.Value = new BoolValue(false);
    236 
    237 
    238       minAvgMaxTestRSquaredAnalyzer.ValueParameter.ActualName = TestRSquaredQualityParameterName;
    239       minAvgMaxTestRSquaredAnalyzer.ValueParameter.Depth = SymbolicExpressionTreeParameter.Depth;
    240       minAvgMaxTestRSquaredAnalyzer.AverageValueParameter.ActualName = AverageTestRSquaredQualityParameterName;
    241       minAvgMaxTestRSquaredAnalyzer.MaxValueParameter.ActualName = MaxTestRSquaredQualityParameterName;
    242       minAvgMaxTestRSquaredAnalyzer.MinValueParameter.ActualName = MinTestRSquaredQualityParameterName;
    243       minAvgMaxTestRSquaredAnalyzer.ValuesParameter.ActualName = RSquaredValuesParameterName;
    244       minAvgMaxTestRSquaredAnalyzer.ResultsParameter.ActualName = ResultsParameter.Name;
    245       minAvgMaxTestRSquaredAnalyzer.CollectMinValueInResultsParameter.Value = new BoolValue(false);
    246       minAvgMaxTestRSquaredAnalyzer.CollectAverageValueInResultsParameter.Value = new BoolValue(false);
    247       minAvgMaxTestRSquaredAnalyzer.CollectMaxValueInResultsParameter.Value = new BoolValue(false);
    248 
    249       #endregion
    250       #region training/test avg. rel. error
    251       minAvgMaxTrainingRelErrorAnalyzer.ValueParameter.ActualName = TrainingAverageRelativeErrorQualityParameterName;
    252       minAvgMaxTrainingRelErrorAnalyzer.ValueParameter.Depth = SymbolicExpressionTreeParameter.Depth;
    253       minAvgMaxTrainingRelErrorAnalyzer.AverageValueParameter.ActualName = AverageTrainingAverageRelativeErrorQualityParameterName;
    254       minAvgMaxTrainingRelErrorAnalyzer.MaxValueParameter.ActualName = MaxTrainingAverageRelativeErrorQualityParameterName;
    255       minAvgMaxTrainingRelErrorAnalyzer.MinValueParameter.ActualName = MinTrainingAverageRelativeErrorQualityParameterName;
    256       minAvgMaxTrainingRelErrorAnalyzer.ValuesParameter.ActualName = RelativeErrorValuesParameterName;
    257       minAvgMaxTrainingRelErrorAnalyzer.ResultsParameter.ActualName = ResultsParameter.Name;
    258       minAvgMaxTrainingRelErrorAnalyzer.CollectMinValueInResultsParameter.Value = new BoolValue(false);
    259       minAvgMaxTrainingRelErrorAnalyzer.CollectAverageValueInResultsParameter.Value = new BoolValue(false);
    260       minAvgMaxTrainingRelErrorAnalyzer.CollectMaxValueInResultsParameter.Value = new BoolValue(false);
    261 
    262       minAvgMaxTestRelErrorAnalyzer.ValueParameter.ActualName = TestAverageRelativeErrorQualityParameterName;
    263       minAvgMaxTestRelErrorAnalyzer.ValueParameter.Depth = SymbolicExpressionTreeParameter.Depth;
    264       minAvgMaxTestRelErrorAnalyzer.AverageValueParameter.ActualName = AverageTestAverageRelativeErrorQualityParameterName;
    265       minAvgMaxTestRelErrorAnalyzer.MaxValueParameter.ActualName = MaxTestAverageRelativeErrorQualityParameterName;
    266       minAvgMaxTestRelErrorAnalyzer.MinValueParameter.ActualName = MinTestAverageRelativeErrorQualityParameterName;
    267       minAvgMaxTestRelErrorAnalyzer.ValuesParameter.ActualName = RelativeErrorValuesParameterName;
    268       minAvgMaxTestRelErrorAnalyzer.ResultsParameter.ActualName = ResultsParameter.Name;
    269       minAvgMaxTestRelErrorAnalyzer.CollectMinValueInResultsParameter.Value = new BoolValue(false);
    270       minAvgMaxTestRelErrorAnalyzer.CollectAverageValueInResultsParameter.Value = new BoolValue(false);
    271       minAvgMaxTestRelErrorAnalyzer.CollectMaxValueInResultsParameter.Value = new BoolValue(false);
    272       #endregion
    273       #endregion
    274 
    275       #region operator graph
    276       OperatorGraph.InitialOperator = subScopesProcessor;
    277       subScopesProcessor.Operator = trainingQualityCalculator;
    278       trainingQualityCalculator.Successor = testQualityCalculator;
    279       testQualityCalculator.Successor = null;
    280       subScopesProcessor.Successor = minAvgMaxTrainingMseAnalyzer;
    281       minAvgMaxTrainingMseAnalyzer.Successor = minAvgMaxTestMseAnalyzer;
    282       minAvgMaxTestMseAnalyzer.Successor = minAvgMaxTrainingRSquaredAnalyzer;
    283       minAvgMaxTrainingRSquaredAnalyzer.Successor = minAvgMaxTestRSquaredAnalyzer;
    284       minAvgMaxTestRSquaredAnalyzer.Successor = minAvgMaxTrainingRelErrorAnalyzer;
    285       minAvgMaxTrainingRelErrorAnalyzer.Successor = minAvgMaxTestRelErrorAnalyzer;
    286       minAvgMaxTestRelErrorAnalyzer.Successor = null;
    287       #endregion
    288 
    289       Initialize();
    290156    }
    291157
     
    293159    private SymbolicRegressionModelQualityAnalyzer(bool deserializing) : base() { }
    294160
    295     [StorableHook(HookType.AfterDeserialization)]
    296     private void Initialize() {
    297       SymbolicExpressionTreeParameter.DepthChanged += new EventHandler(SymbolicExpressionTreeParameter_DepthChanged);
    298     }
    299 
    300     public override IDeepCloneable Clone(Cloner cloner) {
    301       SymbolicRegressionModelQualityAnalyzer clone = (SymbolicRegressionModelQualityAnalyzer)base.Clone(cloner);
    302       clone.Initialize();
    303       return clone;
    304     }
    305 
    306     private void SymbolicExpressionTreeParameter_DepthChanged(object sender, EventArgs e) {
    307       subScopesProcessor.Depth.Value = SymbolicExpressionTreeParameter.Depth;
    308       minAvgMaxTrainingMseAnalyzer.ValueParameter.Depth = SymbolicExpressionTreeParameter.Depth;
    309       minAvgMaxTrainingRelErrorAnalyzer.ValueParameter.Depth = SymbolicExpressionTreeParameter.Depth;
    310       minAvgMaxTrainingRSquaredAnalyzer.ValueParameter.Depth = SymbolicExpressionTreeParameter.Depth;
    311       minAvgMaxTestMseAnalyzer.ValueParameter.Depth = SymbolicExpressionTreeParameter.Depth;
    312       minAvgMaxTestRelErrorAnalyzer.ValueParameter.Depth = SymbolicExpressionTreeParameter.Depth;
    313       minAvgMaxTestRSquaredAnalyzer.ValueParameter.Depth = SymbolicExpressionTreeParameter.Depth;
     161    public override IOperation Apply() {
     162      Analyze(SymbolicExpressionTreeParameter.ActualValue, SymbolicExpressionTreeInterpreterParameter.ActualValue,
     163        UpperEstimationLimit.Value, LowerEstimationLimit.Value, ProblemDataParameter.ActualValue,
     164        TrainingSamplesStartParameter.ActualValue.Value, TrainingSamplesEndParameter.ActualValue.Value,
     165        TestSamplesStartParameter.ActualValue.Value, TestSamplesEndParameter.ActualValue.Value,
     166        ResultsParameter.ActualValue);
     167      return base.Apply();
     168    }
     169
     170    public static void Analyze(IEnumerable<SymbolicExpressionTree> trees, ISymbolicExpressionTreeInterpreter interpreter,
     171      double upperEstimationLimit, double lowerEstimationLimit,
     172      DataAnalysisProblemData problemData, int trainingStart, int trainingEnd, int testStart, int testEnd, ResultCollection results) {
     173      int targetVariableIndex = problemData.Dataset.GetVariableIndex(problemData.TargetVariable.Value);
     174      IEnumerable<double> originalTrainingValues = problemData.Dataset.GetEnumeratedVariableValues(targetVariableIndex, trainingStart, trainingEnd);
     175      IEnumerable<double> originalTestValues = problemData.Dataset.GetEnumeratedVariableValues(targetVariableIndex, testStart, testEnd);
     176      List<double> trainingMse = new List<double>();
     177      List<double> trainingR2 = new List<double>();
     178      List<double> trainingRelErr = new List<double>();
     179      List<double> testMse = new List<double>();
     180      List<double> testR2 = new List<double>();
     181      List<double> testRelErr = new List<double>();
     182
     183      OnlineMeanSquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator();
     184      OnlineMeanAbsolutePercentageErrorEvaluator relErrEvaluator = new OnlineMeanAbsolutePercentageErrorEvaluator();
     185      OnlinePearsonsRSquaredEvaluator r2Evaluator = new OnlinePearsonsRSquaredEvaluator();
     186
     187      foreach (var tree in trees) {
     188        #region training
     189        var estimatedTrainingValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, Enumerable.Range(trainingStart, trainingEnd - trainingStart));
     190        mseEvaluator.Reset();
     191        r2Evaluator.Reset();
     192        relErrEvaluator.Reset();
     193        var estimatedEnumerator = estimatedTrainingValues.GetEnumerator();
     194        var originalEnumerator = originalTrainingValues.GetEnumerator();
     195        while (estimatedEnumerator.MoveNext() & originalEnumerator.MoveNext()) {
     196          double estimated = estimatedEnumerator.Current;
     197          if (double.IsNaN(estimated)) estimated = upperEstimationLimit;
     198          else estimated = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, estimated));
     199          mseEvaluator.Add(originalEnumerator.Current, estimated);
     200          r2Evaluator.Add(originalEnumerator.Current, estimated);
     201          relErrEvaluator.Add(originalEnumerator.Current, estimated);
     202        }
     203        if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext()) {
     204          throw new InvalidOperationException("Number of elements in estimated and original enumeration doesn't match.");
     205        }
     206        trainingMse.Add(mseEvaluator.MeanSquaredError);
     207        trainingR2.Add(r2Evaluator.RSquared);
     208        trainingRelErr.Add(relErrEvaluator.MeanAbsolutePercentageError);
     209        #endregion
     210        #region test
     211        var estimatedTestValues = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, Enumerable.Range(testStart, testEnd - testStart));
     212
     213        mseEvaluator.Reset();
     214        r2Evaluator.Reset();
     215        relErrEvaluator.Reset();
     216        estimatedEnumerator = estimatedTestValues.GetEnumerator();
     217        originalEnumerator = originalTestValues.GetEnumerator();
     218        while (estimatedEnumerator.MoveNext() & originalEnumerator.MoveNext()) {
     219          double estimated = estimatedEnumerator.Current;
     220          if (double.IsNaN(estimated)) estimated = upperEstimationLimit;
     221          else estimated = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, estimated));
     222          mseEvaluator.Add(originalEnumerator.Current, estimated);
     223          r2Evaluator.Add(originalEnumerator.Current, estimated);
     224          relErrEvaluator.Add(originalEnumerator.Current, estimated);
     225        }
     226        if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext()) {
     227          throw new InvalidOperationException("Number of elements in estimated and original enumeration doesn't match.");
     228        }
     229        testMse.Add(mseEvaluator.MeanSquaredError);
     230        testR2.Add(r2Evaluator.RSquared);
     231        testRelErr.Add(relErrEvaluator.MeanAbsolutePercentageError);
     232        #endregion
     233      }
     234
     235      AddResultTableValues(results, MeanSquaredErrorValuesParameterName, "mean squared error (training)", trainingMse.Min(), trainingMse.Average(), trainingMse.Max());
     236      AddResultTableValues(results, MeanSquaredErrorValuesParameterName, "mean squared error (test)", testMse.Min(), testMse.Average(), testMse.Max());
     237      AddResultTableValues(results, RelativeErrorValuesParameterName, "mean relative error (training)", trainingRelErr.Min(), trainingRelErr.Average(), trainingRelErr.Max());
     238      AddResultTableValues(results, RelativeErrorValuesParameterName, "mean relative error (test)", testRelErr.Min(), testRelErr.Average(), testRelErr.Max());
     239      AddResultTableValues(results, RSquaredValuesParameterName, "Pearson's R² (training)", trainingR2.Min(), trainingR2.Average(), trainingR2.Max());
     240      AddResultTableValues(results, RSquaredValuesParameterName, "Pearson's R² (test)", testR2.Min(), testR2.Average(), testR2.Max());
     241    }
     242
     243    private static void AddResultTableValues(ResultCollection results, string tableName, string valueName, double minValue, double avgValue, double maxValue) {
     244      if (!results.ContainsKey(tableName)) {
     245        results.Add(new Result(tableName, new DataTable(tableName)));
     246      }
     247      DataTable table = (DataTable)results[tableName].Value;
     248      AddValue(table, minValue, "Min. " + valueName, string.Empty);
     249      AddValue(table, avgValue, "Avg. " + valueName, string.Empty);
     250      AddValue(table, maxValue, "Max. " + valueName, string.Empty);
     251    }
     252
     253    private static void AddValue(DataTable table, double data, string name, string description) {
     254      DataRow row;
     255      table.Rows.TryGetValue(name, out row);
     256      if (row == null) {
     257        row = new DataRow(name, description);
     258        row.Values.Add(data);
     259        table.Rows.Add(row);
     260      } else {
     261        row.Values.Add(data);
     262      }
     263    }
     264
     265
     266    private static void SetResultValue(ResultCollection results, string name, double value) {
     267      if (results.ContainsKey(name))
     268        results[name].Value = new DoubleValue(value);
     269      else
     270        results.Add(new Result(name, new DoubleValue(value)));
    314271    }
    315272  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionMeanSquaredErrorEvaluator.cs

    r3995 r3996  
    7272
    7373    public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end) {
    74       var estimatedValues = from x in interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start))
    75                             let boundedX = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, x))
    76                             select double.IsNaN(boundedX) ? upperEstimationLimit : boundedX;
    77       var originalValues = dataset.GetEnumeratedVariableValues(targetVariable, start, end);
    78       return SimpleMSEEvaluator.Calculate(originalValues, estimatedValues);
     74      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start));
     75      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, start, end);
     76      IEnumerator<double> originalEnumerator = originalValues.GetEnumerator();
     77      IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator();
     78      OnlineMeanSquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator();
     79
     80      while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
     81        double estimated = estimatedEnumerator.Current;
     82        double original = originalEnumerator.Current;
     83        if (double.IsNaN(estimated))
     84          estimated = upperEstimationLimit;
     85        else
     86          estimated = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, estimated));
     87        mseEvaluator.Add(original, estimated);
     88      }
     89
     90      if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext()) {
     91        throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
     92      } else {
     93        return mseEvaluator.MeanSquaredError;
     94      }
    7995    }
    8096  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblem.cs

    r3877 r3996  
    340340      operators = new List<IOperator>();
    341341      operators.AddRange(ApplicationManager.Manager.GetInstances<ISymbolicExpressionTreeOperator>().OfType<IOperator>());
    342       operators.Add(new ValidationBestScaledSymbolicRegressionSolutionAnalyzer());
     342      operators.Add(new SymbolicRegressionVariableFrequencyAnalyzer());
     343      operators.Add(new FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer());
    343344      operators.Add(new MinAverageMaxSymbolicExpressionTreeSizeAnalyzer());
    344       operators.Add(new SymbolicRegressionVariableFrequencyAnalyzer());
    345345      ParameterizeOperators();
    346346      ParameterizeAnalyzers();
     
    365365      foreach (var analyzer in Analyzers) {
    366366        analyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
    367         var bestValidationSolutionAnalyzer = analyzer as ValidationBestScaledSymbolicRegressionSolutionAnalyzer;
     367        var fixedBestValidationSolutionAnalyzer = analyzer as FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer;
     368        if (fixedBestValidationSolutionAnalyzer != null) {
     369          fixedBestValidationSolutionAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
     370          fixedBestValidationSolutionAnalyzer.UpperEstimationLimitParameter.ActualName = UpperEstimationLimitParameter.Name;
     371          fixedBestValidationSolutionAnalyzer.LowerEstimationLimitParameter.ActualName = LowerEstimationLimitParameter.Name;
     372          fixedBestValidationSolutionAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
     373          fixedBestValidationSolutionAnalyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
     374          fixedBestValidationSolutionAnalyzer.ValidationSamplesStartParameter.Value = ValidationSamplesStart;
     375          fixedBestValidationSolutionAnalyzer.ValidationSamplesEndParameter.Value = ValidationSamplesEnd;
     376          fixedBestValidationSolutionAnalyzer.BestKnownQualityParameter.ActualName = BestKnownQualityParameter.Name;
     377          fixedBestValidationSolutionAnalyzer.QualityParameter.ActualName = Evaluator.QualityParameter.ActualName;
     378        }
     379        var bestValidationSolutionAnalyzer = analyzer as FixedValidationBestScaledSymbolicRegressionSolutionAnalyzer;
    368380        if (bestValidationSolutionAnalyzer != null) {
    369381          bestValidationSolutionAnalyzer.ProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
     
    372384          bestValidationSolutionAnalyzer.SymbolicExpressionTreeInterpreterParameter.ActualName = SymbolicExpressionTreeInterpreterParameter.Name;
    373385          bestValidationSolutionAnalyzer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
    374           bestValidationSolutionAnalyzer.TrainingSamplesStartParameter.Value = TrainingSamplesStart;
    375           bestValidationSolutionAnalyzer.TrainingSamplesEndParameter.Value = TrainingSamplesEnd;
    376386          bestValidationSolutionAnalyzer.ValidationSamplesStartParameter.Value = ValidationSamplesStart;
    377387          bestValidationSolutionAnalyzer.ValidationSamplesEndParameter.Value = ValidationSamplesEnd;
    378           bestValidationSolutionAnalyzer.TestSamplesStartParameter.Value = TestSamplesStart;
    379           bestValidationSolutionAnalyzer.TestSamplesEndParameter.Value = TestSamplesEnd;
    380388          bestValidationSolutionAnalyzer.BestKnownQualityParameter.ActualName = BestKnownQualityParameter.Name;
    381389          bestValidationSolutionAnalyzer.QualityParameter.ActualName = Evaluator.QualityParameter.ActualName;
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionScaledMeanSquaredErrorEvaluator.cs

    r3995 r3996  
    8383
    8484    public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end, double beta, double alpha) {
    85       //IEnumerable<double> estimatedValues = from x in interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start))
    86       //                                      let boundedX = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, x * beta + alpha))
    87       //                                      select double.IsNaN(boundedX) ? upperEstimationLimit : boundedX;
    8885      IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start));
    8986      IEnumerable<double> originalValues = dataset.GetEnumeratedVariableValues(targetVariable, start, end);
    9087      IEnumerator<double> originalEnumerator = originalValues.GetEnumerator();
    9188      IEnumerator<double> estimatedEnumerator = estimatedValues.GetEnumerator();
    92       double cnt = 0;
    93       double sse = 0;
     89      OnlineMeanSquaredErrorEvaluator mseEvaluator = new OnlineMeanSquaredErrorEvaluator();
    9490
    9591      while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
    9692        double estimated = estimatedEnumerator.Current * beta + alpha;
    9793        double original = originalEnumerator.Current;
    98         estimated = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, estimated));
    9994        if (double.IsNaN(estimated))
    10095          estimated = upperEstimationLimit;
    101         if (!double.IsNaN(estimated) && !double.IsInfinity(estimated) &&
    102             !double.IsNaN(original) && !double.IsInfinity(original)) {
    103           double error = estimated - original;
    104           sse += error * error;
    105           cnt++;
    106         }
     96        else
     97          estimated = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, estimated));
     98        mseEvaluator.Add(original, estimated);
    10799      }
    108100
    109101      if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext()) {
    110102        throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
    111       } else if (cnt == 0) {
    112         throw new ArgumentException("Mean squared errors is not defined for input vectors of NaN or Inf");
    113103      } else {
    114         double mse = sse / cnt;
    115         return mse;
     104        return mseEvaluator.MeanSquaredError;
    116105      }
    117106    }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Evaluators/SimpleMSEEvaluator.cs

    r3462 r3996  
    4545
    4646    public static double Calculate(IEnumerable<double> original, IEnumerable<double> estimated) {
    47       double sse = 0.0;
    48       int cnt = 0;
     47      var onlineMseEvaluator = new OnlineMeanSquaredErrorEvaluator();
    4948      var originalEnumerator = original.GetEnumerator();
    5049      var estimatedEnumerator = estimated.GetEnumerator();
     
    5251        double e = estimatedEnumerator.Current;
    5352        double o = originalEnumerator.Current;
    54         if (!double.IsNaN(e) && !double.IsInfinity(e) &&
    55             !double.IsNaN(o) && !double.IsInfinity(o)) {
    56           double error = e - o;
    57           sse += error * error;
    58           cnt++;
    59         }
     53        onlineMseEvaluator.Add(o, e);
    6054      }
    6155      if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext()) {
    6256        throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
    63       } else if (cnt == 0) {
    64         throw new ArgumentException("Mean squared errors is not defined for input vectors of NaN or Inf");
    6557      } else {
    66         double mse = sse / cnt;
    67         return mse;
     58        return onlineMseEvaluator.MeanSquaredError;
    6859      }
    6960    }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Evaluators/SimpleMeanAbsolutePercentageErrorEvaluator.cs

    r3980 r3996  
    4848
    4949    public static double Calculate(IEnumerable<double> original, IEnumerable<double> estimated) {
    50       double sre = 0;
    51       int cnt = 0;
     50      OnlineMeanAbsolutePercentageErrorEvaluator onlineEvaluator = new OnlineMeanAbsolutePercentageErrorEvaluator();
    5251      var originalEnumerator = original.GetEnumerator();
    5352      var estimatedEnumerator = estimated.GetEnumerator();
     
    5554        double e = estimatedEnumerator.Current;
    5655        double o = originalEnumerator.Current;
    57         if (!double.IsNaN(e) && !double.IsInfinity(e) &&
    58             !double.IsNaN(o) && !double.IsInfinity(o) && !o.IsAlmost(0.0)) {
    59           sre += Math.Abs((e - o) / o);
    60           cnt++;
    61         }
     56        onlineEvaluator.Add(o, e);
    6257      }
    6358      if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext()) {
    6459        throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
    65       } else if (cnt == 0) {
    66         throw new ArgumentException("Average relative error is not defined for input vectors of NaN or Inf");
    6760      } else {
    68         return sre / cnt;
     61        return onlineEvaluator.MeanAbsolutePercentageError;
    6962      }
    7063    }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Evaluators/SimpleRSquaredEvaluator.cs

    r3452 r3996  
    4949
    5050    public static double Calculate(IEnumerable<double> original, IEnumerable<double> estimated) {
     51      var onlinePearsonRSquaredEvaluator = new OnlinePearsonsRSquaredEvaluator();
    5152      var originalEnumerator = original.GetEnumerator();
    5253      var estimatedEnumerator = estimated.GetEnumerator();
    53       originalEnumerator.MoveNext();
    54       estimatedEnumerator.MoveNext();
    55       double e = estimatedEnumerator.Current;
    56       double o = originalEnumerator.Current;
    5754
    58       // stable and iterative calculation of R² in one pass over original and estimated
    59       double sum_sq_x = 0.0;
    60       double sum_sq_y = 0.0;
    61       double sum_coproduct = 0.0;
    62       if (IsInvalidValue(o) || IsInvalidValue(e)) {
    63         throw new ArgumentException("R² is not defined for variables with NaN or infinity values.");
    64       }
    65       double mean_x = o;
    66       double mean_y = e;
    67       int n = 1;
    6855      while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
    69         e = estimatedEnumerator.Current;
    70         o = originalEnumerator.Current;
    71         double sweep = (n - 1.0) / n;
    72         if (IsInvalidValue(o) || IsInvalidValue(e)) {
    73           throw new ArgumentException("Correlation coefficient is not defined for variables with NaN or infinity values.");
    74         }
    75         double delta_x = o - mean_x;
    76         double delta_y = e - mean_y;
    77         sum_sq_x += delta_x * delta_x * sweep;
    78         sum_sq_y += delta_y * delta_y * sweep;
    79         sum_coproduct += delta_x * delta_y * sweep;
    80         mean_x += delta_x / n;
    81         mean_y += delta_y / n;
    82         n++;
     56        double e = estimatedEnumerator.Current;
     57        double o = originalEnumerator.Current;
     58        onlinePearsonRSquaredEvaluator.Add(o, e);
    8359      }
    8460      if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext()) {
    8561        throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
    8662      } else {
    87         double pop_sd_x = Math.Sqrt(sum_sq_x / n);
    88         double pop_sd_y = Math.Sqrt(sum_sq_y / n);
    89         double cov_x_y = sum_coproduct / n;
    90 
    91         if (pop_sd_x.IsAlmost(0.0) || pop_sd_y.IsAlmost(0.0))
    92           return 0.0;
    93         else {
    94           double r = cov_x_y / (pop_sd_x * pop_sd_y);
    95           return r * r;
    96         }
     63        return onlinePearsonRSquaredEvaluator.RSquared;
    9764      }
    9865    }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Symbolic/SimpleArithmeticExpressionInterpreter.cs

    r3841 r3996  
    147147      switch (currentInstr.opCode) {
    148148        case OpCodes.Add: {
    149             double s = 0.0;
    150             for (int i = 0; i < currentInstr.nArguments; i++) {
     149            double s = Evaluate();
     150            for (int i = 1; i < currentInstr.nArguments; i++) {
    151151              s += Evaluate();
    152152            }
Note: See TracChangeset for help on using the changeset viewer.