#region License Information /* HeuristicLab * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Linq; using System.Text; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Operators; using HeuristicLab.DataAnalysis; using HeuristicLab.Random; namespace HeuristicLab.GP.StructureIdentification { public class UncertainMeanSquaredErrorEvaluator : MeanSquaredErrorEvaluator { public override string Description { get { return @"Evaluates 'FunctionTree' for all samples of the dataset and calculates the mean-squared-error for the estimated values vs. the real values of 'TargetVariable'. This operator stops the computation as soon as an upper limit for the mean-squared-error is reached."; } } public UncertainMeanSquaredErrorEvaluator() : base() { AddVariableInfo(new VariableInfo("Random", "", typeof(MersenneTwister), VariableKind.In)); AddVariableInfo(new VariableInfo("MinEvaluatedSamples", "", typeof(IntData), VariableKind.In)); AddVariableInfo(new VariableInfo("QualityLimit", "The upper limit of the MSE which is used as early stopping criterion.", typeof(DoubleData), VariableKind.In)); AddVariableInfo(new VariableInfo("ConfidenceBounds", "Confidence bounds of the calculated MSE", typeof(DoubleData), VariableKind.New | VariableKind.Out)); AddVariableInfo(new VariableInfo("ActuallyEvaluatedSamples", "", typeof(IntData), VariableKind.New | VariableKind.Out)); } // evaluates the function-tree for the given target-variable and the whole dataset and returns the MSE public override void Evaluate(IScope scope, BakedTreeEvaluator evaluator, HeuristicLab.DataAnalysis.Dataset dataset, int targetVariable, int start, int end, bool updateTargetValues) { double qualityLimit = GetVariableValue("QualityLimit", scope, false).Data; int minSamples = GetVariableValue("MinEvaluatedSamples", scope, true).Data; MersenneTwister mt = GetVariableValue("Random", scope, true); DoubleData mse = GetVariableValue("MSE", scope, false, false); if (mse == null) { mse = new DoubleData(); scope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName("MSE"), mse)); } DoubleData confidenceBounds = GetVariableValue("ConfidenceBounds", scope, false, false); if (confidenceBounds == null) { confidenceBounds = new DoubleData(); scope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName("ConfidenceBounds"), confidenceBounds)); } IntData evaluatedSamples = GetVariableValue("ActuallyEvaluatedSamples", scope, false, false); if (evaluatedSamples == null) { evaluatedSamples = new IntData(); scope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName("ActuallyEvaluatedSamples"), evaluatedSamples)); } int rows = end - start; double mean = 0; double stdDev = 0; double confidenceInterval = 0; double m2 = 0; int[] indexes = InitIndexes(mt, start, end); int n = 0; for (int sample = 0; sample < rows; sample++) { double estimated = evaluator.Evaluate(indexes[sample]); double original = dataset.GetValue(indexes[sample], targetVariable); if (!double.IsNaN(original) && !double.IsInfinity(original)) { n++; double error = estimated - original; double squaredError = error * error; double delta = squaredError - mean; mean = mean + delta / n; m2 = m2 + delta * (squaredError - mean); if (n > minSamples && n % minSamples == 0) { stdDev = Math.Sqrt(Math.Sqrt(m2 / (n - 1))); confidenceInterval = 2.364 * stdDev / Math.Sqrt(n); if (qualityLimit < mean - confidenceInterval || qualityLimit > mean + confidenceInterval) { break; } } } } evaluatedSamples.Data = n; mse.Data = mean; stdDev = Math.Sqrt(Math.Sqrt(m2 / (n - 1))); confidenceBounds.Data = 2.364 * stdDev / Math.Sqrt(n); } private int[] InitIndexes(MersenneTwister mt, int start, int end) { int n = end - start; int[] indexes = new int[n]; for (int i = 0; i < n; i++) indexes[i] = i + start; for (int i = 0; i < n - 1; i++) { int j = mt.Next(i, n); int tmp = indexes[j]; indexes[j] = indexes[i]; indexes[i] = tmp; } return indexes; } } }