#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using System.Drawing;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.PluginInfrastructure;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Operators;
using HeuristicLab.Problems.DataAnalysis.Evaluators;
using HeuristicLab.Problems.DataAnalysis.Symbolic;

namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
  /// <summary>
  /// Evaluator that linearly scales the output of a symbolic expression tree
  /// (estimated * beta + alpha) before computing the mean squared error against
  /// the target variable. The scaling parameters found during evaluation are
  /// written to the "Alpha" and "Beta" lookup parameters.
  /// </summary>
  [Item("SymbolicRegressionScaledMeanSquaredErrorEvaluator", "Calculates the mean squared error of a linearly scaled symbolic regression solution.")]
  [StorableClass]
  public class SymbolicRegressionScaledMeanSquaredErrorEvaluator : SymbolicRegressionMeanSquaredErrorEvaluator {
    // Values whose magnitude reaches this bound are ignored when the scaling parameters are fitted.
    private const double ScalingValueLimit = 1.0E07;

    #region parameter properties
    /// <summary>Lookup parameter holding the additive scaling term (alpha).</summary>
    public ILookupParameter<DoubleValue> AlphaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters["Alpha"]; }
    }
    /// <summary>Lookup parameter holding the multiplicative scaling term (beta).</summary>
    public ILookupParameter<DoubleValue> BetaParameter {
      get { return (ILookupParameter<DoubleValue>)Parameters["Beta"]; }
    }
    #endregion

    #region properties
    /// <summary>Actual value of <see cref="AlphaParameter"/>.</summary>
    public DoubleValue Alpha {
      get { return AlphaParameter.ActualValue; }
      set { AlphaParameter.ActualValue = value; }
    }
    /// <summary>Actual value of <see cref="BetaParameter"/>.</summary>
    public DoubleValue Beta {
      get { return BetaParameter.ActualValue; }
      set { BetaParameter.ActualValue = value; }
    }
    #endregion

    /// <summary>
    /// Creates the evaluator and registers the "Alpha" and "Beta" lookup parameters.
    /// </summary>
    public SymbolicRegressionScaledMeanSquaredErrorEvaluator()
      : base() {
      Parameters.Add(new LookupParameter<DoubleValue>("Alpha", "Alpha parameter for linear scaling of the estimated values."));
      Parameters.Add(new LookupParameter<DoubleValue>("Beta", "Beta parameter for linear scaling of the estimated values."));
    }

    /// <summary>
    /// Computes the scaled mean squared error of <paramref name="solution"/> and stores
    /// the scaling parameters that were determined in <see cref="Alpha"/> and <see cref="Beta"/>.
    /// </summary>
    /// <param name="interpreter">Interpreter used to evaluate the tree.</param>
    /// <param name="solution">Symbolic expression tree to evaluate.</param>
    /// <param name="dataset">Dataset supplying the input rows and the target values.</param>
    /// <param name="targetVariable">Name of the target variable.</param>
    /// <param name="samplesStart">Value is forwarded as the <c>start</c> row of <see cref="Calculate"/>.</param>
    /// <param name="samplesEnd">Value is forwarded as the <c>end</c> row of <see cref="Calculate"/>.</param>
    /// <returns>The value returned by <see cref="Calculate"/>.</returns>
    protected override double Evaluate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, StringValue targetVariable, IntValue samplesStart, IntValue samplesEnd) {
      double alpha, beta;
      double mse = Calculate(interpreter, solution, LowerEstimationLimit.Value, UpperEstimationLimit.Value, dataset, targetVariable.Value, samplesStart.Value, samplesEnd.Value, out beta, out alpha);
      Alpha = new DoubleValue(alpha);
      Beta = new DoubleValue(beta);
      return mse;
    }

    /// <summary>
    /// Evaluates <paramref name="solution"/> on rows start..end-1, fits the linear scaling
    /// parameters with <see cref="CalculateScalingParameters"/>, applies them, limits the
    /// scaled estimates to the estimation limits and returns the result of
    /// <c>SimpleMSEEvaluator.Calculate</c> for the target and the limited estimates.
    /// </summary>
    /// <param name="interpreter">Interpreter used to evaluate the tree.</param>
    /// <param name="solution">Symbolic expression tree to evaluate.</param>
    /// <param name="lowerEstimationLimit">Lower bound applied to every scaled estimate.</param>
    /// <param name="upperEstimationLimit">Upper bound applied to every scaled estimate; also substituted for NaN estimates.</param>
    /// <param name="dataset">Dataset supplying the input rows and the target values.</param>
    /// <param name="targetVariable">Name of the target variable.</param>
    /// <param name="start">First row that is evaluated.</param>
    /// <param name="end">Row after the last evaluated row (end - start rows are evaluated).</param>
    /// <param name="beta">Receives the multiplicative scaling term.</param>
    /// <param name="alpha">Receives the additive scaling term.</param>
    /// <returns>The error value computed by <c>SimpleMSEEvaluator.Calculate</c>.</returns>
    public static double Calculate(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end, out double beta, out double alpha) {
      IEnumerable<double> scaledValues = CalculateScaledEstimatedValues(interpreter, solution, dataset, targetVariable, start, end, out beta, out alpha);
      IEnumerable<double> estimatedValues = ApplyEstimationLimits(scaledValues, lowerEstimationLimit, upperEstimationLimit);
      var originalValues = dataset.GetVariableValues(targetVariable, start, end);
      return SimpleMSEEvaluator.Calculate(originalValues, estimatedValues);
    }

    /// <summary>
    /// Same as <see cref="Calculate"/>, but uses the supplied scaling parameters
    /// instead of fitting them: every estimate x becomes x * beta + alpha before
    /// the estimation limits are applied.
    /// </summary>
    /// <param name="interpreter">Interpreter used to evaluate the tree.</param>
    /// <param name="solution">Symbolic expression tree to evaluate.</param>
    /// <param name="lowerEstimationLimit">Lower bound applied to every scaled estimate.</param>
    /// <param name="upperEstimationLimit">Upper bound applied to every scaled estimate; also substituted for NaN estimates.</param>
    /// <param name="dataset">Dataset supplying the input rows and the target values.</param>
    /// <param name="targetVariable">Name of the target variable.</param>
    /// <param name="start">First row that is evaluated.</param>
    /// <param name="end">Row after the last evaluated row (end - start rows are evaluated).</param>
    /// <param name="beta">Multiplicative scaling term.</param>
    /// <param name="alpha">Additive scaling term.</param>
    /// <returns>The error value computed by <c>SimpleMSEEvaluator.Calculate</c>.</returns>
    public static double CalculateWithScaling(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, Dataset dataset, string targetVariable, int start, int end, double beta, double alpha) {
      IEnumerable<double> scaledValues =
        interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start))
                   .Select(x => x * beta + alpha);
      IEnumerable<double> estimatedValues = ApplyEstimationLimits(scaledValues, lowerEstimationLimit, upperEstimationLimit);
      var originalValues = dataset.GetVariableValues(targetVariable, start, end);
      return SimpleMSEEvaluator.Calculate(originalValues, estimatedValues);
    }

    /// <summary>
    /// Lazily limits every value to [lowerEstimationLimit, upperEstimationLimit].
    /// A value that is still NaN after limiting is replaced by the upper limit.
    /// </summary>
    private static IEnumerable<double> ApplyEstimationLimits(IEnumerable<double> values, double lowerEstimationLimit, double upperEstimationLimit) {
      return from x in values
             let boundedX = Math.Min(upperEstimationLimit, Math.Max(lowerEstimationLimit, x))
             select double.IsNaN(boundedX) ? upperEstimationLimit : boundedX;
    }

    /// <summary>
    /// Evaluates the tree on rows start..end-1, fits beta and alpha against the target
    /// values and returns the estimates after applying estimate * beta + alpha.
    /// </summary>
    private static IEnumerable<double> CalculateScaledEstimatedValues(ISymbolicExpressionTreeInterpreter interpreter, SymbolicExpressionTree solution, Dataset dataset, string targetVariable, int start, int end, out double beta, out double alpha) {
      // Materialize once: the values are needed both for fitting and for scaling.
      double[] estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, Enumerable.Range(start, end - start)).ToArray();
      var originalValues = dataset.GetVariableValues(targetVariable, start, end);
      CalculateScalingParameters(originalValues, estimatedValues, out beta, out alpha);
      for (int i = 0; i < estimatedValues.Length; i++) {
        estimatedValues[i] = estimatedValues[i] * beta + alpha;
      }
      return estimatedValues;
    }

    /// <summary>
    /// Fits the linear scaling estimated * beta + alpha to the original values.
    /// Only rows where both values pass the validity check (not NaN, not infinite,
    /// magnitude below 1.0E07) are used. With more than two valid rows, beta is
    /// sum((x - xMean) * (t - tMean)) / sum((x - xMean)^2), or 1 when that denominator is
    /// almost zero, and alpha is tMean - beta * xMean. Otherwise alpha = 0 and beta = 1.
    /// </summary>
    /// <param name="original">Target values.</param>
    /// <param name="estimated">Estimated values; must have the same number of elements as <paramref name="original"/>.</param>
    /// <param name="beta">Receives the multiplicative scaling term.</param>
    /// <param name="alpha">Receives the additive scaling term.</param>
    /// <exception cref="ArgumentException">The two sequences differ in length.</exception>
    public static void CalculateScalingParameters(IEnumerable<double> original, IEnumerable<double> estimated, out double beta, out double alpha) {
      double[] originalValues = original.ToArray();
      double[] estimatedValues = estimated.ToArray();
      if (originalValues.Length != estimatedValues.Length) {
        throw new ArgumentException("The number of original values (" + originalValues.Length + ") does not match the number of estimated values (" + estimatedValues.Length + ").");
      }

      var filteredResult = (from row in Enumerable.Range(0, originalValues.Length)
                            let t = originalValues[row]
                            let e = estimatedValues[row]
                            where IsValidValue(t)
                            where IsValidValue(e)
                            select new { Estimation = e, Target = t })
                           .OrderBy(x => Math.Abs(x.Target)) // make sure small values are considered before large values
                           .ToArray();

      // calculate alpha and beta on the subset of rows with valid values
      originalValues = filteredResult.Select(x => x.Target).ToArray();
      estimatedValues = filteredResult.Select(x => x.Estimation).ToArray();
      int n = originalValues.Length;
      if (n > 2) {
        double tMean = originalValues.Average();
        double xMean = estimatedValues.Average();
        double sumXT = 0;
        double sumXX = 0;
        for (int i = 0; i < n; i++) {
          double x = estimatedValues[i];
          double t = originalValues[i];
          sumXT += (x - xMean) * (t - tMean);
          sumXX += (x - xMean) * (x - xMean);
        }
        // Estimates that are (almost) constant give no slope information; keep them unscaled.
        if (!sumXX.IsAlmost(0.0)) {
          beta = sumXT / sumXX;
        } else {
          beta = 1;
        }
        alpha = tMean - beta * xMean;
      } else {
        // Too few valid rows: fall back to the identity scaling.
        alpha = 0.0;
        beta = 1.0;
      }
    }

    /// <summary>
    /// Returns true when <paramref name="d"/> is neither NaN nor infinite and lies
    /// strictly between -1.0E07 and 1.0E07.
    /// </summary>
    private static bool IsValidValue(double d) {
      // don't consider very large or very small values for scaling
      return !double.IsInfinity(d) && !double.IsNaN(d) && d > -ScalingValueLimit && d < ScalingValueLimit;
    }
  }
}