Changeset 3452


Ignore:
Timestamp:
04/20/10 20:31:23 (11 years ago)
Author:
gkronber
Message:

Included tracking of the best-of-run solution (based on the validation set) and calculation of MSE, R², and relative error on the training and test sets. #938 (Data types and operators for regression problems)

Location:
trunk/sources
Files:
8 edited
6 copied

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/BestValidationSymbolicRegressionSolutionVisualizer.cs

    r3442 r3452  
    2929using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3030using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     31using HeuristicLab.Problems.DataAnalysis.Evaluators;
     32using HeuristicLab.Problems.DataAnalysis.Symbolic;
    3133
    3234namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
     
    3739  [StorableClass]
    3840  public sealed class BestValidationSymbolicRegressionSolutionVisualizer : SingleSuccessorOperator, ISingleObjectiveSolutionsVisualizer, ISolutionsVisualizer {
     41    private const string EvaluatorParameterName = "Evaluator";
    3942    private const string SymbolicRegressionModelParameterName = "SymbolicRegressionModel";
    4043    private const string DataAnalysisProblemDataParameterName = "DataAnalysisProblemData";
    4144    private const string BestValidationSolutionParameterName = "BestValidationSolution";
     45    private const string ValidationSamplesStartParameterName = "ValidationSamplesStart";
     46    private const string ValidationSamplesEndParameterName = "ValidationSamplesEnd";
    4247    private const string QualityParameterName = "Quality";
     48    private const string ResultsParameterName = "Results";
     49
     50    #region parameter properties
     51    public ILookupParameter<ISymbolicRegressionEvaluator> EvaluatorParameter {
     52      get { return (ILookupParameter<ISymbolicRegressionEvaluator>)Parameters[EvaluatorParameterName]; }
     53    }
     54    public IValueLookupParameter<IntValue> ValidationSamplesStartParameter {
     55      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesStartParameterName]; }
     56    }
     57    public IValueLookupParameter<IntValue> ValidationSamplesEndParameter {
     58      get { return (IValueLookupParameter<IntValue>)Parameters[ValidationSamplesEndParameterName]; }
     59    }
     60
    4361    public ILookupParameter<ItemArray<SymbolicExpressionTree>> SymbolicExpressionTreeParameter {
    4462      get { return (ILookupParameter<ItemArray<SymbolicExpressionTree>>)Parameters[SymbolicRegressionModelParameterName]; }
     
    5876    }
    5977
     78    public ILookupParameter<ResultCollection> ResultParameter {
     79      get { return (ILookupParameter<ResultCollection>)Parameters[ResultsParameterName]; }
     80    }
     81    #endregion
     82
     83    #region properties
     84    public ISymbolicRegressionEvaluator Evaluator {
     85      get { return EvaluatorParameter.ActualValue; }
     86    }
     87    public IntValue ValidationSamplesStart {
     88      get { return ValidationSamplesStartParameter.ActualValue; }
     89    }
     90    public IntValue ValidationSamplesEnd {
     91      get { return ValidationSamplesEndParameter.ActualValue; }
     92    }
     93    #endregion
     94
    6095    public BestValidationSymbolicRegressionSolutionVisualizer()
    6196      : base() {
     
    6398      Parameters.Add(new SubScopesLookupParameter<DoubleValue>(QualityParameterName, "The quality of the symbolic regression solutions."));
    6499      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(DataAnalysisProblemDataParameterName, "The symbolic regression problme data on which the best solution should be evaluated."));
     100      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesStartParameterName, "The start index of the validation partition (part of the training partition)."));
     101      Parameters.Add(new ValueLookupParameter<IntValue>(ValidationSamplesEndParameterName, "The end index of the validation partition (part of the training partition)."));
    65102      Parameters.Add(new LookupParameter<SymbolicRegressionSolution>(BestValidationSolutionParameterName, "The best symbolic expression tree based on the validation data for the symbolic regression problem."));
    66       Parameters.Add(new LookupParameter<ResultCollection>("Results"));
     103      Parameters.Add(new LookupParameter<ResultCollection>(ResultsParameterName, "The result collection of the algorithm."));
    67104    }
    68105
     
    70107      ItemArray<SymbolicExpressionTree> expressions = SymbolicExpressionTreeParameter.ActualValue;
    71108      DataAnalysisProblemData problemData = DataAnalysisProblemDataParameter.ActualValue;
    72       ItemArray<DoubleValue> qualities = QualityParameter.ActualValue;
    73109
    74       var bestExpressionIndex = (from index in Enumerable.Range(0, qualities.Count())
    75                                  select new { Index = index, Quality = qualities[index] }).OrderBy(x => x.Quality).Select(x => x.Index).First();
     110      int validationSamplesStart = ValidationSamplesStart.Value;
     111      int validationSamplesEnd = ValidationSamplesEnd.Value;
     112      var validationValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, validationSamplesStart, validationSamplesEnd);
    76113
    77       var bestExpression = expressions[bestExpressionIndex];
    78       SymbolicRegressionSolution bestSolution = BestValidationSolutionParameter.ActualValue;
    79       if (bestSolution == null) BestValidationSolutionParameter.ActualValue = CreateDataAnalysisSolution(problemData, bestExpression);
    80       else {
    81         bestSolution.Model = CreateModel(problemData, bestExpression);
     114      var currentBestExpression = (from expression in expressions
     115                                   let validationQuality = SymbolicRegressionMeanSquaredErrorEvaluator.Calculate(expression, problemData.Dataset, problemData.TargetVariable.Value, validationSamplesStart, validationSamplesEnd)
     116                                   select new { Expression = expression, ValidationQuality = validationQuality })
     117                                   .OrderBy(x => x.ValidationQuality)
     118                                   .First();
     119
     120      SymbolicRegressionSolution bestOfRunSolution = BestValidationSolutionParameter.ActualValue;
     121      if (bestOfRunSolution == null) {
     122        // no best of run solution yet -> make a solution from the currentBestExpression
     123        UpdateBestOfRunSolution(problemData, currentBestExpression.Expression);
     124      } else {
     125        // compare quality of current best with best of run solution
     126        var estimatedValidationValues = bestOfRunSolution.EstimatedValues.Skip(validationSamplesStart).Take(validationSamplesEnd - validationSamplesStart);
     127        var bestOfRunValidationQuality = SimpleMSEEvaluator.Calculate(validationValues, estimatedValidationValues);
     128        if (bestOfRunValidationQuality > currentBestExpression.ValidationQuality) {
     129          UpdateBestOfRunSolution(problemData, currentBestExpression.Expression);
     130        }
    82131      }
    83       // ((ResultCollection)Parameters["Results"].ActualValue).Add(new Result("ValidationMSE", new DoubleValue(3.15)));
     132
     133
    84134      return base.Apply();
     135    }
     136
     137    private void UpdateBestOfRunSolution(DataAnalysisProblemData problemData, SymbolicExpressionTree tree) {
     138      var newBestSolution = CreateDataAnalysisSolution(problemData, tree);
     139      BestValidationSolutionParameter.ActualValue = newBestSolution;
     140
     141      var trainingValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TrainingSamplesStart.Value, problemData.TrainingSamplesEnd.Value);
     142      var testValues = problemData.Dataset.GetVariableValues(problemData.TargetVariable.Value, problemData.TestSamplesStart.Value, problemData.TestSamplesEnd.Value);
     143
     144      AddResult("MeanSquaredError (Training)", new DoubleValue(SimpleMSEEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
     145      AddResult("MeanRelativeError (Training)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
     146      AddResult("RSquared (Training)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(trainingValues, newBestSolution.EstimatedTrainingValues)));
     147
     148      AddResult("MeanSquaredError (Test)", new DoubleValue(SimpleMSEEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
     149      AddResult("MeanRelativeError (Test)", new PercentValue(SimpleMeanAbsolutePercentageErrorEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
     150      AddResult("RSquared (Test)", new DoubleValue(SimpleRSquaredEvaluator.Calculate(testValues, newBestSolution.EstimatedTestValues)));
     151    }
     152
     153    private void AddResult(string resultName, IItem value) {
     154      var resultCollection = ResultParameter.ActualValue;
     155      if (resultCollection.ContainsKey(resultName)) {
     156        resultCollection[resultName].Value = value;
     157      } else {
     158        resultCollection.Add(new Result(resultName, value));
     159      }
    85160    }
    86161
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/ISymbolicRegressionEvaluator.cs

    r3373 r3452  
    3636namespace HeuristicLab.Problems.DataAnalysis.Regression.Symbolic {
    3737  public interface ISymbolicRegressionEvaluator : ISingleObjectiveEvaluator {
    38     ILookupParameter<SymbolicExpressionTree> FunctionTreeParameter { get; }
     38    ILookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter { get; }
    3939    ILookupParameter<DataAnalysisProblemData> RegressionProblemDataParameter { get; }
     40    IValueLookupParameter<IntValue> SamplesStartParameter { get; }
     41    IValueLookupParameter<IntValue> SamplesEndParameter { get; }
    4042    ILookupParameter<DoubleValue> NumberOfEvaluatedNodesParameter { get; }
    4143  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionEvaluator.cs

    r3374 r3452  
    4242    private const string FunctionTreeParameterName = "FunctionTree";
    4343    private const string RegressionProblemDataParameterName = "RegressionProblemData";
     44    private const string SamplesStartParameterName = "SamplesStart";
     45    private const string SamplesEndParameterName = "SamplesEnd";
    4446    private const string NumberOfEvaluatedNodexParameterName = "NumberOfEvaluatedNodes";
    4547    #region ISymbolicRegressionEvaluator Members
     
    4951    }
    5052
    51     public ILookupParameter<SymbolicExpressionTree> FunctionTreeParameter {
     53    public ILookupParameter<SymbolicExpressionTree> SymbolicExpressionTreeParameter {
    5254      get { return (ILookupParameter<SymbolicExpressionTree>)Parameters[FunctionTreeParameterName]; }
    5355    }
     
    5759    }
    5860
    59     //public ILookupParameter<IntValue> SamplesStartParameter {
    60     //  get { return (ILookupParameter<IntValue>)Parameters["SamplesStart"]; }
    61     //}
     61    public IValueLookupParameter<IntValue> SamplesStartParameter {
     62      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesStartParameterName]; }
     63    }
    6264
    63     //public ILookupParameter<IntValue> SamplesEndParameter {
    64     //  get { return (ILookupParameter<IntValue>)Parameters["SamplesEnd"]; }
    65     //}
     65    public IValueLookupParameter<IntValue> SamplesEndParameter {
     66      get { return (IValueLookupParameter<IntValue>)Parameters[SamplesEndParameterName]; }
     67    }
    6668
    6769    public ILookupParameter<DoubleValue> NumberOfEvaluatedNodesParameter {
    6870      get { return (ILookupParameter<DoubleValue>)Parameters[NumberOfEvaluatedNodexParameterName]; }
    6971    }
    70 
     72    #endregion
     73    #region properties
     74    public SymbolicExpressionTree SymbolicExpressionTree {
     75      get { return SymbolicExpressionTreeParameter.ActualValue; }
     76    }
     77    public DataAnalysisProblemData RegressionProblemData {
     78      get { return RegressionProblemDataParameter.ActualValue; }
     79    }
     80    public IntValue SamplesStart {
     81      get { return SamplesStartParameter.ActualValue; }
     82    }
     83    public IntValue SamplesEnd {
     84      get { return SamplesEndParameter.ActualValue; }
     85    }
    7186    #endregion
    7287
     
    7590      Parameters.Add(new LookupParameter<DoubleValue>(QualityParameterName, "The quality of the evaluated symbolic regression solution."));
    7691      Parameters.Add(new LookupParameter<SymbolicExpressionTree>(FunctionTreeParameterName, "The symbolic regression solution encoded as a symbolic expression tree."));
    77       Parameters.Add(new LookupParameter<DataAnalysisProblemData>(RegressionProblemDataParameterName, "The data set on which the symbolic regression solution should be evaluated."));
     92      Parameters.Add(new LookupParameter<DataAnalysisProblemData>(RegressionProblemDataParameterName, "The problem data on which the symbolic regression solution should be evaluated."));
     93      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesStartParameterName, "The start index of the dataset partition on which the symbolic regression solution should be evaluated."));
     94      Parameters.Add(new ValueLookupParameter<IntValue>(SamplesEndParameterName, "The end index of the dataset partition on which the symbolic regression solution should be evaluated."));
    7895      Parameters.Add(new LookupParameter<DoubleValue>(NumberOfEvaluatedNodexParameterName, "The number of evaluated nodes so far (for performance measurements.)"));
    7996    }
    8097
    8198    public override IOperation Apply() {
    82       SymbolicExpressionTree solution = FunctionTreeParameter.ActualValue;
    83       DataAnalysisProblemData regressionProblemData = RegressionProblemDataParameter.ActualValue;
    8499      DoubleValue numberOfEvaluatedNodes = NumberOfEvaluatedNodesParameter.ActualValue;
    85      
    86       QualityParameter.ActualValue = new DoubleValue(Evaluate(solution, regressionProblemData.Dataset, regressionProblemData.TargetVariable, regressionProblemData.TrainingSamplesStart, regressionProblemData.TrainingSamplesEnd, numberOfEvaluatedNodes));
     100      QualityParameter.ActualValue = new DoubleValue(Evaluate(SymbolicExpressionTree, RegressionProblemData.Dataset,
     101        RegressionProblemData.TargetVariable, SamplesStart, SamplesEnd, numberOfEvaluatedNodes));
    87102      return null;
    88103    }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Regression/3.3/Symbolic/SymbolicRegressionProblem.cs

    r3442 r3452  
    8484      get { return VisualizerParameter; }
    8585    }
    86     public ValueParameter<DoubleValue> BestKnownQualityParameter {
    87       get { return (ValueParameter<DoubleValue>)Parameters["BestKnownQuality"]; }
     86    public OptionalValueParameter<DoubleValue> BestKnownQualityParameter {
     87      get { return (OptionalValueParameter<DoubleValue>)Parameters["BestKnownQuality"]; }
    8888    }
    8989    IParameter ISingleObjectiveProblem.BestKnownQualityParameter {
     
    147147      Parameters.Add(new ValueParameter<SymbolicExpressionTreeCreator>("SolutionCreator", "The operator which should be used to create new symbolic regression solutions.", creator));
    148148      Parameters.Add(new ValueParameter<ISymbolicRegressionEvaluator>("Evaluator", "The operator which should be used to evaluate symbolic regression solutions.", evaluator));
    149       Parameters.Add(new ValueParameter<DoubleValue>("BestKnownQuality", "The minimal error value that can be reached by symbolic regression models.", new DoubleValue(0)));
     149      Parameters.Add(new OptionalValueParameter<DoubleValue>("BestKnownQuality", "The minimal error value that reached by symbolic regression solutions for the problem."));
    150150      Parameters.Add(new ValueParameter<ISymbolicExpressionGrammar>("FunctionTreeGrammar", "The grammar that should be used for symbolic regression models.", globalGrammar));
    151151      Parameters.Add(new ValueParameter<IntValue>("MaxExpressionLength", "Maximal length of the symbolic expression.", new IntValue(100)));
     
    159159      creator.MaxFunctionArgumentsParameter.ActualName = "MaxFunctionArguments";
    160160      creator.MaxFunctionDefinitionsParameter.ActualName = "MaxFunctionDefiningBranches";
    161       evaluator.QualityParameter.ActualName = "TrainingMeanSquaredError";
    162161      DataAnalysisProblemDataParameter.ValueChanged += new EventHandler(DataAnalysisProblemDataParameter_ValueChanged);
    163162      DataAnalysisProblemData.ProblemDataChanged += new EventHandler(DataAnalysisProblemData_Changed);
     
    169168    }
    170169
    171     void DataAnalysisProblemDataParameter_ValueChanged(object sender, EventArgs e) {
    172       DataAnalysisProblemData.ProblemDataChanged += new EventHandler(DataAnalysisProblemData_Changed);
    173     }
    174 
    175     void DataAnalysisProblemData_Changed(object sender, EventArgs e) {
    176       foreach (var varSymbol in FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>()) {
    177         varSymbol.VariableNames = DataAnalysisProblemData.InputVariables.Select(x => x.Value);
    178       }
    179     }
    180170
    181171    [StorableConstructor]
     
    189179
    190180    #region Events
     181    void DataAnalysisProblemDataParameter_ValueChanged(object sender, EventArgs e) {
     182      DataAnalysisProblemData.ProblemDataChanged += new EventHandler(DataAnalysisProblemData_Changed);
     183    }
     184
     185    void DataAnalysisProblemData_Changed(object sender, EventArgs e) {
     186      foreach (var varSymbol in FunctionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Symbols.Variable>()) {
     187        varSymbol.VariableNames = DataAnalysisProblemData.InputVariables.Select(x => x.Value);
     188      }
     189      UpdatePartitioningParameters();
     190    }
     191
     192    private void UpdatePartitioningParameters() {
     193      int trainingStart = DataAnalysisProblemData.TrainingSamplesStart.Value;
     194      int validationEnd = DataAnalysisProblemData.TrainingSamplesEnd.Value;
     195      int trainingEnd = trainingStart + (validationEnd - trainingStart) / 2;
     196      int validationStart = trainingEnd;
     197      var solutionVisualizer = Visualizer as BestValidationSymbolicRegressionSolutionVisualizer;
     198      if (solutionVisualizer != null) {
     199        solutionVisualizer.ValidationSamplesStartParameter.Value = new IntValue(validationStart);
     200        solutionVisualizer.ValidationSamplesEndParameter.Value = new IntValue(validationEnd);
     201      }
     202      Evaluator.SamplesStartParameter.Value = new IntValue(trainingStart);
     203      Evaluator.SamplesEndParameter.Value = new IntValue(trainingEnd);
     204    }
     205
    191206    public event EventHandler SolutionCreatorChanged;
    192207    private void OnSolutionCreatorChanged() {
     
    269284    }
    270285    private void ParameterizeEvaluator() {
    271       Evaluator.FunctionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
     286      Evaluator.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
    272287      Evaluator.RegressionProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
     288      Evaluator.QualityParameter.ActualName = "TrainingMeanSquaredError";
     289      Evaluator.SamplesStartParameter.Value = new IntValue(DataAnalysisProblemData.TrainingSamplesStart.Value);
     290      Evaluator.SamplesEndParameter.Value = new IntValue((DataAnalysisProblemData.TrainingSamplesStart.Value + DataAnalysisProblemData.TrainingSamplesEnd.Value) / 2);
    273291    }
    274292    private void ParameterizeVisualizer() {
     
    278296          solutionVisualizer.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
    279297          solutionVisualizer.DataAnalysisProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
     298          solutionVisualizer.ValidationSamplesStartParameter.Value = new IntValue((DataAnalysisProblemData.TrainingSamplesStart.Value + DataAnalysisProblemData.TrainingSamplesEnd.Value) / 2);
     299          solutionVisualizer.ValidationSamplesEndParameter.Value = new IntValue(DataAnalysisProblemData.TrainingSamplesEnd.Value);
    280300        }
    281301      }
     
    289309      }
    290310      foreach (ISymbolicRegressionEvaluator op in Operators.OfType<ISymbolicRegressionEvaluator>()) {
    291         op.FunctionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
     311        op.SymbolicExpressionTreeParameter.ActualName = SolutionCreator.SymbolicExpressionTreeParameter.ActualName;
    292312        op.RegressionProblemDataParameter.ActualName = DataAnalysisProblemDataParameter.Name;
    293313        op.NumberOfEvaluatedNodesParameter.ActualName = NumberOfEvaluatedNodesParameter.Name;
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/DataAnalysisSolution.cs

    r3442 r3452  
    9898    private void Initialize() {
    9999      if (problemData != null) RegisterProblemDataEvents();
     100      if (problemData != null && model != null) RecalculateEstimatedValues();
    100101    }
    101102
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Evaluators/SimpleEvaluator.cs

    r3376 r3452  
    3838      get { return (ILookupParameter<DoubleMatrix>)Parameters["Values"]; }
    3939    }
    40     public ILookupParameter<DoubleValue> QualityParameter {
    41       get { return (ILookupParameter<DoubleValue>)Parameters["Quality"]; }
    42     }
    43 
    4440    public SimpleEvaluator()
    4541      : base() {
    4642      Parameters.Add(new LookupParameter<DoubleMatrix>("Values", "Table of original and predicted values for which the quality value should be evaluated."));
    47       Parameters.Add(new LookupParameter<DoubleValue>("Quality", "The quality value calculated from the values table."));
    4843    }
    4944
    5045    public override IOperation Apply() {
    5146      DoubleMatrix values = ValuesParameter.ActualValue;
    52       QualityParameter.ActualValue = new DoubleValue(Apply(values));
     47      Apply(values);
    5348      return null;
    5449    }
    5550
    56     protected abstract double Apply(DoubleMatrix values);
     51    protected abstract void Apply(DoubleMatrix values);
    5752  }
    5853}
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Evaluators/SimpleMSEEvaluator.cs

    r3376 r3452  
    2727using HeuristicLab.Core;
    2828using HeuristicLab.Data;
     29using HeuristicLab.Parameters;
    2930
    3031namespace HeuristicLab.Problems.DataAnalysis.Evaluators {
    3132  public class SimpleMSEEvaluator : SimpleEvaluator {
    3233
    33     public SimpleMSEEvaluator()
    34       : base() {
    35       QualityParameter.ActualName = "MeanSquaredError";
     34    public ILookupParameter<DoubleValue> MeanSquaredErrorParameter {
     35      get { return (ILookupParameter<DoubleValue>)Parameters["MeanSquaredError"]; }
    3636    }
    3737
    38     protected override double Apply(DoubleMatrix values) {
    39       return Calculate(values);
     38    public SimpleMSEEvaluator() {
     39      Parameters.Add(new LookupParameter<DoubleValue>("MeanSquaredError", "The mean squared error of estimated values."));
     40    }
     41
     42    protected override void Apply(DoubleMatrix values) {
     43      MeanSquaredErrorParameter.ActualValue = new DoubleValue(Calculate(values));
    4044    }
    4145
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Evaluators/SimpleMeanAbsolutePercentageErrorEvaluator.cs

    r3441 r3452  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2626using HeuristicLab.Core;
    2727using HeuristicLab.Data;
    28 using HeuristicLab.DataAnalysis;
     28using HeuristicLab.Parameters;
     29using HeuristicLab.Common;
    2930
    30 namespace HeuristicLab.Modeling {
    31   public class SimpleMeanAbsolutePercentageErrorEvaluator : SimpleEvaluatorBase {
    32 
    33     public override string OutputVariableName {
    34       get {
    35         return "MAPE";
    36       }
     31namespace HeuristicLab.Problems.DataAnalysis.Evaluators {
     32  public class SimpleMeanAbsolutePercentageErrorEvaluator : SimpleEvaluator {
     33    public ILookupParameter<PercentValue> AverageRelativeErrorParameter {
     34      get { return (ILookupParameter<PercentValue>)Parameters["AverageRelativeError"]; }
    3735    }
    3836
    39     public override double Evaluate(double[,] values) {
    40       try {
    41         return Calculate(values);
    42       }
    43       catch (ArgumentException) {
    44         return double.PositiveInfinity;
    45       }
     37    public SimpleMeanAbsolutePercentageErrorEvaluator() {
     38      Parameters.Add(new LookupParameter<PercentValue>("AverageRelativeError", "The average relative error of estimated values."));
    4639    }
    4740
    48     public static double Calculate(double[,] values) {
    49       double errorsSum = 0.0;
    50       int n = 0;
    51       for (int i = 0; i < values.GetLength(0); i++) {
    52         double estimated = values[i, ESTIMATION_INDEX];
    53         double original = values[i, ORIGINAL_INDEX];
     41    protected override void Apply(DoubleMatrix values) {
     42      var original = from i in Enumerable.Range(0, values.Rows)
     43                     select values[i, ORIGINAL_INDEX];
     44      var estimated = from i in Enumerable.Range(0, values.Rows)
     45                      select values[i, ESTIMATION_INDEX];
     46      AverageRelativeErrorParameter.ActualValue = new PercentValue(Calculate(original, estimated));
     47    }
    5448
    55         if (!double.IsNaN(estimated) && !double.IsInfinity(estimated) &&
    56           !double.IsNaN(original) && !double.IsInfinity(original) && original != 0.0) {
    57           double percent_error = Math.Abs((estimated - original) / original);
    58           errorsSum += percent_error;
    59           n++;
     49    public static double Calculate(IEnumerable<double> original, IEnumerable<double> estimated) {
     50      double sre = 0;
     51      int cnt = 0;
     52      var originalEnumerator = original.GetEnumerator();
     53      var estimatedEnumerator = estimated.GetEnumerator();
     54      while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
     55        double e = estimatedEnumerator.Current;
     56        double o = originalEnumerator.Current;
     57        if (!double.IsNaN(e) && !double.IsInfinity(e) &&
     58            !double.IsNaN(o) && !double.IsInfinity(o) && !o.IsAlmost(0.0)) {
     59          double error = Math.Abs((e - o) / o);
     60          sre += error * error;
     61          cnt++;
    6062        }
    6163      }
    62       if (n > 0) {
    63         return errorsSum / n;
    64       } else throw new ArgumentException("Mean of absolute percentage error is not defined for input vectors of NaN or Inf");
     64      if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext()) {
     65        throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
     66      } else if (cnt == 0) {
     67        throw new ArgumentException("Average relative error is not defined for input vectors of NaN or Inf");
     68      } else {
     69        return sre / cnt;
     70      }
    6571    }
    6672  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Evaluators/SimpleMeanAbsolutePercentageOfRangeErrorEvaluator.cs

    r3441 r3452  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2727using HeuristicLab.Common;
    2828using HeuristicLab.Data;
    29 using HeuristicLab.DataAnalysis;
     29using HeuristicLab.Parameters;
    3030
    31 namespace HeuristicLab.Modeling {
    32   public class SimpleMeanAbsolutePercentageOfRangeErrorEvaluator : SimpleEvaluatorBase {
    33     public override string OutputVariableName {
    34       get {
    35         return "MAPRE";
    36       }
     31namespace HeuristicLab.Problems.DataAnalysis.Evaluators {
     32  public class SimpleMeanAbsolutePercentageOfRangeErrorEvaluator : SimpleEvaluator {
     33
     34    public ILookupParameter<PercentValue> AveragePercentageOfRangeErrorParameter {
     35      get { return (ILookupParameter<PercentValue>)Parameters["AveragePercentageOfRangeError"]; }
    3736    }
    3837
    39     public override double Evaluate(double[,] values) {
    40       try {
    41         return Calculate(values);
    42       }
    43       catch (ArgumentException) {
    44         return double.PositiveInfinity;
    45       }
     38    public SimpleMeanAbsolutePercentageOfRangeErrorEvaluator() {
     39      Parameters.Add(new LookupParameter<PercentValue>("AveragePercentageOfRangeError", "The average relative (percentage of range) error of estimated values."));
    4640    }
    4741
    48     public static double Calculate(double[,] values) {
     42    protected override void Apply(DoubleMatrix values) {
     43      var original = from i in Enumerable.Range(0, values.Rows)
     44                     select values[i, ORIGINAL_INDEX];
     45      var estimated = from i in Enumerable.Range(0, values.Rows)
     46                      select values[i, ESTIMATION_INDEX];
     47      AveragePercentageOfRangeErrorParameter.ActualValue = new PercentValue(Calculate(original, estimated));
     48    }
     49
     50    public static double Calculate(IEnumerable<double> original, IEnumerable<double> estimated) {
    4951      double errorsSum = 0.0;
    5052      int n = 0;
    51       // copy to one-dimensional array for range calculation
    52       double[] originalValues = new double[values.GetLength(0)];
    53       for (int i = 0; i < originalValues.Length; i++) originalValues[i] = values[i, ORIGINAL_INDEX];
    54       double range = Statistics.Range(originalValues);
    55       if (double.IsInfinity(range)) throw new ArgumentException("Range of elements in values is infinity");
    56       if (range.IsAlmost(0.0)) throw new ArgumentException("Range of elements in values is zero");
     53      IList<double> originalList = original as IList<double>;
     54      if (originalList == null) originalList = original.ToList();
    5755
    58       for (int i = 0; i < values.GetLength(0); i++) {
    59         double estimated = values[i, ESTIMATION_INDEX];
    60         double original = values[i, ORIGINAL_INDEX];
     56      double range = originalList.Max() - originalList.Min();
     57      if (double.IsInfinity(range)) return double.MaxValue;
     58      if (range.IsAlmost(0.0)) return double.MaxValue;
    6159
    62         if (!double.IsNaN(estimated) && !double.IsInfinity(estimated) &&
    63           !double.IsNaN(original) && !double.IsInfinity(original) && original != 0.0) {
    64           double percent_error = Math.Abs((estimated - original) / range);
     60
     61      var originalEnumerator = original.GetEnumerator();
     62      var estimatedEnumerator = estimated.GetEnumerator();
     63      while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
     64        double e = estimatedEnumerator.Current;
     65        double o = originalEnumerator.Current;
     66
     67        if (!double.IsNaN(e) && !double.IsInfinity(e) &&
     68          !double.IsNaN(o) && !double.IsInfinity(o) && !o.IsAlmost(0.0)) {
     69          double percent_error = Math.Abs((e - o) / range);
    6570          errorsSum += percent_error;
    6671          n++;
    6772        }
    6873      }
    69       if (double.IsInfinity(range) || n == 0) {
    70         throw new ArgumentException("Mean of absolute percentage of range error is not defined for input vectors of NaN or Inf");
     74      if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext()) {
     75        throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
     76      } else if (n == 0) {
     77        return double.MaxValue;
    7178      } else {
    7279        return errorsSum / n;
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Evaluators/SimpleNMSEEvaluator.cs

    r3441 r3452  
    1 using System;
     1#region License Information
     2/* HeuristicLab
     3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     4 *
     5 * This file is part of HeuristicLab.
     6 *
     7 * HeuristicLab is free software: you can redistribute it and/or modify
     8 * it under the terms of the GNU General Public License as published by
     9 * the Free Software Foundation, either version 3 of the License, or
     10 * (at your option) any later version.
     11 *
     12 * HeuristicLab is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 * GNU General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU General Public License
     18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
     19 */
     20#endregion
     21
     22using System;
    223using System.Collections.Generic;
    324using System.Linq;
     
    526using HeuristicLab.Core;
    627using HeuristicLab.Data;
    7 using HeuristicLab.DataAnalysis;
     28using HeuristicLab.Parameters;
    829
    9 namespace HeuristicLab.Modeling {
    10   public class SimpleNMSEEvaluator : SimpleEvaluatorBase {
     30namespace HeuristicLab.Problems.DataAnalysis.Evaluators {
     31  public class SimpleNMSEEvaluator : SimpleEvaluator {
    1132
    12     public override string OutputVariableName {
    13       get {
    14         return "NMSE";
    15       }
    16     }
    17     public override double Evaluate(double[,] values) {
    18       try {
    19         return Calculate(values);
    20       }
    21       catch (ArgumentException) {
    22         return double.PositiveInfinity;
    23       }
     33    public ILookupParameter<DoubleValue> NormalizedMeanSquaredErrorParameter {
     34      get { return (ILookupParameter<DoubleValue>)Parameters["NormalizedMeanSquaredError"]; }
    2435    }
    2536
    26     public static double Calculate(double[,] values) {
    27       double mse = SimpleMSEEvaluator.Calculate(values);
    28       double mean = Statistics.Mean(Matrix<double>.GetColumn(values, ORIGINAL_INDEX));
    29       double ssd = 0;
    30       int n = 0;
    31       for (int i = 0; i < values.GetLength(0); i++) {
    32         double original = values[i, ORIGINAL_INDEX];
    33         if (!(double.IsNaN(original) || double.IsInfinity(original))) {
    34           double dev = original - mean;
    35           ssd += dev * dev;
    36           n++;
    37         }
    38       }
    39       double variance = ssd / (n - 1);
    40       return mse / variance;
     37    public SimpleNMSEEvaluator() {
     38      Parameters.Add(new LookupParameter<DoubleValue>("NormalizedMeanSquaredError", "The normalized mean squared error (divided by variance) of estimated values."));
     39    }
     40
     41    protected override void Apply(DoubleMatrix values) {
     42      var original = from i in Enumerable.Range(0, values.Rows)
     43                     select values[i, ORIGINAL_INDEX];
     44      var estimated = from i in Enumerable.Range(0, values.Rows)
     45                      select values[i, ESTIMATION_INDEX];
     46
     47      NormalizedMeanSquaredErrorParameter.ActualValue = new DoubleValue(Calculate(original, estimated));
     48    }
     49
     50    public static double Calculate(IEnumerable<double> original, IEnumerable<double> estimated) {
     51      double mse = SimpleMSEEvaluator.Calculate(original, estimated);
     52      return mse / original.Variance();
    4153    }
    4254  }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Evaluators/SimpleRSquaredEvaluator.cs

    r3441 r3452  
    1 using System;
     1#region License Information
     2/* HeuristicLab
     3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     4 *
     5 * This file is part of HeuristicLab.
     6 *
     7 * HeuristicLab is free software: you can redistribute it and/or modify
     8 * it under the terms of the GNU General Public License as published by
     9 * the Free Software Foundation, either version 3 of the License, or
     10 * (at your option) any later version.
     11 *
     12 * HeuristicLab is distributed in the hope that it will be useful,
     13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15 * GNU General Public License for more details.
     16 *
     17 * You should have received a copy of the GNU General Public License
     18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
     19 */
     20#endregion
     21
     22using System;
    223using System.Collections.Generic;
    324using System.Linq;
    425using System.Text;
    526using HeuristicLab.Core;
     27using HeuristicLab.Common;
    628using HeuristicLab.Data;
    7 using HeuristicLab.DataAnalysis;
     29using HeuristicLab.Parameters;
    830
    9 namespace HeuristicLab.Modeling {
    10   public class SimpleStableCorrelationCoefficientEvaluator : SimpleEvaluatorBase {
    11 
    12     public override string OutputVariableName {
    13       get {
    14         return "R2";
    15       }
     31namespace HeuristicLab.Problems.DataAnalysis.Evaluators {
     32  public class SimpleRSquaredEvaluator : SimpleEvaluator {
     33    public ILookupParameter<DoubleValue> RSquaredParameter {
     34      get { return (ILookupParameter<DoubleValue>)Parameters["RSquared"]; }
    1635    }
    1736
    18     public override double Evaluate(double[,] values) {
    19       try {
    20         return Calculate(values);
    21       }
    22       catch (ArgumentException) {
    23         return double.NegativeInfinity;
    24       }
     37    public SimpleRSquaredEvaluator() {
     38      Parameters.Add(new LookupParameter<DoubleValue>("RSquared", "The squared Pearson's Product Moment Correlation (R²) of estimated values and original values."));
    2539    }
    2640
    27     public static double Calculate(double[,] values) {
     41    protected override void Apply(DoubleMatrix values) {
     42      var original = from i in Enumerable.Range(0, values.Rows)
     43                     select values[i, ORIGINAL_INDEX];
     44      var estimated = from i in Enumerable.Range(0, values.Rows)
     45                      select values[i, ESTIMATION_INDEX];
     46      RSquaredParameter.ActualValue = new DoubleValue(Calculate(original, estimated));
     47    }
     48
     49
     50    public static double Calculate(IEnumerable<double> original, IEnumerable<double> estimated) {
     51      var originalEnumerator = original.GetEnumerator();
     52      var estimatedEnumerator = estimated.GetEnumerator();
     53      originalEnumerator.MoveNext();
     54      estimatedEnumerator.MoveNext();
     55      double e = estimatedEnumerator.Current;
     56      double o = originalEnumerator.Current;
     57
     58      // stable and iterative calculation of R² in one pass over original and estimated
    2859      double sum_sq_x = 0.0;
    2960      double sum_sq_y = 0.0;
    3061      double sum_coproduct = 0.0;
    31       if (IsInvalidValue(values[0, ORIGINAL_INDEX]) || IsInvalidValue(values[0, ESTIMATION_INDEX])) {
    32         throw new ArgumentException("Correlation coefficient is not defined for variables with NaN or infinity values.");
     62      if (IsInvalidValue(o) || IsInvalidValue(e)) {
     63        throw new ArgumentException(" is not defined for variables with NaN or infinity values.");
    3364      }
    34       double mean_x = values[0, ORIGINAL_INDEX];
    35       double mean_y = values[0, ESTIMATION_INDEX];
    36       for (int i = 1; i < values.GetLength(0); i++) {
    37         double sweep = (i - 1.0) / i;
    38         if (IsInvalidValue(values[i, ORIGINAL_INDEX]) || IsInvalidValue(values[i, ESTIMATION_INDEX])) {
     65      double mean_x = o;
     66      double mean_y = e;
     67      int n = 1;
     68      while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
     69        e = estimatedEnumerator.Current;
     70        o = originalEnumerator.Current;
     71        double sweep = (n - 1.0) / n;
     72        if (IsInvalidValue(o) || IsInvalidValue(e)) {
    3973          throw new ArgumentException("Correlation coefficient is not defined for variables with NaN or infinity values.");
    4074        }
    41         double delta_x = values[i, ORIGINAL_INDEX] - mean_x;
    42         double delta_y = values[i, ESTIMATION_INDEX] - mean_y;
     75        double delta_x = o - mean_x;
     76        double delta_y = e - mean_y;
    4377        sum_sq_x += delta_x * delta_x * sweep;
    4478        sum_sq_y += delta_y * delta_y * sweep;
    4579        sum_coproduct += delta_x * delta_y * sweep;
    46         mean_x += delta_x / i;
    47         mean_y += delta_y / i;
     80        mean_x += delta_x / n;
     81        mean_y += delta_y / n;
     82        n++;
    4883      }
    49       double pop_sd_x = Math.Sqrt(sum_sq_x / values.GetLength(0));
    50       double pop_sd_y = Math.Sqrt(sum_sq_y / values.GetLength(0));
    51       double cov_x_y = sum_coproduct / values.GetLength(0);
     84      if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext()) {
     85        throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
     86      } else {
     87        double pop_sd_x = Math.Sqrt(sum_sq_x / n);
     88        double pop_sd_y = Math.Sqrt(sum_sq_y / n);
     89        double cov_x_y = sum_coproduct / n;
    5290
    53       if (pop_sd_x == 0.0 || pop_sd_y == 0.0)
    54         return 0.0;
    55       else {
    56         double r = cov_x_y / (pop_sd_x * pop_sd_y);
    57         return r * r;
     91        if (pop_sd_x.IsAlmost(0.0) || pop_sd_y.IsAlmost(0.0))
     92          return 0.0;
     93        else {
     94          double r = cov_x_y / (pop_sd_x * pop_sd_y);
     95          return r * r;
     96        }
    5897      }
    5998    }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Evaluators/SimpleVarianceAccountedForEvaluator.cs

    r3441 r3452  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2010 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2727using HeuristicLab.Common;
    2828using HeuristicLab.Data;
    29 using HeuristicLab.DataAnalysis;
     29using HeuristicLab.Parameters;
    3030
    31 namespace HeuristicLab.Modeling {
     31namespace HeuristicLab.Problems.DataAnalysis.Evaluators {
    3232  /// <summary>
    3333  /// The Variance Accounted For (VAF) function is computed as
    34   /// VAF(y,y') = ( 1 - var(y-y')/var(y) )
     34  /// VAF(y,y') =  1 - var(y-y')/var(y)
    3535  /// where y' denotes the predicted / modelled values for y and var(x) the variance of a signal x.
    3636  /// </summary>
    37   public class SimpleVarianceAccountedForEvaluator : SimpleEvaluatorBase {
     37  public class SimpleVarianceAccountedForEvaluator : SimpleEvaluator {
    3838
    39     public override string OutputVariableName {
    40       get {
    41         return "VAF";
    42       }
     39    public ILookupParameter<DoubleValue> VarianceAccountedForParameter {
     40      get { return (ILookupParameter<DoubleValue>)Parameters["VarianceAccountedFor"]; }
    4341    }
    4442
    45     public override double Evaluate(double[,] values) {
    46       try {
    47         return Calculate(values);
    48       }
    49       catch (ArgumentException) {
    50         return double.NegativeInfinity;
    51       }
     43    public SimpleVarianceAccountedForEvaluator() {
     44      Parameters.Add(new LookupParameter<DoubleValue>("VarianceAccountedFor", "The variance of the original values accounted for by the estimated values (VAF(y,y') = 1 - var(y-y') / var(y) )."));
    5245    }
    5346
    54     public static double Calculate(double[,] values) {
    55       int n = values.GetLength(0);
    56       double[] errors = new double[n];
    57       double[] originalTargetVariableValues = new double[n];
    58       for (int i = 0; i < n; i++) {
    59         double estimated = values[i, ESTIMATION_INDEX];
    60         double original = values[i, ORIGINAL_INDEX];
    61         if (!double.IsNaN(estimated) && !double.IsInfinity(estimated) &&
    62           !double.IsNaN(original) && !double.IsInfinity(original)) {
    63           errors[i] = original - estimated;
    64           originalTargetVariableValues[i] = original;
    65         } else {
    66           errors[i] = double.NaN;
    67           originalTargetVariableValues[i] = double.NaN;
     47    protected override void Apply(DoubleMatrix values) {
     48      var original = from i in Enumerable.Range(0, values.Rows)
     49                     select values[i, ORIGINAL_INDEX];
     50      var estimated = from i in Enumerable.Range(0, values.Rows)
     51                      select values[i, ESTIMATION_INDEX];
     52      VarianceAccountedForParameter.ActualValue = new DoubleValue(Calculate(original, estimated));
     53    }
     54
     55    public static double Calculate(IEnumerable<double> original, IEnumerable<double> estimated) {
     56      var originalEnumerator = original.GetEnumerator();
     57      var estimatedEnumerator = estimated.GetEnumerator();
     58      var errors = new List<double>();
     59      while (originalEnumerator.MoveNext() & estimatedEnumerator.MoveNext()) {
     60        double e = estimatedEnumerator.Current;
     61        double o = originalEnumerator.Current;
     62        if (!double.IsNaN(e) && !double.IsInfinity(e) &&
     63          !double.IsNaN(o) && !double.IsInfinity(o)) {
     64          errors.Add(o - e);
    6865        }
    6966      }
    70       double errorsVariance = Statistics.Variance(errors);
    71       double originalsVariance = Statistics.Variance(originalTargetVariableValues);
     67      if (estimatedEnumerator.MoveNext() || originalEnumerator.MoveNext()) {
     68        throw new ArgumentException("Number of elements in original and estimated enumeration doesn't match.");
     69      }
     70
     71      double errorsVariance = errors.Variance();
     72      double originalsVariance = original.Variance();
    7273      if (originalsVariance.IsAlmost(0.0))
    7374        if (errorsVariance.IsAlmost(0.0)) {
    7475          return 1.0;
    7576        } else {
    76           throw new ArgumentException("Variance of original values is zero");
     77          return double.MaxValue;
    7778        } else {
    7879        return 1.0 - errorsVariance / originalsVariance;
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/HeuristicLab.Problems.DataAnalysis-3.3.csproj

    r3442 r3452  
    9191    <Compile Include="Dataset.cs" />
    9292    <Compile Include="Evaluators\SimpleEvaluator.cs" />
     93    <Compile Include="Evaluators\SimpleMeanAbsolutePercentageErrorEvaluator.cs" />
     94    <Compile Include="Evaluators\SimpleMeanAbsolutePercentageOfRangeErrorEvaluator.cs" />
    9395    <Compile Include="Evaluators\SimpleMSEEvaluator.cs" />
     96    <Compile Include="Evaluators\SimpleNMSEEvaluator.cs" />
     97    <Compile Include="Evaluators\SimpleRSquaredEvaluator.cs" />
     98    <Compile Include="Evaluators\SimpleVarianceAccountedForEvaluator.cs" />
    9499    <Compile Include="HeuristicLabProblemsDataAnalysisPlugin.cs" />
    95100    <Compile Include="Interfaces\IModel.cs" />
    96101    <Compile Include="MatrixExtensions.cs" />
    97102    <Compile Include="Properties\AssemblyInfo.cs" />
     103    <Compile Include="Statistics.cs" />
    98104    <Compile Include="Symbolic\ArithmeticExpressionGrammar.cs" />
    99105    <Compile Include="Symbolic\SimpleArithmeticExpressionEvaluator.cs" />
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.3/Statistics.cs

    r3441 r3452  
    2323using System.Collections.Generic;
    2424using System.Text;
     25using System.Linq;
    2526
    26 namespace HeuristicLab.DataAnalysis {
    27   public class Statistics {
    28 
     27namespace HeuristicLab.Problems.DataAnalysis {
     28  public static class StatisticExtensions {
    2929    /// <summary>
    30     /// Minimum returns the smallest entry of values.
    31     /// Throws an exception if values is empty.
    32     /// </summary>
    33     /// <typeparam name="T"></typeparam>
    34     /// <param name="values"></param>
    35     /// <returns></returns>
    36     public static T Minimum<T>(IEnumerable<T> values) where T : struct, IComparable, IComparable<T> {
    37       IEnumerator<T> enumerator = values.GetEnumerator();
    38 
    39       // this will throw an exception if the values collection is empty
    40       enumerator.MoveNext();
    41       T minimum = enumerator.Current;
    42 
    43       while (enumerator.MoveNext()) {
    44         T current = enumerator.Current;
    45         if (current.CompareTo(minimum) < 0) {
    46           minimum = current;
    47         }
    48       }
    49 
    50       return minimum;
    51     }
    52 
    53     /// <summary>
    54     /// Maximum returns the largest entry of values.
    55     /// Throws an exception if values is empty.
    56     /// </summary>
    57     /// <typeparam name="T"></typeparam>
    58     /// <param name="values"></param>
    59     /// <returns></returns>
    60     public static T Maximum<T>(IEnumerable<T> values) where T : struct, IComparable, IComparable<T> {
    61       IEnumerator<T> enumerator = values.GetEnumerator();
    62 
    63       // this will throw an exception if the values collection is empty
    64       enumerator.MoveNext();
    65       T maximum = enumerator.Current;
    66 
    67       while (enumerator.MoveNext()) {
    68         T current = enumerator.Current;
    69         if (current.CompareTo(maximum) > 0) {
    70           maximum = current;
    71         }
    72       }
    73 
    74       return maximum;
    75     }
    76 
    77     /// <summary>
    78     /// Range calculates the difference between the largest and smallest entry of values.
     30    /// Calculates the median element of the enumeration.
    7931    /// </summary>
    8032    /// <param name="values"></param>
    8133    /// <returns></returns>
    82     public static double Range(double[] values) {
    83       return Range(values, 0, values.Length);
    84     }
     34    public static double Median(this IEnumerable<double> values) {
     35      int n = values.Count();
     36      if (n == 0) throw new InvalidOperationException("Enumeration contains no elements.");
    8537
    86     /// <summary>
    87     /// Range calculates the difference between the largest and smallest entry of values.
    88     /// </summary>
    89     public static double Range(List<double> values) {
    90       return Range(values.ToArray(), 0, values.Count);
    91     }
     38      double[] sortedValues = new double[n];
     39      int i = 0;
     40      foreach (double x in values)
     41        sortedValues[i++] = x;
    9242
    93     /// <summary>
    94     /// Range calculates the difference between the largest and smallest entry of values between start and end.
    95     /// </summary>
    96     /// <param name="values">collection of values</param>
    97     /// <param name="start">start index (inclusive)</param>
    98     /// <param name="end">end index (exclusive)</param>
    99     /// <returns></returns>
    100     public static double Range(double[] values, int start, int end) {
    101       if (start < 0 || start > values.Length || end < 0 || end > values.Length || start > end) {
    102         throw new InvalidOperationException();
    103       }
    104 
    105       double minimum = double.PositiveInfinity;
    106       double maximum = double.NegativeInfinity;
    107       for (int i = start; i < end; i++) {
    108         if (!double.IsNaN(values[i])) {
    109           if (values[i] > maximum) {
    110             maximum = values[i];
    111           }
    112           if (values[i] < minimum) {
    113             minimum = values[i];
    114           }
    115         }
    116       }
    117       return (maximum - minimum);
    118     }
    119 
    120     /// <summary>
    121     /// Calculates the mean of all values.
    122     /// </summary>
    123     /// <param name="values"></param>
    124     /// <returns></returns>
    125     public static double Mean(List<double> values) {
    126       return Mean(values.ToArray(), 0, values.Count);
    127     }
    128 
    129     // Calculates the mean of all values.
    130     public static double Mean(double[] values) {
    131       return Mean(values, 0, values.Length);
    132     }
    133 
    134     /// <summary>
    135     /// Calculates the mean of the values between start and end.
    136     /// </summary>
    137     /// <param name="values"></param>
    138     /// <param name="start">start index (inclusive)</param>
    139     /// <param name="end">end index(exclusive)</param>
    140     /// <returns></returns>
    141     public static double Mean(double[] values, int start, int end) {
    142       if (values.Length == 0) throw new ArgumentException("Values is empty.");
    143       if(end <=start) throw new ArgumentException("End is smaller or equal start");
    144       double sum = 0.0;
    145       int n = 0;
    146       for (int i = start; i < end; i++) {
    147         if (!double.IsNaN(values[i])) {
    148           sum += values[i];
    149           n++;
    150         }
    151       }
    152       if (n > 0)
    153         return sum / n;
    154       else throw new ArgumentException("Only NaN elements in values");
    155     }
    156 
    157     /// <summary>
    158     /// Calculates the median of the values.
    159     /// </summary>
    160     /// <param name="values"></param>
    161     /// <returns></returns>
    162     public static double Median(double[] values) {
    163       if (values.Length == 0) throw new InvalidOperationException();
    164       int n = values.Length;
    165       double[] sortedValues = new double[n];
    166 
    167       Array.Copy(values, sortedValues, n);
    16843      Array.Sort(sortedValues);
    16944
     
    17247        return sortedValues[n / 2];
    17348      } else {
    174         return (sortedValues[(n / 2)-1] + sortedValues[n / 2 ]) / 2.0;
     49        return (sortedValues[(n / 2) - 1] + sortedValues[n / 2]) / 2.0;
    17550      }
    17651    }
     
    18257    /// <param name="values"></param>
    18358    /// <returns></returns>
    184     public static double StandardDeviation(double[] values) {
     59    public static double StandardDeviation(this IEnumerable<double> values) {
    18560      return Math.Sqrt(Variance(values));
    18661    }
    18762
    18863    /// <summary>
    189     /// Calculates the variance of values.
     64    /// Calculates the variance of values. (sum (x - x_mean)² / n)
    19065    /// </summary>
    19166    /// <param name="values"></param>
    19267    /// <returns></returns>
    193     public static double Variance(double[] values) {
    194       return Variance(values, 0, values.Length);
    195     }
     68    public static double Variance(this IEnumerable<double> values) {
     69      IList<double> list = values as IList<double>;
     70      if (list == null) {
     71        list = values.ToList();
     72      }
     73      if (list.Count == 0) throw new ArgumentException("Enumeration contains no elements.");
    19674
    197 
    198     /// <summary>
    199     /// Calculates the variance of the entries of values between start and end.
    200     /// </summary>
    201     /// <param name="values"></param>
    202     /// <param name="start">start index (inclusive)</param>
    203     /// <param name="end">end index (exclusive)</param>
    204     /// <returns></returns>
    205     public static double Variance(double[] values, int start, int end) {
    206       if (values.Length == 0) throw new ArgumentException("Values is empty.");
    207       if (end <= start) throw new ArgumentException("End is smaller or equal start");
    208       if (end - start == 1)
    209         return 0.0;
    210 
    211       double mean = Mean(values, start, end);
     75      double mean = list.Average();
    21276      double squaredErrorsSum = 0.0;
    213 
    214       int n = 0;
    215       for (int i = start; i < end; i++) {
    216         if (!double.IsNaN(values[i])) {
    217           double d = values[i] - mean;
     77      int n = list.Count;
     78      int s = 0;
     79      for (int i = 0; i < n; i++) {
     80        if (!double.IsNaN(list[i])) {
     81          double d = list[i] - mean;
    21882          squaredErrorsSum += d * d;
    219           n++;
     83          s++;
    22084        }
    22185      }
    222       if (n < 2) {
    223         throw new ArgumentException("Only one non-NaN element in values");
     86      if (s == 0) {
     87        throw new ArgumentException("Enumeration contains no non-NaN elements.");
    22488      }
    225       return squaredErrorsSum / (n - 1);
     89      return squaredErrorsSum / n;
    22690    }
    22791  }
Note: See TracChangeset for help on using the changeset viewer.