Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Modeling/3.2/VariableQualityImpactCalculator.cs @ 2420

Last change on this file since 2420 was 2379, checked in by gkronber, 15 years ago

Implemented additional model quality metrics. #761

File size: 6.0 KB
RevLine 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2008 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Text;
25using System.Xml;
26using HeuristicLab.Core;
27using HeuristicLab.Data;
28using HeuristicLab.DataAnalysis;
29using System.Linq;
30
namespace HeuristicLab.Modeling {
  /// <summary>
  /// Operator that estimates, for each input variable, how much the mean squared error
  /// of a predictor changes when that variable is replaced by its mean value.
  /// The result is written to the scope as a list of [variable name, impact] rows.
  /// </summary>
  public class VariableQualityImpactCalculator : OperatorBase {

    /// <summary>
    /// Registers the variables that <see cref="Apply"/> reads from and writes to the scope.
    /// </summary>
    public VariableQualityImpactCalculator()
      : base() {
      AddVariableInfo(new VariableInfo("Predictor", "The predictor used to evaluate the model", typeof(IPredictor), VariableKind.In));
      AddVariableInfo(new VariableInfo("Dataset", "Dataset", typeof(Dataset), VariableKind.In));
      AddVariableInfo(new VariableInfo("TargetVariable", "TargetVariable", typeof(IntData), VariableKind.In));
      AddVariableInfo(new VariableInfo("InputVariableNames", "Names of used variables in the model (optional)", typeof(ItemList<StringData>), VariableKind.In));
      AddVariableInfo(new VariableInfo("SamplesStart", "SamplesStart", typeof(IntData), VariableKind.In));
      AddVariableInfo(new VariableInfo("SamplesEnd", "SamplesEnd", typeof(IntData), VariableKind.In));
      AddVariableInfo(new VariableInfo(ModelingResult.VariableQualityImpact.ToString(), "VariableQualityImpacts", typeof(ItemList), VariableKind.New));
    }

    /// <summary>
    /// Human-readable description of the operator.
    /// </summary>
    public override string Description {
      // The operator works on the "Predictor" variable and does not use any sub-operator,
      // so the description states what is actually computed.
      get { return @"Calculates the impact of all allowed input variables on the quality of the model. The impact of a variable is the mean squared error of the predictor after replacing the variable by its mean, divided by the mean squared error on the unchanged dataset."; }
    }

    /// <summary>
    /// Reads predictor, dataset, target variable and sample range from the scope, calculates
    /// the quality impacts and adds them to the scope as a new variable.
    /// </summary>
    /// <param name="scope">Scope from which inputs are read and to which the result is added.</param>
    /// <returns>Always <c>null</c>.</returns>
    public override IOperation Apply(IScope scope) {
      IPredictor predictor = GetVariableValue<IPredictor>("Predictor", scope, true);
      Dataset dataset = GetVariableValue<Dataset>("Dataset", scope, true);
      int targetVariable = GetVariableValue<IntData>("TargetVariable", scope, true).Data;
      string targetVariableName = dataset.GetVariableName(targetVariable);
      // "InputVariableNames" is optional; the result is null-checked below
      // (presumably the trailing 'false' suppresses the error for a missing variable - confirm in OperatorBase).
      ItemList<StringData> inputVariableNames = GetVariableValue<ItemList<StringData>>("InputVariableNames", scope, true, false);
      int start = GetVariableValue<IntData>("SamplesStart", scope, true).Data;
      int end = GetVariableValue<IntData>("SamplesEnd", scope, true).Data;

      Dictionary<string, double> qualityImpacts;
      if (inputVariableNames == null) {
        qualityImpacts = Calculate(dataset, predictor, targetVariableName, start, end);
      } else {
        qualityImpacts = Calculate(dataset, predictor, targetVariableName, inputVariableNames.Select(iv => iv.Data), start, end);
      }

      // One row per variable: [name, impact]. The target variable itself is not reported.
      ItemList variableImpacts = new ItemList();
      foreach (KeyValuePair<string, double> p in qualityImpacts) {
        if (p.Key != targetVariableName) {
          ItemList row = new ItemList();
          row.Add(new StringData(p.Key));
          row.Add(new DoubleData(p.Value));
          variableImpacts.Add(row);
        }
      }

      scope.AddVariable(new Variable(scope.TranslateName(ModelingResult.VariableQualityImpact.ToString()), variableImpacts));
      return null;
    }

    /// <summary>
    /// Calculates the quality impact of every variable of <paramref name="dataset"/>.
    /// </summary>
    /// <param name="dataset">Dataset the predictor is evaluated on.</param>
    /// <param name="predictor">Predictor whose error is measured.</param>
    /// <param name="targetVariableName">Name of the target variable.</param>
    /// <param name="start">Start of the sample range passed to the dataset and the predictor.</param>
    /// <param name="end">End of the sample range passed to the dataset and the predictor.</param>
    /// <returns>Impact value for each variable name.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="dataset"/> or <paramref name="predictor"/> is null.</exception>
    public static Dictionary<string, double> Calculate(Dataset dataset, IPredictor predictor, string targetVariableName, int start, int end) {
      return Calculate(dataset, predictor, targetVariableName, null, start, end);
    }

    /// <summary>
    /// Calculates the quality impact of the given input variables. For each variable the values
    /// in [start, end) of a working copy of the dataset are replaced by the variable's mean,
    /// the predictor is evaluated again, and the impact is the new MSE divided by the original MSE.
    /// Variables that are the target variable, have a range of zero, or have only missing values
    /// in the sample range get the neutral impact 1.0.
    /// </summary>
    /// <param name="dataset">Dataset the predictor is evaluated on; it is cloned and not modified.</param>
    /// <param name="predictor">Predictor whose error is measured.</param>
    /// <param name="targetVariableName">Name of the target variable.</param>
    /// <param name="inputVariableNames">Variables to analyze, or null to analyze all variables of the dataset.</param>
    /// <param name="start">Start of the sample range passed to the dataset and the predictor.</param>
    /// <param name="end">End of the sample range passed to the dataset and the predictor.</param>
    /// <returns>Impact value for each analyzed variable name.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="dataset"/> or <paramref name="predictor"/> is null.</exception>
    public static Dictionary<string, double> Calculate(Dataset dataset, IPredictor predictor, string targetVariableName, IEnumerable<string> inputVariableNames, int start, int end) {
      if (dataset == null) throw new ArgumentNullException("dataset");
      if (predictor == null) throw new ArgumentNullException("predictor");

      Dictionary<string, double> evaluationImpacts = new Dictionary<string, double>();
      // All replacements are done on a copy so that the caller's dataset stays untouched.
      Dataset dirtyDataset = (Dataset)dataset.Clone();

      double[] targetValues = dataset.GetVariableValues(targetVariableName, start, end);
      double oldMSE = CalculateMSE(targetValues, predictor.Predict(dataset, start, end));

      IEnumerable<string> variables = inputVariableNames != null ? inputVariableNames : dataset.VariableNames;
      int sampleCount = end - start;

      foreach (string variableName in variables) {
        bool isInformative =
          dataset.CountMissingValues(variableName, start, end) < sampleCount &&
          dataset.GetRange(variableName, start, end) > 0.0 &&
          variableName != targetVariableName;

        if (!isInformative) {
          evaluationImpacts[variableName] = 1.0;
          continue;
        }

        double mean = dataset.GetMean(variableName, start, end);
        IEnumerable<double> oldValues = dirtyDataset.ReplaceVariableValues(variableName, Enumerable.Repeat(mean, sampleCount), start, end);
        double newMSE = CalculateMSE(targetValues, predictor.Predict(dirtyDataset, start, end));
        evaluationImpacts[variableName] = CalculateImpact(oldMSE, newMSE);
        // Restore the original values so that the next variable is analyzed in isolation.
        dirtyDataset.ReplaceVariableValues(variableName, oldValues, start, end);
      }

      return evaluationImpacts;
    }

    /// <summary>
    /// Returns the ratio <paramref name="newValue"/> / <paramref name="referenceValue"/>.
    /// </summary>
    /// <param name="referenceValue">Error on the unchanged dataset.</param>
    /// <param name="newValue">Error after a variable has been replaced by its mean.</param>
    /// <returns>The ratio, or 1.0 when both values are exactly zero.</returns>
    private static double CalculateImpact(double referenceValue, double newValue) {
      // 0.0 / 0.0 would be NaN; an error that stays at zero means the replacement
      // changed nothing, so report the same neutral value used for skipped variables.
      if (referenceValue == 0.0 && newValue == 0.0) return 1.0;
      return newValue / referenceValue;
    }

    /// <summary>
    /// Calculates the mean squared error between two value arrays via <c>SimpleMSEEvaluator</c>.
    /// </summary>
    /// <param name="referenceValues">Reference (target) values.</param>
    /// <param name="newValues">Values to compare against the reference (predictions).</param>
    /// <returns>The value returned by the evaluator, or positive infinity if it throws an <see cref="ArgumentException"/>.</returns>
    private static double CalculateMSE(double[] referenceValues, double[] newValues) {
      try {
        return SimpleMSEEvaluator.Calculate(Matrix<double>.Create(referenceValues, newValues));
      }
      catch (ArgumentException) {
        return double.PositiveInfinity;
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.