Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
09/01/09 11:09:50 (15 years ago)
Author:
gkronber
Message:

Applied the patch from mkommend for the variable impact calculators and adapted the data-modeling algorithms to use the new operators for variable impact calculation (ticket #728).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Modeling/3.2/VariableEvaluationImpactCalculator.cs

    r2226 r2319  
    3030
    3131namespace HeuristicLab.Modeling {
    32   public abstract class VariableEvaluationImpactCalculator : VariableImpactCalculatorBase<double[]> {
    33     public override string OutputVariableName {
    34       get { return "VariableEvaluationImpacts"; }
     32  public class VariableEvaluationImpactCalculator : OperatorBase {
     33
     34    public VariableEvaluationImpactCalculator()
     35      : base() {
     36      AddVariableInfo(new VariableInfo("Predictor", "The predictor used to evaluate the model", typeof(IPredictor), VariableKind.In));
     37      AddVariableInfo(new VariableInfo("Dataset", "Dataset", typeof(Dataset), VariableKind.In));
     38      AddVariableInfo(new VariableInfo("TargetVariable", "TargetVariable", typeof(IntData), VariableKind.In));
     39      AddVariableInfo(new VariableInfo("InputVariableNames", "Names of used variables in the model (optional)", typeof(ItemList<StringData>), VariableKind.In));
     40      AddVariableInfo(new VariableInfo("SamplesStart", "SamplesStart", typeof(IntData), VariableKind.In));
     41      AddVariableInfo(new VariableInfo("SamplesEnd", "SamplesEnd", typeof(IntData), VariableKind.In));
     42      AddVariableInfo(new VariableInfo("VariableEvaluationImpacts", "VariableEvaluationImpacts", typeof(ItemList), VariableKind.New));
    3543    }
    3644
     
    3947    }
    4048
    41     private double[,] CombineOutputs(double[] referenceOutputs, double[] newOutputs) {
    42       if (referenceOutputs.Length != newOutputs.Length) throw new InvalidProgramException();
    43       double[,] result = new double[referenceOutputs.Length, 2];
    44       for (int i = 0; i < referenceOutputs.Length; i++) {
    45         result[i, 0] = referenceOutputs[i];
    46         result[i, 1] = newOutputs[i];
     49    public override IOperation Apply(IScope scope) {
     50      IPredictor predictor = GetVariableValue<IPredictor>("Predictor", scope, true);
     51      Dataset dataset = GetVariableValue<Dataset>("Dataset", scope, true);
     52      int targetVariable = GetVariableValue<IntData>("TargetVariable", scope, true).Data;
     53      string targetVariableName = dataset.GetVariableName(targetVariable);
     54      ItemList<StringData> inputVariableNames = GetVariableValue<ItemList<StringData>>("InputVariableNames", scope, true, false);
     55      int start = GetVariableValue<IntData>("SamplesStart", scope, true).Data;
     56      int end = GetVariableValue<IntData>("SamplesEnd", scope, true).Data;
     57
     58      Dictionary<string, double> evaluationImpacts;
     59      if (inputVariableNames == null)
     60        evaluationImpacts = Calculate(dataset, predictor, targetVariableName, start, end);
     61      else
     62        evaluationImpacts = Calculate(dataset, predictor, targetVariableName, inputVariableNames.Select(iv => iv.Data), start, end);
     63
     64      ItemList variableImpacts = new ItemList();
     65      foreach (KeyValuePair<string, double> p in evaluationImpacts) {
     66        if (p.Key != targetVariableName) {
     67          ItemList row = new ItemList();
     68          row.Add(new StringData(p.Key));
     69          row.Add(new DoubleData(p.Value));
     70          variableImpacts.Add(row);
     71        }
    4772      }
    48       return result;
     73
     74      scope.AddVariable(new Variable(scope.TranslateName("VariableEvaluationImpacts"), variableImpacts));
     75      return null;
     76
    4977    }
    5078
    51     protected override double CalculateImpact(double[] referenceValue, double[] newValue) {
     79    public static Dictionary<string, double> Calculate(Dataset dataset, IPredictor predictor, string targetVariableName, int start, int end) {
     80      return Calculate(dataset, predictor, targetVariableName, null, start, end);
     81    }
     82
     83
     84    public static Dictionary<string, double> Calculate(Dataset dataset, IPredictor predictor, string targetVariableName, IEnumerable<string> inputVariableNames, int start, int end) {
     85      Dictionary<string, double> evaluationImpacts = new Dictionary<string, double>();
     86      Dataset dirtyDataset = (Dataset)dataset.Clone();
     87      double[] referenceValues = predictor.Predict(dataset, start, end);
     88
     89      double mean;
     90      IEnumerable<double> oldValues;
     91      double[] newValues;
     92      IEnumerable<string> variables;
     93      if (inputVariableNames != null)
     94        variables = inputVariableNames;
     95      else
     96        variables = dataset.VariableNames;
     97
     98      foreach (string variableName in variables) {
     99        if (variableName != targetVariableName) {
     100          mean = dataset.GetMean(variableName, start, end);
     101          oldValues = dirtyDataset.ReplaceVariableValues(variableName, Enumerable.Repeat(mean, end - start), start, end);
     102          newValues = predictor.Predict(dirtyDataset, start, end);
     103          evaluationImpacts[variableName] = CalculateMSE(referenceValues, newValues);
     104          dirtyDataset.ReplaceVariableValues(variableName, oldValues, start, end);
     105        }
     106      }
     107
     108      double impactsSum = evaluationImpacts.Values.Sum();
     109      if (impactsSum.IsAlmost(0.0)) impactsSum = 1.0;
     110      foreach (KeyValuePair<string, double> p in evaluationImpacts.ToList())
     111        evaluationImpacts[p.Key] = p.Value / impactsSum;
     112
     113      return evaluationImpacts;
     114    }
     115
     116    private static double CalculateMSE(double[] referenceValues, double[] newValues) {
    52117      try {
    53         return SimpleMSEEvaluator.Calculate(CombineOutputs(referenceValue, newValue));
     118        return SimpleMSEEvaluator.Calculate(MatrixCreator<double>.CreateMatrix(referenceValues, newValues));
    54119      }
    55120      catch (ArgumentException) {
     
    57122      }
    58123    }
    59 
    60     protected override double[] CalculateValue(IScope scope, Dataset dataset, int targetVariable, int start, int end) {
    61       return GetOutputs(scope, dataset, targetVariable, start, end);
    62     }
    63 
    64     protected override double[] PostProcessImpacts(double[] impacts) {
    65       double mseSum = impacts.Sum();
    66       if (mseSum.IsAlmost(0.0)) mseSum = 1.0;
    67       for (int i = 0; i < impacts.Length; i++) {
    68         impacts[i] = impacts[i] / mseSum;
    69       }
    70       return impacts;
    71     }
    72 
    73     private bool IsAlmost(double x, double y) {
    74       return Math.Abs(x - y) < 1.0E-12;
    75     }
    76 
    77     protected abstract double[] GetOutputs(IScope scope, Dataset dataset, int targetVariable, int start, int end);
    78124  }
    79125}
Note: See TracChangeset for help on using the changeset viewer.