Changeset 2041


Ignore:
Timestamp:
06/10/09 19:05:34 (12 years ago)
Author:
gkronber
Message:

Implemented base classes for variable impact analysis and implemented specific operators for GP. #644 (Variable impact of CEDMA models should be calculated and stored in the result DB)

Location:
trunk/sources
Files:
4 added
7 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.GP.StructureIdentification/3.3/AlgorithmBase.cs

    r2034 r2041  
    423423      model.TrainingMeanSquaredError = bestModelScope.GetVariableValue<DoubleData>("Quality", false).Data;
    424424      model.ValidationMeanSquaredError = bestModelScope.GetVariableValue<DoubleData>("ValidationQuality", false).Data;
     425      // calculate and set variable impacts
     426      VariableEvaluationImpactCalculator evaluationImpactCalculator = new VariableEvaluationImpactCalculator();
     427      VariableQualityImpactCalculator qualityImpactCalculator = new VariableQualityImpactCalculator();
     428
     429      evaluationImpactCalculator.Apply(bestModelScope);
     430      qualityImpactCalculator.Apply(bestModelScope);
     431
     432      ItemList evaluationImpacts = bestModelScope.GetVariableValue<ItemList>("VariableEvaluationImpacts", false);
     433      ItemList qualityImpacts = bestModelScope.GetVariableValue<ItemList>("VariableQualityImpacts", false);
     434      foreach (ItemList row in evaluationImpacts) {
     435        string variableName = ((StringData)row[0]).Data;
     436        double impact = ((DoubleData)row[0]).Data;
     437        model.SetVariableEvaluationImpact(variableName, impact);
     438      }
     439      foreach (ItemList row in qualityImpacts) {
     440        string variableName = ((StringData)row[0]).Data;
     441        double impact = ((DoubleData)row[0]).Data;
     442        model.SetVariableQualityImpact(variableName, impact);
     443      }
    425444      return model;
    426445    }
  • trunk/sources/HeuristicLab.GP.StructureIdentification/3.3/Evaluators/SimpleEvaluator.cs

    r1891 r2041  
    3333    public SimpleEvaluator()
    3434      : base() {
    35       AddVariableInfo(new VariableInfo("Values", "The values of the target variable as predicted by the model and the original value of the target variable", typeof(ItemList), VariableKind.New | VariableKind.Out));
     35      AddVariableInfo(new VariableInfo("Values", "Target vs. predicted values", typeof(DoubleMatrixData), VariableKind.New | VariableKind.Out));
    3636    }
    3737
    3838    public override void Evaluate(IScope scope, ITreeEvaluator evaluator, Dataset dataset, int targetVariable, int start, int end, bool updateTargetValues) {
    39       ItemList values = GetVariableValue<ItemList>("Values", scope, false, false);
     39      DoubleMatrixData values = GetVariableValue<DoubleMatrixData>("Values", scope, false, false);
    4040      if (values == null) {
    41         values = new ItemList();
     41        values = new DoubleMatrixData();
    4242        IVariableInfo info = GetVariableInfo("Values");
    4343        if (info.Local)
     
    4646          scope.AddVariable(new HeuristicLab.Core.Variable(scope.TranslateName(info.FormalName), values));
    4747      }
    48       values.Clear();
     48
     49      double[,] v = new double[end - start, 2];
    4950
    5051      for (int sample = start; sample < end; sample++) {
    51         ItemList row = new ItemList();
    5252        double estimated = evaluator.Evaluate(sample);
    5353        double original = dataset.GetValue(sample, targetVariable);
     
    5555          dataset.SetValue(sample, targetVariable, estimated);
    5656        }
    57         row.Add(new DoubleData(estimated));
    58         row.Add(new DoubleData(original));
    59         values.Add(row);
     57        v[sample - start, 0] = original;
     58        v[sample - start, 1] = estimated;
    6059      }
     60      values.Data = v;
    6161    }
    6262  }
  • trunk/sources/HeuristicLab.GP.StructureIdentification/3.3/HeuristicLab.GP.StructureIdentification-3.3.csproj

    r2034 r2041  
    140140    <Compile Include="Tangens.cs" />
    141141    <Compile Include="Variable.cs" />
     142    <Compile Include="Evaluators\VariableEvaluationImpactCalculator.cs" />
     143    <Compile Include="Evaluators\VariableQualityImpactCalculator.cs" />
    142144    <Compile Include="Xor.cs" />
    143145  </ItemGroup>
  • trunk/sources/HeuristicLab.Modeling/3.2/HeuristicLab.Modeling-3.2.csproj

    r2038 r2041  
    8383  <ItemGroup>
    8484    <Compile Include="ClassificationProblemInjector.cs" />
     85    <Compile Include="VariableImpactCalculatorBase.cs" />
     86    <Compile Include="VariableEvaluationImpactCalculator.cs" />
    8587    <Compile Include="Model.cs" />
    8688    <Compile Include="IModel.cs" />
  • trunk/sources/HeuristicLab.Modeling/3.2/IModel.cs

    r2034 r2041  
    4545    double ValidationVarianceAccountedFor { get; }
    4646    double TestVarianceAccountedFor { get; }
    47     double GetVariableImpact(string variableName);
     47    double GetVariableEvaluationImpact(string variableName);
     48    double GetVariableQualityImpact(string variableName);
    4849
    4950    IItem Data { get; }
  • trunk/sources/HeuristicLab.Modeling/3.2/Model.cs

    r2034 r2041  
    120120    }
    121121
    122     public double GetVariableImpact(string variableName) {
    123       if (variableImpacts.ContainsKey(variableName)) return variableImpacts[variableName];
     122    public double GetVariableQualityImpact(string variableName) {
     123      if (variableQualityImpacts.ContainsKey(variableName)) return variableQualityImpacts[variableName];
     124      else return 1.0;
     125    }
     126
     127    public double GetVariableEvaluationImpact(string variableName) {
     128      if (variableEvaluationImpacts.ContainsKey(variableName)) return variableEvaluationImpacts[variableName];
    124129      else return 0.0;
    125130    }
     
    133138    #endregion
    134139
    135     private Dictionary<string, double> variableImpacts = new Dictionary<string, double>();
    136     public void SetVariableImpact(string variableName, double impact) {
    137       variableImpacts[variableName] = impact;
     140    private Dictionary<string, double> variableQualityImpacts = new Dictionary<string, double>();
     141    public void SetVariableQualityImpact(string variableName, double impact) {
     142      variableQualityImpacts[variableName] = impact;
    138143    }
    139144
    140     public void SetVariableImpact(int variableIndex, double impact) {
    141       variableImpacts[dataset.GetVariableName(variableIndex)] = impact;
     145    public void SetVariableQualityImpact(int variableIndex, double impact) {
     146      variableQualityImpacts[dataset.GetVariableName(variableIndex)] = impact;
     147    }
     148
     149    private Dictionary<string, double> variableEvaluationImpacts = new Dictionary<string, double>();
     150    public void SetVariableEvaluationImpact(string variableName, double impact) {
     151      variableEvaluationImpacts[variableName] = impact;
     152    }
     153
     154    public void SetVariableEvaluationImpact(int variableIndex, double impact) {
     155      variableEvaluationImpacts[dataset.GetVariableName(variableIndex)] = impact;
    142156    }
    143157  }
  • trunk/sources/HeuristicLab.Modeling/3.2/VariableQualityImpactCalculator.cs

    r2038 r2041  
    3030
    3131namespace HeuristicLab.Modeling {
    32   public class VariableQualityImpactCalculator : OperatorBase {
     32  public abstract class VariableQualityImpactCalculator : VariableImpactCalculatorBase<double> {
    3333    public override string Description {
    3434      get { return @"Calculates the impact of all allowed input variables on the quality of the model using evaluator supplied as suboperator."; }
    3535    }
    3636
    37     public VariableQualityImpactCalculator()
    38       : base() {
    39       AddVariableInfo(new VariableInfo("Dataset", "Dataset", typeof(Dataset), VariableKind.In));
    40       AddVariableInfo(new VariableInfo("TargetVariable", "TargetVariable", typeof(IntData), VariableKind.In));
    41       AddVariableInfo(new VariableInfo("AllowedFeatures", "Indexes of allowed input variables", typeof(ItemList<IntData>), VariableKind.In));
    42       AddVariableInfo(new VariableInfo("TrainingSamplesStart", "TrainingSamplesStart", typeof(IntData), VariableKind.In));
    43       AddVariableInfo(new VariableInfo("TrainingSamplesEnd", "TrainingSamplesEnd", typeof(IntData), VariableKind.In));
    44       AddVariableInfo(new VariableInfo("VariableQualityImpacts", "Effect on quality of model (percentage of original quality) if variable is replaced by its mean.", typeof(ItemList), VariableKind.New));
     37    public override string OutputVariableName {
     38      get { return "VariableQualityImpacts"; }
    4539    }
    4640
    47     public override IOperation Apply(IScope scope) {
    48       ItemList<IntData> allowedFeatures = GetVariableValue<ItemList<IntData>>("AllowedFeatures", scope, true);
    49       int targetVariable = GetVariableValue<IntData>("TargetVariable", scope, true).Data;
    50       Dataset dataset = GetVariableValue<Dataset>("Dataset", scope, true);
    51       Dataset dirtyDataset = (Dataset)dataset.Clone();
    52       int start = GetVariableValue<IntData>("TrainingSamplesStart", scope, true).Data;
    53       int end = GetVariableValue<IntData>("TrainingSamplesEnd", scope, true).Data;
    54 
    55       if (SubOperators.Count < 1) throw new InvalidOperationException("VariableQualityImpactCalculator needs a suboperator to evaluate the model");
    56       IOperator evaluationOperator = this.SubOperators[0];
    57       ItemList variableQualityImpacts = new ItemList();
    58 
    59       // calculateReferenceQuality
    60       double referenceQuality = CalculateQuality(scope, dataset, evaluationOperator);
    61 
    62       for (int i = 0; i < allowedFeatures.Count; i++) {
    63         int currentVariable = allowedFeatures[i].Data;
    64         var oldValues = ReplaceVariableValues(dirtyDataset, currentVariable , CalculateNewValues(dirtyDataset, currentVariable, start, end), start, end);
    65         double newQuality = CalculateQuality(scope, dirtyDataset, evaluationOperator);
    66         double ratio = newQuality / referenceQuality;
    67         ItemList row = new ItemList();
    68         row.Add(new StringData(dataset.GetVariableName(currentVariable)));
    69         row.Add(new DoubleData(ratio));
    70         variableQualityImpacts.Add(row);
    71         ReplaceVariableValues(dirtyDataset, currentVariable, oldValues, start, end);
    72       }
    73       scope.AddVariable(new Variable(scope.TranslateName("VariableQualityImpacts"), variableQualityImpacts));
    74       return null;
     41    protected override double CalculateImpact(double referenceValue, double newValue) {
     42      return newValue / referenceValue;
    7543    }
    7644
    77     private double CalculateQuality(IScope scope, Dataset dataset, IOperator evaluationOperator) {
    78       Scope s = new Scope();
    79       s.AddVariable(new Variable("Dataset", dataset));
    80       scope.AddSubScope(s);
    81       evaluationOperator.Execute(s);
    82       double quality = s.GetVariableValue<DoubleData>("Quality", false).Data;
    83       scope.RemoveSubScope(s);
    84       return quality;
     45    protected override double CalculateValue(IScope scope, Dataset dataset, int targetVariable, int start, int end) {
     46      return CalculateQuality(scope, dataset, targetVariable, start, end);
    8547    }
    8648
    87     private IEnumerable<double> ReplaceVariableValues(Dataset ds, int variableIndex, IEnumerable<double> newValues, int start, int end) {
    88       double[] oldValues = new double[end - start];
    89       for (int i = 0; i < end - start; i++) oldValues[i] = ds.GetValue(i + start, variableIndex);
    90       if (newValues.Count() != end - start) throw new ArgumentException("The length of the new values sequence doesn't match the required length (number of replaced values)");
    91 
    92       int index = start;
    93       ds.FireChangeEvents = false;
    94       foreach(double v in newValues) {
    95         ds.SetValue(index++, variableIndex, v);
    96       }
    97       ds.FireChangeEvents = true;
    98       ds.FireChanged();
    99       return oldValues;
    100     }
    101 
    102     private IEnumerable<double> CalculateNewValues(Dataset ds, int variableIndex, int start, int end) {
    103       double mean = ds.GetMean(variableIndex, start, end);
    104       return Enumerable.Repeat(mean, end - start);
    105     }
     49    protected abstract double CalculateQuality(IScope scope, Dataset dataset, int targetVariable, int start, int end);
    10650  }
    10751}
Note: See TracChangeset for help on using the changeset viewer.