Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
06/10/09 19:05:34 (16 years ago)
Author:
gkronber
Message:

Implemented base classes for variable impact analysis and specific operators for GP. See ticket #644 (Variable impact of CEDMA models should be calculated and stored in the result DB).

Location:
trunk/sources/HeuristicLab.Modeling/3.2
Files:
2 added
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Modeling/3.2/HeuristicLab.Modeling-3.2.csproj

    r2038 r2041  
    8383  <ItemGroup>
    8484    <Compile Include="ClassificationProblemInjector.cs" />
     85    <Compile Include="VariableImpactCalculatorBase.cs" />
     86    <Compile Include="VariableEvaluationImpactCalculator.cs" />
    8587    <Compile Include="Model.cs" />
    8688    <Compile Include="IModel.cs" />
  • trunk/sources/HeuristicLab.Modeling/3.2/IModel.cs

    r2034 r2041  
    4545    double ValidationVarianceAccountedFor { get; }
    4646    double TestVarianceAccountedFor { get; }
    47     double GetVariableImpact(string variableName);
     47    double GetVariableEvaluationImpact(string variableName);
     48    double GetVariableQualityImpact(string variableName);
    4849
    4950    IItem Data { get; }
  • trunk/sources/HeuristicLab.Modeling/3.2/Model.cs

    r2034 r2041  
    120120    }
    121121
    122     public double GetVariableImpact(string variableName) {
    123       if (variableImpacts.ContainsKey(variableName)) return variableImpacts[variableName];
     122    public double GetVariableQualityImpact(string variableName) {
     123      if (variableQualityImpacts.ContainsKey(variableName)) return variableQualityImpacts[variableName];
     124      else return 1.0;
     125    }
     126
     127    public double GetVariableEvaluationImpact(string variableName) {
     128      if (variableEvaluationImpacts.ContainsKey(variableName)) return variableEvaluationImpacts[variableName];
    124129      else return 0.0;
    125130    }
     
    133138    #endregion
    134139
    135     private Dictionary<string, double> variableImpacts = new Dictionary<string, double>();
    136     public void SetVariableImpact(string variableName, double impact) {
    137       variableImpacts[variableName] = impact;
     140    private Dictionary<string, double> variableQualityImpacts = new Dictionary<string, double>();
     141    public void SetVariableQualityImpact(string variableName, double impact) {
     142      variableQualityImpacts[variableName] = impact;
    138143    }
    139144
    140     public void SetVariableImpact(int variableIndex, double impact) {
    141       variableImpacts[dataset.GetVariableName(variableIndex)] = impact;
     145    public void SetVariableQualityImpact(int variableIndex, double impact) {
     146      variableQualityImpacts[dataset.GetVariableName(variableIndex)] = impact;
     147    }
     148
     149    private Dictionary<string, double> variableEvaluationImpacts = new Dictionary<string, double>();
     150    public void SetVariableEvaluationImpact(string variableName, double impact) {
     151      variableEvaluationImpacts[variableName] = impact;
     152    }
     153
     154    public void SetVariableEvaluationImpact(int variableIndex, double impact) {
     155      variableEvaluationImpacts[dataset.GetVariableName(variableIndex)] = impact;
    142156    }
    143157  }
  • trunk/sources/HeuristicLab.Modeling/3.2/VariableQualityImpactCalculator.cs

    r2038 r2041  
    3030
    3131namespace HeuristicLab.Modeling {
    32   public class VariableQualityImpactCalculator : OperatorBase {
     32  public abstract class VariableQualityImpactCalculator : VariableImpactCalculatorBase<double> {
    3333    public override string Description {
    3434      get { return @"Calculates the impact of all allowed input variables on the quality of the model using evaluator supplied as suboperator."; }
    3535    }
    3636
    37     public VariableQualityImpactCalculator()
    38       : base() {
    39       AddVariableInfo(new VariableInfo("Dataset", "Dataset", typeof(Dataset), VariableKind.In));
    40       AddVariableInfo(new VariableInfo("TargetVariable", "TargetVariable", typeof(IntData), VariableKind.In));
    41       AddVariableInfo(new VariableInfo("AllowedFeatures", "Indexes of allowed input variables", typeof(ItemList<IntData>), VariableKind.In));
    42       AddVariableInfo(new VariableInfo("TrainingSamplesStart", "TrainingSamplesStart", typeof(IntData), VariableKind.In));
    43       AddVariableInfo(new VariableInfo("TrainingSamplesEnd", "TrainingSamplesEnd", typeof(IntData), VariableKind.In));
    44       AddVariableInfo(new VariableInfo("VariableQualityImpacts", "Effect on quality of model (percentage of original quality) if variable is replaced by its mean.", typeof(ItemList), VariableKind.New));
     37    public override string OutputVariableName {
     38      get { return "VariableQualityImpacts"; }
    4539    }
    4640
    47     public override IOperation Apply(IScope scope) {
    48       ItemList<IntData> allowedFeatures = GetVariableValue<ItemList<IntData>>("AllowedFeatures", scope, true);
    49       int targetVariable = GetVariableValue<IntData>("TargetVariable", scope, true).Data;
    50       Dataset dataset = GetVariableValue<Dataset>("Dataset", scope, true);
    51       Dataset dirtyDataset = (Dataset)dataset.Clone();
    52       int start = GetVariableValue<IntData>("TrainingSamplesStart", scope, true).Data;
    53       int end = GetVariableValue<IntData>("TrainingSamplesEnd", scope, true).Data;
    54 
    55       if (SubOperators.Count < 1) throw new InvalidOperationException("VariableQualityImpactCalculator needs a suboperator to evaluate the model");
    56       IOperator evaluationOperator = this.SubOperators[0];
    57       ItemList variableQualityImpacts = new ItemList();
    58 
    59       // calculateReferenceQuality
    60       double referenceQuality = CalculateQuality(scope, dataset, evaluationOperator);
    61 
    62       for (int i = 0; i < allowedFeatures.Count; i++) {
    63         int currentVariable = allowedFeatures[i].Data;
    64         var oldValues = ReplaceVariableValues(dirtyDataset, currentVariable , CalculateNewValues(dirtyDataset, currentVariable, start, end), start, end);
    65         double newQuality = CalculateQuality(scope, dirtyDataset, evaluationOperator);
    66         double ratio = newQuality / referenceQuality;
    67         ItemList row = new ItemList();
    68         row.Add(new StringData(dataset.GetVariableName(currentVariable)));
    69         row.Add(new DoubleData(ratio));
    70         variableQualityImpacts.Add(row);
    71         ReplaceVariableValues(dirtyDataset, currentVariable, oldValues, start, end);
    72       }
    73       scope.AddVariable(new Variable(scope.TranslateName("VariableQualityImpacts"), variableQualityImpacts));
    74       return null;
     41    protected override double CalculateImpact(double referenceValue, double newValue) {
     42      return newValue / referenceValue;
    7543    }
    7644
    77     private double CalculateQuality(IScope scope, Dataset dataset, IOperator evaluationOperator) {
    78       Scope s = new Scope();
    79       s.AddVariable(new Variable("Dataset", dataset));
    80       scope.AddSubScope(s);
    81       evaluationOperator.Execute(s);
    82       double quality = s.GetVariableValue<DoubleData>("Quality", false).Data;
    83       scope.RemoveSubScope(s);
    84       return quality;
     45    protected override double CalculateValue(IScope scope, Dataset dataset, int targetVariable, int start, int end) {
     46      return CalculateQuality(scope, dataset, targetVariable, start, end);
    8547    }
    8648
    87     private IEnumerable<double> ReplaceVariableValues(Dataset ds, int variableIndex, IEnumerable<double> newValues, int start, int end) {
    88       double[] oldValues = new double[end - start];
    89       for (int i = 0; i < end - start; i++) oldValues[i] = ds.GetValue(i + start, variableIndex);
    90       if (newValues.Count() != end - start) throw new ArgumentException("The length of the new values sequence doesn't match the required length (number of replaced values)");
    91 
    92       int index = start;
    93       ds.FireChangeEvents = false;
    94       foreach(double v in newValues) {
    95         ds.SetValue(index++, variableIndex, v);
    96       }
    97       ds.FireChangeEvents = true;
    98       ds.FireChanged();
    99       return oldValues;
    100     }
    101 
    102     private IEnumerable<double> CalculateNewValues(Dataset ds, int variableIndex, int start, int end) {
    103       double mean = ds.GetMean(variableIndex, start, end);
    104       return Enumerable.Repeat(mean, end - start);
    105     }
     49    protected abstract double CalculateQuality(IScope scope, Dataset dataset, int targetVariable, int start, int end);
    10650  }
    10751}
Note: See TracChangeset for help on using the changeset viewer.