Free cookie consent management tool by TermsFeed Policy Generator

Changeset 2319 for trunk/sources


Ignore:
Timestamp: 09/01/09 11:09:50 (15 years ago)
Author: gkronber
Message:

Applied the patch from mkommend for the variable impact calculators and adapted the data-modeling algorithms to use the new operators for variable impact calculation (ticket #728).

Location: trunk/sources
Files:
3 added
5 deleted
14 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.DataAnalysis/3.2/Dataset.cs

    r2311 r2319  
    3131namespace HeuristicLab.DataAnalysis {
    3232  public sealed class Dataset : ItemBase {
    33 
    34     private string name;
    35     private double[] samples;
    36     private int rows;
    37     private int columns;
    3833    private Dictionary<int, Dictionary<int, double>>[] cachedMeans;
    3934    private Dictionary<int, Dictionary<int, double>>[] cachedRanges;
    40     private double[] scalingFactor;
    41     private double[] scalingOffset;
    4235    private bool cachedValuesInvalidated = true;
    43 
    44     private bool fireChangeEvents = true;
    45     public bool FireChangeEvents {
    46       get { return fireChangeEvents; }
    47       set { fireChangeEvents = value; }
    48     }
    49 
     36   
     37    public Dataset()
     38      : this(new double[,] { { 0.0 } }) {
     39    }
     40
     41    public Dataset(double[,] samples) {
     42      Name = "-";
     43      Rows = samples.GetLength(0);
     44      Columns = samples.GetLength(1);
     45      double[] values = new double[Rows * Columns];
     46      int i = 0;
     47      for (int row = 0; row < Rows; row++) {
     48        for (int column = 0; column < columns; column++) {
     49          values[i++] = samples[row, column];
     50        }
     51      }
     52      Samples = values;
     53      fireChangeEvents = true;
     54    }
     55
     56    #region Properties
     57    private string name;
    5058    public string Name {
    5159      get { return name; }
     
    5361    }
    5462
     63    private int rows;
    5564    public int Rows {
    5665      get { return rows; }
     
    5867    }
    5968
     69    private int columns;
    6070    public int Columns {
    6171      get { return columns; }
     
    6878    }
    6979
    70     public double[] ScalingFactor {
    71       get { return scalingFactor; }
    72       set {
    73         if (value.Length != scalingFactor.Length)
    74           throw new ArgumentException("Length of scaling factor array doesn't match number of variables");
    75         scalingFactor = value;
    76       }
    77     }
    78     public double[] ScalingOffset {
    79       get { return scalingOffset; }
    80       set {
    81         if (value.Length != scalingOffset.Length)
    82           throw new ArgumentException("Length of scaling offset array doesn't match number of variables");
    83         scalingOffset = value;
    84       }
    85     }
    86 
    87     public double GetValue(int i, int j) {
    88       return samples[columns * i + j];
    89     }
    90 
    91     public void SetValue(int i, int j, double v) {
    92       if (v != samples[columns * i + j]) {
    93         samples[columns * i + j] = v;
    94         cachedValuesInvalidated = true;
    95         if (fireChangeEvents) FireChanged();
    96       }
    97     }
    98 
     80    private string[] variableNames;
     81    public IEnumerable<string> VariableNames {
     82      get { return variableNames; }
     83    }
     84
     85    private double[] samples;
    9986    public double[] Samples {
    10087      get { return samples; }
     
    113100    }
    114101
    115     private string[] variableNames;
    116     public IEnumerable<string> VariableNames {
    117       get { return variableNames; }
    118     }
    119 
    120     public Dataset()
    121       : this(new double[,] { { 0.0 } }) {
    122     }
    123 
    124     public Dataset(double[,] samples) {
    125       Name = "-";
    126       Rows = samples.GetLength(0);
    127       Columns = samples.GetLength(1);
    128       double[] values = new double[Rows * Columns];
    129       int i = 0;
    130       for (int row = 0; row < Rows; row++) {
    131         for (int column = 0; column < columns; column++) {
    132           values[i++] = samples[row, column];
    133         }
    134       }
    135       Samples = values;
    136       fireChangeEvents = true;
    137     }
    138 
    139 
    140     public string GetVariableName(int variableIndex) {
    141       return variableNames[variableIndex];
    142     }
    143 
    144     public int GetVariableIndex(string variableName) {
    145       for (int i = 0; i < variableNames.Length; i++) {
    146         if (variableNames[i].Equals(variableName)) return i;
    147       }
    148       throw new ArgumentException("The variable name " + variableName + " was not found.");
     102    private bool fireChangeEvents = true;
     103    public bool FireChangeEvents {
     104      get { return fireChangeEvents; }
     105      set { fireChangeEvents = value; }
     106    }
     107
     108    private double[] scalingFactor;
     109    public double[] ScalingFactor {
     110      get { return scalingFactor; }
     111      set {
     112        if (value.Length != scalingFactor.Length)
     113          throw new ArgumentException("Length of scaling factor array doesn't match number of variables");
     114        scalingFactor = value;
     115      }
     116    }
     117
     118    private double[] scalingOffset;
     119    public double[] ScalingOffset {
     120      get { return scalingOffset; }
     121      set {
     122        if (value.Length != scalingOffset.Length)
     123          throw new ArgumentException("Length of scaling offset array doesn't match number of variables");
     124        scalingOffset = value;
     125      }
     126    }
     127    #endregion
     128
     129    #region Modify and get values
     130    public double GetValue(int i, int j) {
     131      return samples[columns * i + j];
    149132    }
    150133
     
    173156    }
    174157
     158    public void SetValue(int i, int j, double v) {
     159      if (v != samples[columns * i + j]) {
     160        samples[columns * i + j] = v;
     161        cachedValuesInvalidated = true;
     162        if (fireChangeEvents) FireChanged();
     163      }
     164    }
     165
     166    public IEnumerable<double> ReplaceVariableValues(int variableIndex, IEnumerable<double> newValues, int start, int end) {
     167      double[] oldValues = new double[end - start];
     168      for (int i = 0; i < end - start; i++) oldValues[i] = this.GetValue(i + start, variableIndex);
     169      if (newValues.Count() != end - start) throw new ArgumentException("The length of the new values sequence doesn't match the required length (number of replaced values)");
     170
     171      int index = start;
     172      this.FireChangeEvents = false;
     173      foreach (double v in newValues) {
     174        this.SetValue(index++, variableIndex, v);
     175      }
     176      this.FireChangeEvents = true;
     177      this.FireChanged();
     178      return oldValues;
     179    }
     180
     181    public IEnumerable<double> ReplaceVariableValues(string variableName, IEnumerable<double> newValues, int start, int end) {
     182      return ReplaceVariableValues(this.GetVariableIndex(variableName), newValues, start, end);
     183    }
     184    #endregion
     185
     186    #region Variable name methods
     187    public string GetVariableName(int variableIndex) {
     188      return variableNames[variableIndex];
     189    }
     190
     191    public int GetVariableIndex(string variableName) {
     192      for (int i = 0; i < variableNames.Length; i++) {
     193        if (variableNames[i].Equals(variableName)) return i;
     194      }
     195      throw new ArgumentException("The variable name " + variableName + " was not found.");
     196    }
     197
    175198    public void SetVariableName(int variableIndex, string name) {
    176199      variableNames[variableIndex] = name;
     
    180203      return this.variableNames.Contains(variableName);
    181204    }
     205    #endregion
    182206
    183207    public override IView CreateView() {
    184208      return new DatasetView(this);
     209    }
     210
     211
     212    #region Variable statistics
     213    public double GetMean(string variableName) {
     214      return GetMean(GetVariableIndex(variableName));
     215    }
     216
     217    public double GetMean(string variableName, int start, int end) {
     218      return GetMean(GetVariableIndex(variableName), start, end);
     219    }
     220
     221    public double GetMean(int column) {
     222      return GetMean(column, 0, Rows);
     223    }
     224
     225    public double GetMean(int column, int start, int end) {
     226      if (cachedValuesInvalidated) CreateDictionaries();
     227      if (!cachedMeans[column].ContainsKey(start) || !cachedMeans[column][start].ContainsKey(end)) {
     228        double[] values = new double[end - start];
     229        for (int sample = start; sample < end; sample++) {
     230          values[sample - start] = GetValue(sample, column);
     231        }
     232        double mean = Statistics.Mean(values);
     233        if (!cachedMeans[column].ContainsKey(start)) cachedMeans[column][start] = new Dictionary<int, double>();
     234        cachedMeans[column][start][end] = mean;
     235        return mean;
     236      } else {
     237        return cachedMeans[column][start][end];
     238      }
     239    }
     240
     241    public double GetRange(string variableName) {
     242      return GetRange(this.GetVariableIndex(variableName));
     243    }
     244
     245    public double GetRange(int column) {
     246      return GetRange(column, 0, Rows);
     247    }
     248
     249    public double GetRange(string variableName, int start, int end) {
     250      return GetRange(this.GetVariableIndex(variableName), start, end);
     251    }
     252
     253    public double GetRange(int column, int start, int end) {
     254      if (cachedValuesInvalidated) CreateDictionaries();
     255      if (!cachedRanges[column].ContainsKey(start) || !cachedRanges[column][start].ContainsKey(end)) {
     256        double[] values = new double[end - start];
     257        for (int sample = start; sample < end; sample++) {
     258          values[sample - start] = GetValue(sample, column);
     259        }
     260        double range = Statistics.Range(values);
     261        if (!cachedRanges[column].ContainsKey(start)) cachedRanges[column][start]= new Dictionary<int, double>();
     262        cachedRanges[column][start][end] = range;
     263        return range;
     264      } else {
     265        return cachedRanges[column][start][end];
     266      }
     267    }
     268
     269    public double GetMaximum(string variableName) {
     270      return GetMaximum(this.GetVariableIndex(variableName));
     271    }
     272
     273    public double GetMaximum(int column) {
     274      return GetMaximum(column, 0, Rows);
     275    }
     276
     277    public double GetMaximum(string variableName, int start, int end) {
     278      return GetMaximum(this.GetVariableIndex(variableName), start, end);
     279    }
     280
     281    public double GetMaximum(int column, int start, int end) {
     282      double max = Double.NegativeInfinity;
     283      for (int i = start; i < end; i++) {
     284        double val = GetValue(i, column);
     285        if (!double.IsNaN(val) && val > max) max = val;
     286      }
     287      return max;
     288    }
     289
     290    public double GetMinimum(string variableName) {
     291      return GetMinimum(GetVariableIndex(variableName));
     292    }
     293
     294    public double GetMinimum(int column) {
     295      return GetMinimum(column, 0, Rows);
     296    }
     297
     298    public double GetMinimum(string variableName, int start, int end) {
     299      return GetMinimum(this.GetVariableIndex(variableName), start, end);
     300    }
     301
     302    public double GetMinimum(int column, int start, int end) {
     303      double min = Double.PositiveInfinity;
     304      for (int i = start; i < end; i++) {
     305        double val = GetValue(i, column);
     306        if (!double.IsNaN(val) && val < min) min = val;
     307      }
     308      return min;
     309    }
     310    #endregion
     311
     312    internal void ScaleVariable(int column) {
     313      if (scalingFactor[column] == 1.0 && scalingOffset[column] == 0.0) {
     314        double min = GetMinimum(column);
     315        double max = GetMaximum(column);
     316        double range = max - min;
     317        if (range == 0) ScaleVariable(column, 1.0, -min);
     318        else ScaleVariable(column, 1.0 / range, -min);
     319      }
     320      cachedValuesInvalidated = true;
     321      if (fireChangeEvents) FireChanged();
     322    }
     323
     324    internal void ScaleVariable(int column, double factor, double offset) {
     325      scalingFactor[column] = factor;
     326      scalingOffset[column] = offset;
     327      for (int i = 0; i < Rows; i++) {
     328        double origValue = samples[i * columns + column];
     329        samples[i * columns + column] = (origValue + offset) * factor;
     330      }
     331      cachedValuesInvalidated = true;
     332      if (fireChangeEvents) FireChanged();
     333    }
     334
     335    internal void UnscaleVariable(int column) {
     336      if (scalingFactor[column] != 1.0 || scalingOffset[column] != 0.0) {
     337        for (int i = 0; i < rows; i++) {
     338          double scaledValue = samples[i * columns + column];
     339          samples[i * columns + column] = scaledValue / scalingFactor[column] - scalingOffset[column];
     340        }
     341        scalingFactor[column] = 1.0;
     342        scalingOffset[column] = 0.0;
     343      }
     344      cachedValuesInvalidated = true;
     345      if (fireChangeEvents) FireChanged();
     346    }
     347
     348    private void CreateDictionaries() {
     349      // keep a means and ranges dictionary for each column (possible target variable) of the dataset.
     350      cachedMeans = new Dictionary<int, Dictionary<int, double>>[columns];
     351      cachedRanges = new Dictionary<int, Dictionary<int, double>>[columns];
     352      for (int i = 0; i < columns; i++) {
     353        cachedMeans[i] = new Dictionary<int, Dictionary<int, double>>();
     354        cachedRanges[i] = new Dictionary<int, Dictionary<int, double>>();
     355      }
     356      cachedValuesInvalidated = false;
    185357    }
    186358
     
    313485    }
    314486    #endregion
    315 
    316     public double GetMean(int column) {
    317       return GetMean(column, 0, Rows);
    318     }
    319 
    320     public double GetMean(int column, int from, int to) {
    321       if (cachedValuesInvalidated) CreateDictionaries();
    322       if (!cachedMeans[column].ContainsKey(from) || !cachedMeans[column][from].ContainsKey(to)) {
    323         double[] values = new double[to - from];
    324         for (int sample = from; sample < to; sample++) {
    325           values[sample - from] = GetValue(sample, column);
    326         }
    327         double mean = Statistics.Mean(values);
    328         if (!cachedMeans[column].ContainsKey(from)) cachedMeans[column][from] = new Dictionary<int, double>();
    329         cachedMeans[column][from][to] = mean;
    330         return mean;
    331       } else {
    332         return cachedMeans[column][from][to];
    333       }
    334     }
    335 
    336     public double GetRange(int column) {
    337       return GetRange(column, 0, Rows);
    338     }
    339 
    340     public double GetRange(int column, int from, int to) {
    341       if (cachedValuesInvalidated) CreateDictionaries();
    342       if (!cachedRanges[column].ContainsKey(from) || !cachedRanges[column][from].ContainsKey(to)) {
    343         double[] values = new double[to - from];
    344         for (int sample = from; sample < to; sample++) {
    345           values[sample - from] = GetValue(sample, column);
    346         }
    347         double range = Statistics.Range(values);
    348         if (!cachedRanges[column].ContainsKey(from)) cachedRanges[column][from] = new Dictionary<int, double>();
    349         cachedRanges[column][from][to] = range;
    350         return range;
    351       } else {
    352         return cachedRanges[column][from][to];
    353       }
    354     }
    355 
    356     public double GetMaximum(int column) {
    357       return GetMaximum(column, 0, Rows);
    358     }
    359 
    360     public double GetMaximum(int column, int start, int end) {
    361       double max = Double.NegativeInfinity;
    362       for (int i = start; i < end; i++) {
    363         double val = GetValue(i, column);
    364         if (!double.IsNaN(val) && val > max) max = val;
    365       }
    366       return max;
    367     }
    368 
    369     public double GetMinimum(int column) {
    370       return GetMinimum(column, 0, Rows);
    371     }
    372 
    373     public double GetMinimum(int column, int start, int end) {
    374       double min = Double.PositiveInfinity;
    375       for (int i = start; i < end; i++) {
    376         double val = GetValue(i, column);
    377         if (!double.IsNaN(val) && val < min) min = val;
    378       }
    379       return min;
    380     }
    381 
    382     internal void ScaleVariable(int column) {
    383       if (scalingFactor[column] == 1.0 && scalingOffset[column] == 0.0) {
    384         double min = GetMinimum(column);
    385         double max = GetMaximum(column);
    386         double range = max - min;
    387         if (range == 0) ScaleVariable(column, 1.0, -min);
    388         else ScaleVariable(column, 1.0 / range, -min);
    389       }
    390       cachedValuesInvalidated = true;
    391       if (fireChangeEvents) FireChanged();
    392     }
    393 
    394     internal void ScaleVariable(int column, double factor, double offset) {
    395       scalingFactor[column] = factor;
    396       scalingOffset[column] = offset;
    397       for (int i = 0; i < Rows; i++) {
    398         double origValue = samples[i * columns + column];
    399         samples[i * columns + column] = (origValue + offset) * factor;
    400       }
    401       cachedValuesInvalidated = true;
    402       if (fireChangeEvents) FireChanged();
    403     }
    404 
    405     internal void UnscaleVariable(int column) {
    406       if (scalingFactor[column] != 1.0 || scalingOffset[column] != 0.0) {
    407         for (int i = 0; i < rows; i++) {
    408           double scaledValue = samples[i * columns + column];
    409           samples[i * columns + column] = scaledValue / scalingFactor[column] - scalingOffset[column];
    410         }
    411         scalingFactor[column] = 1.0;
    412         scalingOffset[column] = 0.0;
    413       }
    414       cachedValuesInvalidated = true;
    415       if (fireChangeEvents) FireChanged();
    416     }
    417 
    418     private void CreateDictionaries() {
    419       // keep a means and ranges dictionary for each column (possible target variable) of the dataset.
    420       cachedMeans = new Dictionary<int, Dictionary<int, double>>[columns];
    421       cachedRanges = new Dictionary<int, Dictionary<int, double>>[columns];
    422       for (int i = 0; i < columns; i++) {
    423         cachedMeans[i] = new Dictionary<int, Dictionary<int, double>>();
    424         cachedRanges[i] = new Dictionary<int, Dictionary<int, double>>();
    425       }
    426       cachedValuesInvalidated = false;
    427     }
    428487  }
    429488}
  • trunk/sources/HeuristicLab.GP.StructureIdentification.Classification/3.3/OffspringSelectionGP.cs

    r2222 r2319  
    2222using HeuristicLab.Core;
    2323using HeuristicLab.Modeling;
     24using HeuristicLab.Operators;
    2425
    2526namespace HeuristicLab.GP.StructureIdentification.Classification {
     
    2728
    2829    protected override IOperator CreateBestSolutionProcessor() {
    29       IOperator seq = base.CreateBestSolutionProcessor();
     30      IOperator seq = new SequentialProcessor();
     31      seq.AddSubOperator(base.CreateBestSolutionProcessor());
    3032      seq.AddSubOperator(StandardGP.BestSolutionProcessor);
    3133      return seq;
  • trunk/sources/HeuristicLab.GP.StructureIdentification.Classification/3.3/StandardGP.cs

    r2222 r2319  
    2727  public class StandardGP : HeuristicLab.GP.StructureIdentification.StandardGP, IClassificationAlgorithm {
    2828    protected override IOperator CreateBestSolutionProcessor() {
    29       IOperator seq = base.CreateBestSolutionProcessor();
    30       seq.AddSubOperator(BestSolutionProcessor);
    31       return seq;
     29      IOperator bestSolutionProcessor = BestSolutionProcessor;
     30      bestSolutionProcessor.AddSubOperator(base.CreateBestSolutionProcessor());
     31      return bestSolutionProcessor;
    3232    }
    3333
     
    7676  }
    7777}
     78
  • trunk/sources/HeuristicLab.GP.StructureIdentification.TimeSeries/3.3/OffspringSelectionGP.cs

    r2222 r2319  
    4848
    4949    protected override IOperator CreateBestSolutionProcessor() {
    50       IOperator seq = base.CreateBestSolutionProcessor();
     50      SequentialProcessor seq = new SequentialProcessor();
     51      seq.AddSubOperator(base.CreateBestSolutionProcessor());
    5152      seq.AddSubOperator(StandardGP.BestSolutionProcessor);
    5253      return seq;
  • trunk/sources/HeuristicLab.GP.StructureIdentification.TimeSeries/3.3/StandardGP.cs

    r2222 r2319  
    5050
    5151    protected override IOperator CreateBestSolutionProcessor() {
    52       IOperator seq = base.CreateBestSolutionProcessor();
     52      SequentialProcessor seq = new SequentialProcessor();
     53      seq.AddSubOperator(base.CreateBestSolutionProcessor());
    5354      seq.AddSubOperator(BestSolutionProcessor);
    5455      return seq;
  • trunk/sources/HeuristicLab.GP.StructureIdentification/3.3/BaseClasses/AlgorithmBase.cs

    r2290 r2319  
    311311
    312312    protected internal virtual IOperator CreateBestSolutionProcessor() {
    313       return new EmptyOperator();
     313      SequentialProcessor seq = new SequentialProcessor();
     314      // calculate and set variable impacts
     315      VariableNamesExtractor namesExtractor = new VariableNamesExtractor();
     316      namesExtractor.GetVariableInfo("VariableNames").ActualName = "InputVariableNames";
     317      PredictorBuilder predictorBuilder = new PredictorBuilder();
     318
     319      VariableEvaluationImpactCalculator evaluationImpactCalculator = new VariableEvaluationImpactCalculator();
     320      evaluationImpactCalculator.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart";
     321      evaluationImpactCalculator.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd";
     322      VariableQualityImpactCalculator qualityImpactCalculator = new VariableQualityImpactCalculator();
     323      qualityImpactCalculator.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart";
     324      qualityImpactCalculator.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd";
     325
     326      seq.AddSubOperator(namesExtractor);
     327      seq.AddSubOperator(predictorBuilder);
     328      seq.AddSubOperator(evaluationImpactCalculator);
     329      seq.AddSubOperator(qualityImpactCalculator);
     330      return seq;
    314331    }
    315332
     
    410427
    411428    protected internal virtual IAnalyzerModel CreateGPModel(IScope bestModelScope) {
    412       Engine.GlobalScope.AddSubScope(bestModelScope);
    413       IGeneticProgrammingModel tree = bestModelScope.GetVariableValue<IGeneticProgrammingModel>("FunctionTree", false);
    414       ITreeEvaluator evaluator = bestModelScope.GetVariableValue<ITreeEvaluator>("TreeEvaluator", true);
    415429      IAnalyzerModel model = new AnalyzerModel();
    416       model.Predictor = new Predictor(evaluator, tree);
     430      model.Predictor = bestModelScope.GetVariableValue<IPredictor>("Predictor", true);
    417431      Dataset ds = bestModelScope.GetVariableValue<Dataset>("Dataset", true);
    418432      model.Dataset = ds;
     
    427441      model.TrainingMeanSquaredError = bestModelScope.GetVariableValue<DoubleData>("Quality", false).Data;
    428442      model.ValidationMeanSquaredError = bestModelScope.GetVariableValue<DoubleData>("ValidationQuality", false).Data;
    429       // calculate and set variable impacts
    430       VariableEvaluationImpactCalculator evaluationImpactCalculator = new VariableEvaluationImpactCalculator();
    431       evaluationImpactCalculator.GetVariableInfo("TrainingSamplesStart").ActualName = "ActualTrainingSamplesStart";
    432       evaluationImpactCalculator.GetVariableInfo("TrainingSamplesEnd").ActualName = "ActualTrainingSamplesEnd";
    433       VariableQualityImpactCalculator qualityImpactCalculator = new VariableQualityImpactCalculator();
    434       qualityImpactCalculator.GetVariableInfo("TrainingSamplesStart").ActualName = "ActualTrainingSamplesStart";
    435       qualityImpactCalculator.GetVariableInfo("TrainingSamplesEnd").ActualName = "ActualTrainingSamplesEnd";
    436        
    437 
    438       evaluationImpactCalculator.Apply(bestModelScope);
    439       qualityImpactCalculator.Apply(bestModelScope);
    440443
    441444      ItemList evaluationImpacts = bestModelScope.GetVariableValue<ItemList>("VariableEvaluationImpacts", false);
  • trunk/sources/HeuristicLab.GP.StructureIdentification/3.3/HeuristicLab.GP.StructureIdentification-3.3.csproj

    r2285 r2319  
    8585    <Compile Include="BaseClasses\FunctionTreeBase.cs" />
    8686    <Compile Include="BaseClasses\TreeEvaluatorBase.cs" />
     87    <Compile Include="PredictorBuilder.cs" />
    8788    <Compile Include="Predictor.cs" />
    8889    <Compile Include="Evaluators\SimpleGPEvaluatorBase.cs" />
     
    145146    <Compile Include="Properties\AssemblyInfo.cs" />
    146147    <Compile Include="SymbolicExpressionExporter.cs" />
    147     <Compile Include="Evaluators\VariableEvaluationImpactCalculator.cs" />
    148     <Compile Include="Evaluators\VariableQualityImpactCalculator.cs" />
     148    <Compile Include="VariableNamesExtractor.cs" />
    149149  </ItemGroup>
    150150  <ItemGroup>
  • trunk/sources/HeuristicLab.GP.StructureIdentification/3.3/StandardGP.cs

    r2285 r2319  
    203203    }
    204204
    205     protected internal override IOperator CreateBestSolutionProcessor() {
     205    protected internal override IOperator CreateBestSolutionProcessor() {     
    206206      SequentialProcessor bestSolutionProcessor = new SequentialProcessor();
     207      bestSolutionProcessor.AddSubOperator(base.CreateBestSolutionProcessor());
     208
    207209      #region MSE
    208210      MeanSquaredErrorEvaluator testMseEvaluator = new MeanSquaredErrorEvaluator();
  • trunk/sources/HeuristicLab.LinearRegression/3.2/LinearRegression.cs

    r2290 r2319  
    237237      #endregion
    238238
    239       HeuristicLab.GP.StructureIdentification.VariableEvaluationImpactCalculator evalImpactCalc = new HeuristicLab.GP.StructureIdentification.VariableEvaluationImpactCalculator();
    240       evalImpactCalc.GetVariableInfo("TrainingSamplesStart").ActualName = "ActualTrainingSamplesStart";
    241       evalImpactCalc.GetVariableInfo("TrainingSamplesEnd").ActualName = "ActualTrainingSamplesEnd";
    242       evalImpactCalc.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel";
    243       HeuristicLab.Modeling.VariableQualityImpactCalculator qualImpactCalc = new HeuristicLab.GP.StructureIdentification.VariableQualityImpactCalculator();
    244       qualImpactCalc.GetVariableInfo("TrainingSamplesStart").ActualName = "ActualTrainingSamplesStart";
    245       qualImpactCalc.GetVariableInfo("TrainingSamplesEnd").ActualName = "ActualTrainingSamplesEnd";
    246       qualImpactCalc.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel";
     239      VariableNamesExtractor namesExtractor = new VariableNamesExtractor();
     240      namesExtractor.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel";
     241      namesExtractor.GetVariableInfo("VariableNames").ActualName = "InputVariableNames";
     242      PredictorBuilder predictorBuilder = new PredictorBuilder();
     243      predictorBuilder.GetVariableInfo("FunctionTree").ActualName = "LinearRegressionModel";
     244      VariableEvaluationImpactCalculator evalImpactCalc = new VariableEvaluationImpactCalculator();
     245      evalImpactCalc.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart";
     246      evalImpactCalc.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd";
     247      VariableQualityImpactCalculator qualImpactCalc = new VariableQualityImpactCalculator();
     248      qualImpactCalc.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart";
     249      qualImpactCalc.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd";
    247250      seqProc.AddSubOperator(trainingMSE);
    248251      seqProc.AddSubOperator(validationMSE);
     
    260263      seqProc.AddSubOperator(validationVAF);
    261264      seqProc.AddSubOperator(testVAF);
     265      seqProc.AddSubOperator(namesExtractor);
     266      seqProc.AddSubOperator(predictorBuilder);
    262267      seqProc.AddSubOperator(qualImpactCalc);
    263268      seqProc.AddSubOperator(evalImpactCalc);
     
    269274
    270275    protected internal virtual IAnalyzerModel CreateLRModel(IScope bestModelScope) {
    271       IGeneticProgrammingModel tree = bestModelScope.GetVariableValue<IGeneticProgrammingModel>("LinearRegressionModel", false);
    272       ITreeEvaluator evaluator = bestModelScope.GetVariableValue<ITreeEvaluator>("TreeEvaluator", true);
    273276      IAnalyzerModel model = new AnalyzerModel();
    274       model.Predictor = new Predictor(evaluator, tree);
     277      model.Predictor = bestModelScope.GetVariableValue<IPredictor>("Predictor", true);
    275278      model.TrainingMeanSquaredError = bestModelScope.GetVariableValue<DoubleData>("TrainingQuality", false).Data;
    276279      model.ValidationMeanSquaredError = bestModelScope.GetVariableValue<DoubleData>("ValidationQuality", false).Data;
  • trunk/sources/HeuristicLab.Modeling/3.2/HeuristicLab.Modeling-3.2.csproj

    r2313 r2319  
    8787    <Compile Include="IAnalyzerModel.cs" />
    8888    <Compile Include="MatrixCreator.cs" />
    89     <Compile Include="VariableImpactCalculatorBase.cs" />
    9089    <Compile Include="VariableEvaluationImpactCalculator.cs" />
    9190    <Compile Include="IPredictor.cs" />
  • trunk/sources/HeuristicLab.Modeling/3.2/VariableEvaluationImpactCalculator.cs

    r2226 r2319  
    3030
    3131namespace HeuristicLab.Modeling {
    32   public abstract class VariableEvaluationImpactCalculator : VariableImpactCalculatorBase<double[]> {
    33     public override string OutputVariableName {
    34       get { return "VariableEvaluationImpacts"; }
     32  public class VariableEvaluationImpactCalculator : OperatorBase {
     33
     34    public VariableEvaluationImpactCalculator()
     35      : base() {
     36      AddVariableInfo(new VariableInfo("Predictor", "The predictor used to evaluate the model", typeof(IPredictor), VariableKind.In));
     37      AddVariableInfo(new VariableInfo("Dataset", "Dataset", typeof(Dataset), VariableKind.In));
     38      AddVariableInfo(new VariableInfo("TargetVariable", "TargetVariable", typeof(IntData), VariableKind.In));
     39      AddVariableInfo(new VariableInfo("InputVariableNames", "Names of used variables in the model (optional)", typeof(ItemList<StringData>), VariableKind.In));
     40      AddVariableInfo(new VariableInfo("SamplesStart", "SamplesStart", typeof(IntData), VariableKind.In));
     41      AddVariableInfo(new VariableInfo("SamplesEnd", "SamplesEnd", typeof(IntData), VariableKind.In));
     42      AddVariableInfo(new VariableInfo("VariableEvaluationImpacts", "VariableEvaluationImpacts", typeof(ItemList), VariableKind.New));
    3543    }
    3644
     
    3947    }
    4048
    41     private double[,] CombineOutputs(double[] referenceOutputs, double[] newOutputs) {
    42       if (referenceOutputs.Length != newOutputs.Length) throw new InvalidProgramException();
    43       double[,] result = new double[referenceOutputs.Length, 2];
    44       for (int i = 0; i < referenceOutputs.Length; i++) {
    45         result[i, 0] = referenceOutputs[i];
    46         result[i, 1] = newOutputs[i];
     49    public override IOperation Apply(IScope scope) {
     50      IPredictor predictor = GetVariableValue<IPredictor>("Predictor", scope, true);
     51      Dataset dataset = GetVariableValue<Dataset>("Dataset", scope, true);
     52      int targetVariable = GetVariableValue<IntData>("TargetVariable", scope, true).Data;
     53      string targetVariableName = dataset.GetVariableName(targetVariable);
     54      ItemList<StringData> inputVariableNames = GetVariableValue<ItemList<StringData>>("InputVariableNames", scope, true, false);
     55      int start = GetVariableValue<IntData>("SamplesStart", scope, true).Data;
     56      int end = GetVariableValue<IntData>("SamplesEnd", scope, true).Data;
     57
     58      Dictionary<string, double> evaluationImpacts;
     59      if (inputVariableNames == null)
     60        evaluationImpacts = Calculate(dataset, predictor, targetVariableName, start, end);
     61      else
     62        evaluationImpacts = Calculate(dataset, predictor, targetVariableName, inputVariableNames.Select(iv => iv.Data), start, end);
     63
     64      ItemList variableImpacts = new ItemList();
     65      foreach (KeyValuePair<string, double> p in evaluationImpacts) {
     66        if (p.Key != targetVariableName) {
     67          ItemList row = new ItemList();
     68          row.Add(new StringData(p.Key));
     69          row.Add(new DoubleData(p.Value));
     70          variableImpacts.Add(row);
     71        }
    4772      }
    48       return result;
     73
     74      scope.AddVariable(new Variable(scope.TranslateName("VariableEvaluationImpacts"), variableImpacts));
     75      return null;
     76
    4977    }
    5078
    51     protected override double CalculateImpact(double[] referenceValue, double[] newValue) {
     79    public static Dictionary<string, double> Calculate(Dataset dataset, IPredictor predictor, string targetVariableName, int start, int end) {
     80      return Calculate(dataset, predictor, targetVariableName, null, start, end);
     81    }
     82
     83
     84    public static Dictionary<string, double> Calculate(Dataset dataset, IPredictor predictor, string targetVariableName, IEnumerable<string> inputVariableNames, int start, int end) {
     85      Dictionary<string, double> evaluationImpacts = new Dictionary<string, double>();
     86      Dataset dirtyDataset = (Dataset)dataset.Clone();
     87      double[] referenceValues = predictor.Predict(dataset, start, end);
     88
     89      double mean;
     90      IEnumerable<double> oldValues;
     91      double[] newValues;
     92      IEnumerable<string> variables;
     93      if (inputVariableNames != null)
     94        variables = inputVariableNames;
     95      else
     96        variables = dataset.VariableNames;
     97
     98      foreach (string variableName in variables) {
     99        if (variableName != targetVariableName) {
     100          mean = dataset.GetMean(variableName, start, end);
     101          oldValues = dirtyDataset.ReplaceVariableValues(variableName, Enumerable.Repeat(mean, end - start), start, end);
     102          newValues = predictor.Predict(dirtyDataset, start, end);
     103          evaluationImpacts[variableName] = CalculateMSE(referenceValues, newValues);
     104          dirtyDataset.ReplaceVariableValues(variableName, oldValues, start, end);
     105        }
     106      }
     107
     108      double impactsSum = evaluationImpacts.Values.Sum();
     109      if (impactsSum.IsAlmost(0.0)) impactsSum = 1.0;
     110      foreach (KeyValuePair<string, double> p in evaluationImpacts.ToList())
     111        evaluationImpacts[p.Key] = p.Value / impactsSum;
     112
     113      return evaluationImpacts;
     114    }
     115
     116    private static double CalculateMSE(double[] referenceValues, double[] newValues) {
    52117      try {
    53         return SimpleMSEEvaluator.Calculate(CombineOutputs(referenceValue, newValue));
     118        return SimpleMSEEvaluator.Calculate(MatrixCreator<double>.CreateMatrix(referenceValues, newValues));
    54119      }
    55120      catch (ArgumentException) {
     
    57122      }
    58123    }
    59 
    60     protected override double[] CalculateValue(IScope scope, Dataset dataset, int targetVariable, int start, int end) {
    61       return GetOutputs(scope, dataset, targetVariable, start, end);
    62     }
    63 
    64     protected override double[] PostProcessImpacts(double[] impacts) {
    65       double mseSum = impacts.Sum();
    66       if (mseSum.IsAlmost(0.0)) mseSum = 1.0;
    67       for (int i = 0; i < impacts.Length; i++) {
    68         impacts[i] = impacts[i] / mseSum;
    69       }
    70       return impacts;
    71     }
    72 
    73     private bool IsAlmost(double x, double y) {
    74       return Math.Abs(x - y) < 1.0E-12;
    75     }
    76 
    77     protected abstract double[] GetOutputs(IScope scope, Dataset dataset, int targetVariable, int start, int end);
    78124  }
    79125}
  • trunk/sources/HeuristicLab.Modeling/3.2/VariableQualityImpactCalculator.cs

    r2165 r2319  
    3030
    3131namespace HeuristicLab.Modeling {
    32   public abstract class VariableQualityImpactCalculator : VariableImpactCalculatorBase<double> {
     32  public class VariableQualityImpactCalculator : OperatorBase {
     33
     34    public VariableQualityImpactCalculator()
     35      : base() {
     36      AddVariableInfo(new VariableInfo("Predictor", "The predictor used to evaluate the model", typeof(IPredictor), VariableKind.In));
     37      AddVariableInfo(new VariableInfo("Dataset", "Dataset", typeof(Dataset), VariableKind.In));
     38      AddVariableInfo(new VariableInfo("TargetVariable", "TargetVariable", typeof(IntData), VariableKind.In));
     39      AddVariableInfo(new VariableInfo("InputVariableNames", "Names of used variables in the model (optional)", typeof(ItemList<StringData>), VariableKind.In));
     40      AddVariableInfo(new VariableInfo("SamplesStart", "SamplesStart", typeof(IntData), VariableKind.In));
     41      AddVariableInfo(new VariableInfo("SamplesEnd", "SamplesEnd", typeof(IntData), VariableKind.In));
     42      AddVariableInfo(new VariableInfo("VariableQualityImpacts", "VariableQualityImpacts", typeof(ItemList), VariableKind.New));
     43    }
     44
    3345    public override string Description {
    3446      get { return @"Calculates the impact of all allowed input variables on the quality of the model using evaluator supplied as suboperator."; }
    3547    }
    3648
    37     public override string OutputVariableName {
    38       get { return "VariableQualityImpacts"; }
     49    public override IOperation Apply(IScope scope) {
     50      IPredictor predictor = GetVariableValue<IPredictor>("Predictor", scope, true);
     51      Dataset dataset = GetVariableValue<Dataset>("Dataset", scope, true);
     52      int targetVariable = GetVariableValue<IntData>("TargetVariable", scope, true).Data;
     53      string targetVariableName = dataset.GetVariableName(targetVariable);
     54      ItemList<StringData> inputVariableNames = GetVariableValue<ItemList<StringData>>("InputVariableNames", scope, true, false);
     55      int start = GetVariableValue<IntData>("SamplesStart", scope, true).Data;
     56      int end = GetVariableValue<IntData>("SamplesEnd", scope, true).Data;
     57
     58      Dictionary<string, double> qualityImpacts;
     59      if (inputVariableNames == null)
     60        qualityImpacts = Calculate(dataset, predictor, targetVariableName, start, end);
     61      else
     62        qualityImpacts = Calculate(dataset, predictor, targetVariableName, inputVariableNames.Select(iv => iv.Data), start, end);
     63
     64      ItemList variableImpacts = new ItemList();
     65      foreach (KeyValuePair<string, double> p in qualityImpacts) {
     66        if (p.Key != targetVariableName) {
     67          ItemList row = new ItemList();
     68          row.Add(new StringData(p.Key));
     69          row.Add(new DoubleData(p.Value));
     70          variableImpacts.Add(row);
     71        }
     72      }
     73
     74      scope.AddVariable(new Variable(scope.TranslateName("VariableQualityImpacts"), variableImpacts));
     75      return null;
    3976    }
    4077
    41     protected override double CalculateImpact(double referenceValue, double newValue) {
     78    public static Dictionary<string, double> Calculate(Dataset dataset, IPredictor predictor, string targetVariableName, int start, int end) {
     79      return Calculate(dataset, predictor, targetVariableName, null, start, end);
     80    }
     81
     82    public static Dictionary<string, double> Calculate(Dataset dataset, IPredictor predictor, string targetVariableName, IEnumerable<string> inputVariableNames, int start, int end) {
     83      Dictionary<string, double> evaluationImpacts = new Dictionary<string, double>();
     84      Dataset dirtyDataset = (Dataset)dataset.Clone();
     85
     86      double[] predictedValues = predictor.Predict(dataset, start, end);
     87      double[] targetValues = dataset.GetVariableValues(targetVariableName, start, end);
     88
     89      double oldMSE = CalculateMSE(predictedValues, targetValues);
     90      double newMSE;
     91
     92      double mean;
     93      IEnumerable<double> oldValues;
     94      IEnumerable<string> variables;
     95      if (inputVariableNames != null)
     96        variables = inputVariableNames;
     97      else
     98        variables = dataset.VariableNames;
     99
     100      foreach (string variableName in variables) {
     101        if (variableName != targetVariableName) {
     102          mean = dataset.GetMean(variableName, start, end);
     103          oldValues = dirtyDataset.ReplaceVariableValues(variableName, Enumerable.Repeat(mean, end - start), start, end);
     104          predictedValues = predictor.Predict(dirtyDataset, start, end);
     105          newMSE = CalculateMSE(predictedValues, targetValues);
     106          evaluationImpacts[variableName] = newMSE / oldMSE;
     107          dirtyDataset.ReplaceVariableValues(variableName, oldValues, start, end);
     108        }
     109      }
     110
     111      return evaluationImpacts;
     112    }
     113
     114    private static double CalculateImpact(double referenceValue, double newValue) {
    42115      return newValue / referenceValue;
    43116    }
    44117
    45     protected override double CalculateValue(IScope scope, Dataset dataset, int targetVariable, int start, int end) {
    46       return CalculateQuality(scope, dataset, targetVariable, start, end);
     118    private static double CalculateMSE(double[] referenceValues, double[] newValues) {
     119      try {
     120        return SimpleMSEEvaluator.Calculate(MatrixCreator<double>.CreateMatrix(referenceValues, newValues));
     121      }
     122      catch (ArgumentException) {
     123        return double.PositiveInfinity;
     124      }
    47125    }
    48 
    49     protected abstract double CalculateQuality(IScope scope, Dataset dataset, int targetVariable, int start, int end);
    50126  }
    51127}
  • trunk/sources/HeuristicLab.SupportVectorMachines/3.2/HeuristicLab.SupportVectorMachines-3.2.csproj

    r2285 r2319  
    8484  <ItemGroup>
    8585    <Compile Include="Predictor.cs" />
     86    <Compile Include="PredictorBuilder.cs" />
    8687    <Compile Include="SupportVectorRegression.cs" />
    8788    <Compile Include="SVMModel.cs" />
     
    9798      <DependentUpon>SVMModelView.cs</DependentUpon>
    9899    </Compile>
    99     <Compile Include="VariableEvaluationImpactCalculator.cs" />
    100     <Compile Include="VariableQualityImpactCalculator.cs" />
    101100  </ItemGroup>
    102101  <ItemGroup>
  • trunk/sources/HeuristicLab.SupportVectorMachines/3.2/SupportVectorRegression.cs

    r2290 r2319  
    403403      SequentialSubScopesProcessor seqSubScopeProc = new SequentialSubScopesProcessor();
    404404      SequentialProcessor seqProc = new SequentialProcessor();
     405      PredictorBuilder predictorBuilder = new PredictorBuilder();
     406      predictorBuilder.GetVariableInfo("SVMModel").ActualName = "Model";
    405407      VariableEvaluationImpactCalculator evalImpactCalc = new VariableEvaluationImpactCalculator();
    406       evalImpactCalc.GetVariableInfo("TrainingSamplesStart").ActualName = "ActualTrainingSamplesStart";
    407       evalImpactCalc.GetVariableInfo("TrainingSamplesEnd").ActualName = "ActualTrainingSamplesEnd";
    408       evalImpactCalc.GetVariableInfo("SVMModel").ActualName = "Model";
     408      evalImpactCalc.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart";
     409      evalImpactCalc.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd";
    409410      VariableQualityImpactCalculator qualImpactCalc = new VariableQualityImpactCalculator();
    410       qualImpactCalc.GetVariableInfo("TrainingSamplesStart").ActualName = "ActualTrainingSamplesStart";
    411       qualImpactCalc.GetVariableInfo("TrainingSamplesEnd").ActualName = "ActualTrainingSamplesEnd";
    412       qualImpactCalc.GetVariableInfo("SVMModel").ActualName = "Model";
    413 
     411      qualImpactCalc.GetVariableInfo("SamplesStart").ActualName = "ActualTrainingSamplesStart";
     412      qualImpactCalc.GetVariableInfo("SamplesEnd").ActualName = "ActualTrainingSamplesEnd";
     413     
     414      seqProc.AddSubOperator(predictorBuilder);
    414415      seqProc.AddSubOperator(evalImpactCalc);
    415416      seqProc.AddSubOperator(qualImpactCalc);
     
    448449      model.TestSamplesStart = bestModelScope.GetVariableValue<IntData>("TestSamplesStart", true).Data;
    449450      model.TestSamplesEnd = bestModelScope.GetVariableValue<IntData>("TestSamplesEnd", true).Data;
    450       Dictionary<string, int> variableNames = new Dictionary<string, int>();
    451       for (int i = 0; i < ds.Columns; i++) variableNames[ds.GetVariableName(i)] = i;
    452       model.Predictor = new Predictor(bestModelScope.GetVariableValue<SVMModel>("Model", false), model.TargetVariable, variableNames);
    453 
     451      model.Predictor = bestModelScope.GetVariableValue<IPredictor>("Predictor", true);
    454452
    455453      ItemList evaluationImpacts = bestModelScope.GetVariableValue<ItemList>("VariableEvaluationImpacts", false);
Note: See TracChangeset for help on using the changeset viewer.