Changeset 13835


Ignore:
Timestamp:
05/04/16 16:56:49 (3 years ago)
Author:
bburlacu
Message:

#2288: Performance improvements in the SymbolicRegressionVariableImpactsAnalyzer.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.VariableInteractionNetworks/HeuristicLab.VariableInteractionNetworks/3.3/SymbolicDataAnalysisVariableImpactsAnalyzer.cs

    r13728 r13835  
    2121
    2222using System;
     23using System.Collections.Generic;
    2324using System.Linq;
    2425using HeuristicLab.Analysis;
     
    225226      var variablesToTreeIndices = allowedInputVariables.ToDictionary(x => x, x => Enumerable.Range(0, simplifiedTrees.Count).Where(i => ContainsVariable(simplifiedTrees[i], x)).ToList());
    226227
    227       foreach (var mapping in variablesToTreeIndices) {
    228         var variableName = mapping.Key;
    229         var median = problemData.Dataset.GetDoubleValues(variableName, problemData.TrainingIndices).Median();
    230         var ds = new ModifiableDataset(problemData.Dataset.DoubleVariables, problemData.Dataset.DoubleVariables.Select(x => problemData.Dataset.GetReadOnlyDoubleValues(x).ToList()));
    231         foreach (var i in problemData.TrainingIndices) {
    232           ds.SetVariableValue(median, variableName, i);
    233         }
    234         var pd = new RegressionProblemData(ds, allowedInputVariables, problemData.TargetVariable);
    235         pd.TrainingPartition.Start = problemData.TrainingPartition.Start;
    236         pd.TrainingPartition.End = problemData.TrainingPartition.End;
    237         pd.TestPartition.Start = problemData.TestPartition.Start;
    238         pd.TestPartition.End = problemData.TestPartition.End;
    239 
    240         var indices = mapping.Value;
     228      // variable values used for restoring original values in the dataset
     229      var variableValues = allowedInputVariables.Select(x => problemData.Dataset.GetReadOnlyDoubleValues(x).ToList()).ToList();
     230      // the dataset is built from its own separate copies of the variable values (not the lists saved above).
     231      var variableNames = allowedInputVariables.Concat(new[] { problemData.TargetVariable }).ToList();
     232      var ds = new ModifiableDataset(variableNames, variableNames.Select(x => problemData.Dataset.GetReadOnlyDoubleValues(x).ToList()));
     233      var pd = new RegressionProblemData(ds, allowedInputVariables, problemData.TargetVariable);
     234      pd.TrainingPartition.Start = problemData.TrainingPartition.Start;
     235      pd.TrainingPartition.End = problemData.TrainingPartition.End;
     236      pd.TestPartition.Start = problemData.TestPartition.Start;
     237      pd.TestPartition.End = problemData.TestPartition.End;
     238
     239      for (int i = 0; i < allowedInputVariables.Count; ++i) {
     240        var v = allowedInputVariables[i];
     241        var median = problemData.Dataset.GetDoubleValues(v, problemData.TrainingIndices).Median();
     242        var values = new List<double>(Enumerable.Repeat(median, problemData.Dataset.Rows));
     243        // replace values with median
     244        ds.ReplaceVariable(v, values);
     245
     246        var indices = variablesToTreeIndices[v];
     247        if (!indices.Any()) {
     248          dataTable.Rows[v].Values.Add(0);
     249          continue;
     250        }
     251
    241252        var averageImpact = 0d;
    242         for (int i = 0; i < indices.Count; ++i) {
    243           var tree = simplifiedTrees[i];
    244           var originalQuality = qualities[i].Value;
     253        for (int j = 0; j < indices.Count; ++j) {
     254          var tree = simplifiedTrees[j];
     255          var originalQuality = qualities[j].Value;
    245256          double newQuality;
    246257          if (optimizeConstants) {
     
    253264        }
    254265        averageImpact /= indices.Count;
    255         dataTable.Rows[variableName].Values.Add(averageImpact);
     266        dataTable.Rows[v].Values.Add(averageImpact);
     267        // restore original values
     268        ds.ReplaceVariable(v, variableValues[i]);
    256269      }
    257270
     
    275288  }
    276289}
     290
Note: See TracChangeset for help on using the changeset viewer.