Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2288_HeuristicLab.VariableInteractionNetworks/HeuristicLab.VariableInteractionNetworks/3.3/ImpactCalculator.cs @ 17717

Last change on this file since 17717 was 16966, checked in by jzenisek, 6 years ago

#2288

  • split creation of networks with/without possible cycles
  • included additional check to prevent errors within the impact calculation
File size: 3.3 KB
Line 
1using System.Collections.Generic;
2using System.Linq;
3using HeuristicLab.Data;
4using HeuristicLab.Problems.DataAnalysis;
5using HeuristicLab.Random;
6
namespace HeuristicLab.VariableInteractionNetworks {
  /// <summary>
  /// Variable impact calculation used for variable interaction networks.
  /// The existing code for variable impact calculation in HL trunk was found insufficient for this purpose.
  /// </summary>
  public class ImpactCalculator {
    /// <summary>
    /// Calculate the impact of a variable within a regression solution.
    /// The impact is calculated by shuffling the variable values (distribution is unchanged but relation to y is broken up).
    /// To account for correlations between input variables the method also shuffles sets of variables up to the specified maximum number of interactions.
    /// Of all evaluated sets that contain the variable, the one that retains the highest R² (i.e. the smallest loss) determines the impact.
    /// </summary>
    /// <param name="solution">Regression solution that provides the model and the problem data (dataset and target variable).</param>
    /// <param name="partition">Row range on which R² is calculated; only these rows receive shuffled values.</param>
    /// <param name="variableName">Name of the variable whose impact is calculated.</param>
    /// <param name="maxInteractions">Maximum size of the variable sets that are shuffled together. For values below 1 no set is evaluated and the squared correlation of the unshuffled predictions is returned.</param>
    /// <returns>The loss in R² on the specified partition when the variable values are shuffled; 0 if the model does not use the variable.</returns>
    public static double Calculate(IRegressionSolution solution, IntRange partition, string variableName, int maxInteractions = 1) {
      var problemData = solution.ProblemData;
      var ds = problemData.Dataset;
      var model = solution.Model;

      var variableNames = model.VariablesUsedForPrediction.ToList();

      // Shuffling a variable the model never reads cannot change its predictions, so the loss in R² is zero.
      // Without this guard no combination would be evaluated below and the full R² would be reported as impact.
      if (!variableNames.Contains(variableName)) return 0.0;

      var rows = Enumerable.Range(partition.Start, partition.Size).ToArray();

      // materialize the target values once, they are compared against every shuffled prediction
      var y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();

      var originalR = OnlinePearsonsRCalculator.Calculate(y, model.GetEstimatedValues(ds, rows), out OnlineCalculatorError error);
      if (error != OnlineCalculatorError.None) originalR = 0;

      // fixed seed: the same row permutation is used for every variable set and for every call
      // NOTE(review): rows are taken from Range(Start, Size), which suggests partition.End is exclusive.
      // If ShuffleInPlace treats its last argument as an inclusive index, this reaches one row past the partition - confirm.
      var shuffledRows = problemData.AllIndices.ToList();
      shuffledRows.ShuffleInPlace(new FastRandom(1234), partition.Start, partition.End);

      // try all combinations of variables up to the given number of interactions and take the minimum impact
      var bestR2 = 0.0;
      for (int setSize = 1; setSize <= maxInteractions && setSize <= variableNames.Count; setSize++) {
        // variable combinations that contain the selected variable;
        // each combination is materialized right away because it is enumerated more than once
        var combinations = HeuristicLab.Common.EnumerableExtensions.Combinations(variableNames, setSize)
                                   .Select(comb => comb.ToArray())
                                   .Where(comb => comb.Contains(variableName));

        foreach (var combination in combinations) {
          // create a ds and shuffle all values in the variable set (keeping values of the variable set together)
          var shuffledDs = ((Dataset)ds).ToModifiable();
          foreach (var variable in combination) {
            var originalValues = shuffledDs.GetDoubleValues(variable).ToArray();
            var shuffledValues = new List<double>(originalValues); // clone, rows outside the partition stay untouched
            foreach (var row in rows) {
              shuffledValues[row] = originalValues[shuffledRows[row]];
            }
            shuffledDs.ReplaceVariable(variable, shuffledValues);
          }

          var r = OnlinePearsonsRCalculator.Calculate(y, model.GetEstimatedValues(shuffledDs, rows), out error);
          if (error != OnlineCalculatorError.None) r = 0;
          var r2 = r * r; // r^2

          if (r2 > bestR2) {
            bestR2 = r2;
          }
        }
      }

      return originalR * originalR - bestR2; // impact is loss in R²
    }
  }
}
Note: See TracBrowser for help on using the repository browser.