1 | using System.Collections.Generic;
2 | using System.Linq;
3 | using HeuristicLab.Data;
4 | using HeuristicLab.Problems.DataAnalysis;
5 | using HeuristicLab.Random;
6 |
namespace HeuristicLab.VariableInteractionNetworks {
  /// <summary>
  /// Variable impact calculation used for the variable interaction network.
  /// The existing code for variable impact calculation in HL trunk was found insufficient for this purpose.
  /// </summary>
  public class ImpactCalculator {
    /// <summary>
    /// Calculate the impact of a variable within a regression solution.
    /// The impact is calculated by shuffling the variable values (distribution is unchanged but relation to y is broken up).
    /// To account for correlations between input variables the method also shuffles sets of variables
    /// (each set containing the selected variable) up to the specified maximum number of interactions.
    /// Over all evaluated sets the highest remaining R² is kept, i.e. the smallest loss is reported.
    /// </summary>
    /// <param name="solution">The regression solution providing the model, the dataset and the target variable.</param>
    /// <param name="partition">The range of rows (Start, Size) on which R² is evaluated and within which values are shuffled.</param>
    /// <param name="variableName">The name of the variable for which the impact is calculated.</param>
    /// <param name="maxInteractions">The maximum size of the variable sets that are shuffled together (default 1: only the variable itself).</param>
    /// <returns>
    /// The loss in R² on the specified partition when the variable values are shuffled.
    /// Returns 0 when nothing can be shuffled, i.e. when the model does not use the variable
    /// or when <paramref name="maxInteractions"/> is smaller than 1.
    /// </returns>
    public static double Calculate(IRegressionSolution solution, IntRange partition, string variableName, int maxInteractions = 1) {
      var problemData = solution.ProblemData;
      var ds = problemData.Dataset;
      var model = solution.Model;

      var variableNames = model.VariablesUsedForPrediction.ToList();

      // If no variable set containing the selected variable can be formed, no shuffled prediction is ever evaluated.
      // Without this guard the best shuffled R² would stay at its initial value of 0 and the whole original R²
      // would be reported as impact, although shuffling nothing cannot cause any loss.
      if (maxInteractions < 1 || !variableNames.Contains(variableName)) return 0.0;

      var rows = Enumerable.Range(partition.Start, partition.Size).ToArray();
      // materialize the target values once; they are reused for every correlation below
      var y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();

      var originalR = OnlinePearsonsRCalculator.Calculate(y, model.GetEstimatedValues(ds, rows), out OnlineCalculatorError error);
      if (error != OnlineCalculatorError.None) originalR = 0; // treat a failed calculation as "no correlation"

      // A single fixed-seed permutation of the row indices is used for all variables and all sets, so results are reproducible.
      // NOTE(review): partition.End is passed as the upper shuffle bound; confirm whether ShuffleInPlace treats this bound
      // as inclusive or exclusive (if inclusive, the row directly after the partition takes part in the shuffle).
      var shuffledRows = problemData.AllIndices.ToList();
      shuffledRows.ShuffleInPlace(new FastRandom(1234), partition.Start, partition.End);

      // try all combinations of variables up to the given number of interactions and take the minimum impact
      // (minimum impact == maximum R² that remains after shuffling)
      var bestShuffledR2 = 0.0;
      for (int setSize = 1; setSize <= maxInteractions && setSize <= variableNames.Count; setSize++) {
        var combinations = HeuristicLab.Common.EnumerableExtensions.Combinations(variableNames, setSize)
          .Where(comb => comb.Contains(variableName)); // variable combinations that contain the selected variable

        foreach (var combination in combinations) {
          // create a ds and shuffle all values in the variable set
          // (the same row permutation is applied to each variable, keeping values of the variable set together)
          var shuffledDs = ((Dataset)ds).ToModifiable();
          foreach (var variable in combination) {
            var originalValues = shuffledDs.GetDoubleValues(variable).ToArray();
            var shuffledValues = new List<double>(originalValues); // clone; rows outside the partition stay untouched
            foreach (var row in rows) {
              shuffledValues[row] = originalValues[shuffledRows[row]];
            }
            shuffledDs.ReplaceVariable(variable, shuffledValues);
          }

          var shuffledPrediction = model.GetEstimatedValues(shuffledDs, rows);
          var shuffledR = OnlinePearsonsRCalculator.Calculate(y, shuffledPrediction, out error);
          if (error != OnlineCalculatorError.None) shuffledR = 0;
          var shuffledR2 = shuffledR * shuffledR; // r^2

          if (shuffledR2 > bestShuffledR2) {
            bestShuffledR2 = shuffledR2;
          }
        }
      }

      return originalR * originalR - bestShuffledR2; // impact is loss in R²
    }
  }
}