using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Data;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Random;

namespace HeuristicLab.VariableInteractionNetworks {
  /// <summary>
  /// Variable impact calculation for the variable interaction network.
  /// The existing code for variable impact calculation in the HL trunk was found to be insufficient for this purpose.
  /// </summary>
  public class ImpactCalculator {
    /// <summary>
    /// Calculates the impact of a variable within a regression solution.
    /// The impact is calculated by shuffling the variable values (distribution is unchanged but relation to y is broken up).
    /// To account for correlations between input variables the method also shuffles sets of variables
    /// (each containing <paramref name="variableName"/>) up to the specified maximum number of interactions
    /// and reports the smallest loss observed over all evaluated sets.
    /// </summary>
    /// <param name="solution">The regression solution whose model and problem data are evaluated.</param>
    /// <param name="partition">The row range (Start, Size) on which R² is measured and within which values are shuffled.</param>
    /// <param name="variableName">The name of the variable whose impact is calculated.</param>
    /// <param name="maxInteractions">The maximum size of the variable sets that are shuffled together.</param>
    /// <returns>
    /// The loss in R² on the specified partition when the variable values are shuffled.
    /// Returns 0 when the variable is not used by the model or when <paramref name="maxInteractions"/> is smaller than 1,
    /// because in both cases nothing is shuffled and the predictions cannot change.
    /// </returns>
    public static double Calculate(IRegressionSolution solution, IntRange partition, string variableName, int maxInteractions = 1) {
      var problemData = solution.ProblemData;
      var ds = problemData.Dataset;
      var model = solution.Model;

      var variableNames = model.VariablesUsedForPrediction.ToList();

      // No variable set containing the variable can be formed in these cases. Without this guard the
      // method would report the full R² of the model as the impact although nothing was shuffled.
      if (maxInteractions < 1 || !variableNames.Contains(variableName)) return 0.0;

      var rows = Enumerable.Range(partition.Start, partition.Size).ToArray();
      var y = ds.GetDoubleValues(problemData.TargetVariable, rows);
      var estimated = model.GetEstimatedValues(ds, rows);
      var originalR = OnlinePearsonsRCalculator.Calculate(y, estimated, out OnlineCalculatorError error);
      if (error != OnlineCalculatorError.None) originalR = 0; // a failed calculation counts as no correlation

      // One fixed permutation of the row indices (constant seed => reproducible results) is shared by all
      // variable sets, so the values of the variables within a set stay together after shuffling.
      // NOTE(review): partition.End is passed as the upper shuffle bound; if ShuffleInPlace treats that bound
      // as inclusive, one index past the partition takes part in the shuffle - confirm against the library.
      var shuffledRows = problemData.AllIndices.ToList();
      shuffledRows.ShuffleInPlace(new FastRandom(1234), partition.Start, partition.End);

      // try all combinations of variables up to the given number of interactions and take the minimum impact
      // (i.e. the largest R² that remains after shuffling)
      var bestShuffledR2 = 0.0;
      for (int setSize = 1; setSize <= maxInteractions && setSize <= variableNames.Count; setSize++) {
        var combinations = HeuristicLab.Common.EnumerableExtensions.Combinations(variableNames, setSize)
          .Where(comb => comb.Contains(variableName)); // variable combinations that contain the selected variable

        foreach (var combination in combinations) {
          // create a ds and shuffle all values in the variable set (keeping values of the variable set together)
          var shuffledDs = ((Dataset)ds).ToModifiable();
          foreach (var variable in combination) {
            var originalValues = shuffledDs.GetDoubleValues(variable).ToArray();
            var shuffledValues = new List<double>(originalValues); // clone; rows outside the partition stay untouched
            foreach (var row in rows) {
              shuffledValues[row] = originalValues[shuffledRows[row]];
            }
            shuffledDs.ReplaceVariable(variable, shuffledValues);
          }

          estimated = model.GetEstimatedValues(shuffledDs, rows);
          var r = OnlinePearsonsRCalculator.Calculate(y, estimated, out error);
          if (error != OnlineCalculatorError.None) r = 0;
          var r2 = r * r;

          if (r2 > bestShuffledR2) {
            bestShuffledR2 = r2;
          }
        }
      }

      return originalR * originalR - bestShuffledR2; // impact is loss in R²
    }
  }
}