Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2904_CalculateImpacts/HeuristicLab.Tests/HeuristicLab.Problems.DataAnalysis-3.4/VariableImpactCalculationTest.cs @ 16058

Last change on this file since 16058 was 16058, checked in by fholzing, 6 years ago

#2904: Added Unit-Tests for Regression

File size: 13.4 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using HeuristicLab.Common;
5using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
6using HeuristicLab.Problems.DataAnalysis.Symbolic;
7using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
8using HeuristicLab.Problems.Instances.DataAnalysis;
9using HeuristicLab.Random;
10using Microsoft.VisualStudio.TestTools.UnitTesting;
11
12namespace HeuristicLab.Problems.DataAnalysis.Tests {
13
14  [TestClass()]
15  public class VariableImpactCalculationTest {
16    private static readonly double epsilon = 0.00001;
17
18    [TestMethod]
19    [TestCategory("Problems.DataAnalysis")]
20    [TestProperty("Time", "short")]
21    public void ConstantModelVariableImpactTest() {
22      IRegressionProblemData problemData = LoadDefaultTowerProblem();
23      IRegressionModel model = new ConstantModel(5, "y");
24      IRegressionSolution solution = new RegressionSolution(model, problemData);
25      Dictionary<string, double> expectedImpacts = GetExpectedValuesForConstantModel();
26
27      CheckDefaultAsserts(solution, expectedImpacts);
28    }
29
30    [TestMethod]
31    [TestCategory("Problems.DataAnalysis")]
32    [TestProperty("Time", "short")]
33    public void LinearRegressionModelVariableImpactTowerTest() {
34      IRegressionProblemData problemData = LoadDefaultTowerProblem();
35      ISymbolicExpressionTree tree = CreateLRExpressionTree(problemData);
36      IRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
37      IRegressionSolution solution = new RegressionSolution(model, (IRegressionProblemData)problemData.Clone());
38      Dictionary<string, double> expectedImpacts = GetExpectedValuesForLRTower();
39
40      CheckDefaultAsserts(solution, expectedImpacts);
41    }
42
43    [TestMethod]
44    [TestCategory("Problems.DataAnalysis")]
45    [TestProperty("Time", "short")]
46    public void LinearRegressionModelVariableImpactMibaTest() {
47      IRegressionProblemData problemData = LoadDefaultMibaProblem();
48      ISymbolicExpressionTree tree = CreateLRExpressionTree(problemData);
49      IRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
50      IRegressionSolution solution = new RegressionSolution(model, (IRegressionProblemData)problemData.Clone());
51      Dictionary<string, double> expectedImpacts = GetExpectedValuesForLRMiba();
52
53      CheckDefaultAsserts(solution, expectedImpacts);
54    }
55
56    [TestMethod]
57    [TestCategory("Problems.DataAnalysis")]
58    [TestProperty("Time", "short")]
59    public void CustomModelVariableImpactTest() {
60      IRegressionProblemData problemData = CreateDefaultProblem();
61      ISymbolicExpressionTree tree = CreateCustomExpressionTree();
62      IRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
63      IRegressionSolution solution = new RegressionSolution(model, (IRegressionProblemData)problemData.Clone());
64      Dictionary<string, double> expectedImpacts = GetExpectedValuesForCustomProblem();
65
66      CheckDefaultAsserts(solution, expectedImpacts);
67    }
68
69    [TestMethod]
70    [TestCategory("Problems.DataAnalysis")]
71    [TestProperty("Time", "short")]
72    public void CustomModelVariableImpactNoInfluenceTest() {
73      IRegressionProblemData problemData = CreateDefaultProblem();
74      ISymbolicExpressionTree tree = CreateCustomExpressionTreeNoInfluenceX1();
75      IRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
76      IRegressionSolution solution = new RegressionSolution(model, (IRegressionProblemData)problemData.Clone());
77      Dictionary<string, double> expectedImpacts = GetExpectedValuesForCustomProblemNoInfluence();
78
79      CheckDefaultAsserts(solution, expectedImpacts);
80    }
81
82    #region Load RegressionProblemData
83    private IRegressionProblemData LoadDefaultTowerProblem() {
84      RegressionRealWorldInstanceProvider provider = new RegressionRealWorldInstanceProvider();
85      var instance = provider.GetDataDescriptors().Where(x => x.Name.Equals("Tower")).Single();
86      return provider.LoadData(instance);
87    }
88    private IRegressionProblemData LoadDefaultMibaProblem() {
89      MibaFrictionRegressionInstanceProvider provider = new MibaFrictionRegressionInstanceProvider();
90      var instance = provider.GetDataDescriptors().Where(x => x.Name.Equals("CF1")).Single();
91      return provider.LoadData(instance);
92    }
93    private IRegressionProblemData CreateDefaultProblem() {
94      List<string> allowedInputVariables = new List<string>() { "x1", "x2", "x3", "x4", "x5" };
95      string targetVariable = "y";
96      var variableNames = allowedInputVariables.Union(targetVariable.ToEnumerable());
97      double[,] variableValues = new double[100, variableNames.Count()];
98
99      FastRandom random = new FastRandom(12345);
100      for (int i = 0; i < variableValues.GetLength(0); i++) {
101        for (int j = 0; j < variableValues.GetLength(1); j++) {
102          variableValues[i, j] = random.Next(1, 100);
103        }
104      }
105
106      Dataset dataset = new Dataset(variableNames, variableValues);
107      return new RegressionProblemData(dataset, allowedInputVariables, targetVariable);
108    }
109    #endregion
110
111    #region Create SymbolicExpressionTree
112    private ISymbolicExpressionTree CreateLRExpressionTree(IRegressionProblemData problemData) {
113      IEnumerable<int> rows = problemData.TrainingIndices;
114      var doubleVariables = problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<double>);
115      var factorVariableNames = problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<string>);
116      var factorVariables = problemData.Dataset.GetFactorVariableValues(factorVariableNames, rows);
117      double[,] binaryMatrix = problemData.Dataset.ToArray(factorVariables, rows);
118      double[,] doubleVarMatrix = problemData.Dataset.ToArray(doubleVariables.Concat(new string[] { problemData.TargetVariable }), rows);
119      var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);
120
121      alglib.linearmodel lm = new alglib.linearmodel();
122      alglib.lrreport ar = new alglib.lrreport();
123      int nRows = inputMatrix.GetLength(0);
124      int nFeatures = inputMatrix.GetLength(1) - 1;
125      double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant
126
127      int retVal = 1;
128      alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
129      if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution");
130
131      alglib.lrunpack(lm, out coefficients, out nFeatures);
132
133      int nFactorCoeff = binaryMatrix.GetLength(1);
134      int nVarCoeff = doubleVariables.Count();
135      return LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
136        doubleVariables.ToArray(), coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(),
137        @const: coefficients[nFeatures]);
138    }
139    private ISymbolicExpressionTree CreateCustomExpressionTree() {
140      return new InfixExpressionParser().Parse("x1*x2 - x2*x2 + x3*x3 + x4*x4 - x5*x5 + 14/12");
141    }
142    private ISymbolicExpressionTree CreateCustomExpressionTreeNoInfluenceX1() {
143      return new InfixExpressionParser().Parse("x1/x1*x2 - x2*x2 + x3*x3 + x4*x4 - x5*x5 + 14/12");
144    }
145    #endregion
146
147    #region Get Expected Values
148    private Dictionary<string, double> GetExpectedValuesForConstantModel() {
149      Dictionary<string, double> expectedImpacts = new Dictionary<string, double>();
150      expectedImpacts.Add("x1", 0);
151      expectedImpacts.Add("x10", 0);
152      expectedImpacts.Add("x11", 0);
153      expectedImpacts.Add("x12", 0);
154      expectedImpacts.Add("x13", 0);
155      expectedImpacts.Add("x14", 0);
156      expectedImpacts.Add("x15", 0);
157      expectedImpacts.Add("x16", 0);
158      expectedImpacts.Add("x17", 0);
159      expectedImpacts.Add("x18", 0);
160      expectedImpacts.Add("x19", 0);
161      expectedImpacts.Add("x2", 0);
162      expectedImpacts.Add("x20", 0);
163      expectedImpacts.Add("x21", 0);
164      expectedImpacts.Add("x22", 0);
165      expectedImpacts.Add("x23", 0);
166      expectedImpacts.Add("x24", 0);
167      expectedImpacts.Add("x25", 0);
168      expectedImpacts.Add("x3", 0);
169      expectedImpacts.Add("x4", 0);
170      expectedImpacts.Add("x5", 0);
171      expectedImpacts.Add("x6", 0);
172      expectedImpacts.Add("x7", 0);
173      expectedImpacts.Add("x8", 0);
174      expectedImpacts.Add("x9", 0);
175
176      return expectedImpacts;
177    }
178    private Dictionary<string, double> GetExpectedValuesForLRTower() {
179      Dictionary<string, double> expectedImpacts = new Dictionary<string, double>();
180      expectedImpacts.Add("x1", 0.639933657675427);
181      expectedImpacts.Add("x10", 0.0127006885259798);
182      expectedImpacts.Add("x11", 0.648236047877475);
183      expectedImpacts.Add("x12", 0.248350173524562);
184      expectedImpacts.Add("x13", 0.550889987109547);
185      expectedImpacts.Add("x14", 0.0882824237877192);
186      expectedImpacts.Add("x15", 0.0391276799061169);
187      expectedImpacts.Add("x16", 0.743632451088798);
188      expectedImpacts.Add("x17", 0.00254276857715308);
189      expectedImpacts.Add("x18", 0.0021548147614302);
190      expectedImpacts.Add("x19", 0.00513473927463037);
191      expectedImpacts.Add("x2", 0.0107583487931443);
192      expectedImpacts.Add("x20", 0.18085069746933);
193      expectedImpacts.Add("x21", 0.138053600700762);
194      expectedImpacts.Add("x22", 0.000339539790460086);
195      expectedImpacts.Add("x23", 0.362111965467117);
196      expectedImpacts.Add("x24", 0.0320167935572304);
197      expectedImpacts.Add("x25", 0.57460423230969);
198      expectedImpacts.Add("x3", 0.688142635515862);
199      expectedImpacts.Add("x4", 0.000176632348454664);
200      expectedImpacts.Add("x5", 0.0213915503114581);
201      expectedImpacts.Add("x6", 0.807976486909701);
202      expectedImpacts.Add("x7", 0.716217843319252);
203      expectedImpacts.Add("x8", 0.772701841392564);
204      expectedImpacts.Add("x9", 0.178418730050997);
205
206      return expectedImpacts;
207    }
208    private Dictionary<string, double> GetExpectedValuesForLRMiba() {
209      Dictionary<string, double> expectedImpacts = new Dictionary<string, double>();
210      expectedImpacts.Add("Grooving", 0.0380558091030508);
211      expectedImpacts.Add("Material", 0.02195836766156);
212      expectedImpacts.Add("Material_Cat", 0.000338687689067418);
213      expectedImpacts.Add("Oil", 0.363464994447857);
214      expectedImpacts.Add("x10", 0.0015309669014415);
215      expectedImpacts.Add("x11", -3.60432578908609E-05);
216      expectedImpacts.Add("x12", 0.00118953859087612);
217      expectedImpacts.Add("x13", 0.00164240977191832);
218      expectedImpacts.Add("x14", 0.000688363685380056);
219      expectedImpacts.Add("x15", -4.75067203969948E-05);
220      expectedImpacts.Add("x16", 0.00130388206125076);
221      expectedImpacts.Add("x17", 0.132351838646134);
222      expectedImpacts.Add("x2", -2.47981401556574E-05);
223      expectedImpacts.Add("x20", 0.716541716605016);
224      expectedImpacts.Add("x22", 0.174959377282835);
225      expectedImpacts.Add("x3", -2.65979754026091E-05);
226      expectedImpacts.Add("x4", -1.24764212947603E-05);
227      expectedImpacts.Add("x5", 0.001184959455798);
228      expectedImpacts.Add("x6", 0.000743336665237626);
229      expectedImpacts.Add("x7", 0.00188965927889773);
230      expectedImpacts.Add("x8", 0.00415201581536351);
231      expectedImpacts.Add("x9", 0.00365653880518491);
232
233      return expectedImpacts;
234    }
235    private Dictionary<string, double> GetExpectedValuesForCustomProblem() {
236      Dictionary<string, double> expectedImpacts = new Dictionary<string, double>();
237      expectedImpacts.Add("x1", -0.000573340275115796);
238      expectedImpacts.Add("x2", 0.000781819784095592);
239      expectedImpacts.Add("x3", -0.000390473234921058);
240      expectedImpacts.Add("x4", -0.00116083274627995);
241      expectedImpacts.Add("x5", -0.00036161186207545);
242
243      return expectedImpacts;
244    }
245    private Dictionary<string, double> GetExpectedValuesForCustomProblemNoInfluence() {
246      Dictionary<string, double> expectedImpacts = new Dictionary<string, double>();
247      expectedImpacts.Add("x1", 0);
248      expectedImpacts.Add("x2", 0.00263393690342982);
249      expectedImpacts.Add("x3", -0.00053248037514929);
250      expectedImpacts.Add("x4", 0.00450365819257568);
251      expectedImpacts.Add("x5", -0.000550911612888904);
252
253      return expectedImpacts;
254    }
255    #endregion
256
257    private void CheckDefaultAsserts(IRegressionSolution solution, Dictionary<string, double> expectedImpacts) {
258      IRegressionProblemData problemData = solution.ProblemData;
259      IEnumerable<double> estimatedValues = solution.GetEstimatedValues(solution.ProblemData.TrainingIndices);
260
261      var solutionImpacts = RegressionSolutionVariableImpactsCalculator.CalculateImpacts(solution);
262      var modelImpacts = RegressionSolutionVariableImpactsCalculator.CalculateImpacts(solution.Model, problemData, estimatedValues, problemData.TrainingIndices);
263      //Both ways should return equal results
264      Assert.IsTrue(solutionImpacts.SequenceEqual(modelImpacts));
265
266      //Check if impacts are as expected
267      Assert.AreEqual(modelImpacts.Count(), expectedImpacts.Count);
268      foreach (var entry in modelImpacts.OrderBy(v => v.Item1)) {
269        Assert.IsTrue(Math.Abs(expectedImpacts[entry.Item1] - entry.Item2) < epsilon);
270      }
271    }
272  }
273}
Note: See TracBrowser for help on using the repository browser.