Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2904_CalculateImpacts/HeuristicLab.Tests/HeuristicLab.Problems.DataAnalysis-3.4/RegressionVariableImpactCalculationTest.cs @ 16065

Last change on this file since 16065 was 16061, checked in by fholzing, 6 years ago

#2904: Added Unit-Tests for Classification/Performance and renamed VariableImpactCalculationTest.cs to RegressionVariableImpactCalculationTest.cs

File size: 15.5 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using HeuristicLab.Common;
6using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
7using HeuristicLab.Problems.DataAnalysis.Symbolic;
8using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
9using HeuristicLab.Problems.Instances.DataAnalysis;
10using HeuristicLab.Random;
11using Microsoft.VisualStudio.TestTools.UnitTesting;
12
13namespace HeuristicLab.Problems.DataAnalysis.Tests {
14
15  [TestClass()]
16  public class RegressionVariableImpactCalculationTest {
17    private TestContext testContextInstance;
18    /// <summary>
19    ///Gets or sets the test context which provides
20    ///information about and functionality for the current test run.
21    ///</summary>
22    public TestContext TestContext {
23      get { return testContextInstance; }
24      set { testContextInstance = value; }
25    }
26
27    private static readonly double epsilon = 0.00001;
28
29    [TestMethod]
30    [TestCategory("Problems.DataAnalysis")]
31    [TestProperty("Time", "short")]
32    public void ConstantModelVariableImpactTest() {
33      IRegressionProblemData problemData = LoadDefaultTowerProblem();
34      IRegressionModel model = new ConstantModel(5, "y");
35      IRegressionSolution solution = new RegressionSolution(model, problemData);
36      Dictionary<string, double> expectedImpacts = GetExpectedValuesForConstantModel();
37
38      CheckDefaultAsserts(solution, expectedImpacts);
39    }
40
41    [TestMethod]
42    [TestCategory("Problems.DataAnalysis")]
43    [TestProperty("Time", "short")]
44    public void LinearRegressionModelVariableImpactTowerTest() {
45      IRegressionProblemData problemData = LoadDefaultTowerProblem();
46      ISymbolicExpressionTree tree = CreateLRExpressionTree(problemData);
47      IRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
48      IRegressionSolution solution = new RegressionSolution(model, (IRegressionProblemData)problemData.Clone());
49      Dictionary<string, double> expectedImpacts = GetExpectedValuesForLRTower();
50
51      CheckDefaultAsserts(solution, expectedImpacts);
52    }
53
54    [TestMethod]
55    [TestCategory("Problems.DataAnalysis")]
56    [TestProperty("Time", "short")]
57    public void LinearRegressionModelVariableImpactMibaTest() {
58      IRegressionProblemData problemData = LoadDefaultMibaProblem();
59      ISymbolicExpressionTree tree = CreateLRExpressionTree(problemData);
60      IRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
61      IRegressionSolution solution = new RegressionSolution(model, (IRegressionProblemData)problemData.Clone());
62      Dictionary<string, double> expectedImpacts = GetExpectedValuesForLRMiba();
63
64      CheckDefaultAsserts(solution, expectedImpacts);
65    }
66
67    [TestMethod]
68    [TestCategory("Problems.DataAnalysis")]
69    [TestProperty("Time", "short")]
70    public void CustomModelVariableImpactTest() {
71      IRegressionProblemData problemData = CreateDefaultProblem();
72      ISymbolicExpressionTree tree = CreateCustomExpressionTree();
73      IRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
74      IRegressionSolution solution = new RegressionSolution(model, (IRegressionProblemData)problemData.Clone());
75      Dictionary<string, double> expectedImpacts = GetExpectedValuesForCustomProblem();
76
77      CheckDefaultAsserts(solution, expectedImpacts);
78    }
79
80    [TestMethod]
81    [TestCategory("Problems.DataAnalysis")]
82    [TestProperty("Time", "short")]
83    public void CustomModelVariableImpactNoInfluenceTest() {
84      IRegressionProblemData problemData = CreateDefaultProblem();
85      ISymbolicExpressionTree tree = CreateCustomExpressionTreeNoInfluenceX1();
86      IRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
87      IRegressionSolution solution = new RegressionSolution(model, (IRegressionProblemData)problemData.Clone());
88      Dictionary<string, double> expectedImpacts = GetExpectedValuesForCustomProblemNoInfluence();
89
90      CheckDefaultAsserts(solution, expectedImpacts);
91    }
92
93    [TestMethod]
94    [TestCategory("Problems.DataAnalysis")]
95    [TestProperty("Time", "short")]
96    [ExpectedException(typeof(ArgumentException))]
97    public void WrongDataSetTest() {
98      IRegressionProblemData problemData = LoadDefaultTowerProblem();
99      ISymbolicExpressionTree tree = CreateLRExpressionTree(problemData);
100      IRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
101      IRegressionSolution solution = new RegressionSolution(model, (IRegressionProblemData)problemData.Clone());
102
103      solution.ProblemData = LoadDefaultMibaProblem();
104      RegressionSolutionVariableImpactsCalculator.CalculateImpacts(solution);
105
106    }
107
108    [TestMethod]
109    [TestCategory("Problems.DataAnalysis")]
110    [TestProperty("Time", "medium")]
111    public void PerformanceTest() {
112      int rows = 20000;
113      int columns = 77;
114      var dataSet = OnlineCalculatorPerformanceTest.CreateRandomDataset(new MersenneTwister(1234), rows, columns);
115      IRegressionProblemData problemData = new RegressionProblemData(dataSet, dataSet.VariableNames.Except("y".ToEnumerable()), "y");
116      ISymbolicExpressionTree tree = CreateLRExpressionTree(problemData);
117      IRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
118      IRegressionSolution solution = new RegressionSolution(model, (IRegressionProblemData)problemData.Clone());
119
120      Stopwatch watch = new Stopwatch();
121      watch.Start();
122      var results = RegressionSolutionVariableImpactsCalculator.CalculateImpacts(solution);
123      watch.Stop();
124
125      TestContext.WriteLine("");
126      TestContext.WriteLine("Calculated cells per millisecond: {0}.", rows * columns / watch.ElapsedMilliseconds);
127
128    }
129
130    #region Load RegressionProblemData
131    private IRegressionProblemData LoadDefaultTowerProblem() {
132      RegressionRealWorldInstanceProvider provider = new RegressionRealWorldInstanceProvider();
133      var instance = provider.GetDataDescriptors().Where(x => x.Name.Equals("Tower")).Single();
134      return provider.LoadData(instance);
135    }
136    private IRegressionProblemData LoadDefaultMibaProblem() {
137      MibaFrictionRegressionInstanceProvider provider = new MibaFrictionRegressionInstanceProvider();
138      var instance = provider.GetDataDescriptors().Where(x => x.Name.Equals("CF1")).Single();
139      return provider.LoadData(instance);
140    }
141    private IRegressionProblemData CreateDefaultProblem() {
142      List<string> allowedInputVariables = new List<string>() { "x1", "x2", "x3", "x4", "x5" };
143      string targetVariable = "y";
144      var variableNames = allowedInputVariables.Union(targetVariable.ToEnumerable());
145      double[,] variableValues = new double[100, variableNames.Count()];
146
147      FastRandom random = new FastRandom(12345);
148      for (int i = 0; i < variableValues.GetLength(0); i++) {
149        for (int j = 0; j < variableValues.GetLength(1); j++) {
150          variableValues[i, j] = random.Next(1, 100);
151        }
152      }
153
154      Dataset dataset = new Dataset(variableNames, variableValues);
155      return new RegressionProblemData(dataset, allowedInputVariables, targetVariable);
156    }
157    #endregion
158
159    #region Create SymbolicExpressionTree
160    private ISymbolicExpressionTree CreateLRExpressionTree(IRegressionProblemData problemData) {
161      IEnumerable<int> rows = problemData.TrainingIndices;
162      var doubleVariables = problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<double>);
163      var factorVariableNames = problemData.AllowedInputVariables.Where(problemData.Dataset.VariableHasType<string>);
164      var factorVariables = problemData.Dataset.GetFactorVariableValues(factorVariableNames, rows);
165      double[,] binaryMatrix = problemData.Dataset.ToArray(factorVariables, rows);
166      double[,] doubleVarMatrix = problemData.Dataset.ToArray(doubleVariables.Concat(new string[] { problemData.TargetVariable }), rows);
167      var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);
168
169      alglib.linearmodel lm = new alglib.linearmodel();
170      alglib.lrreport ar = new alglib.lrreport();
171      int nRows = inputMatrix.GetLength(0);
172      int nFeatures = inputMatrix.GetLength(1) - 1;
173      double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant
174
175      int retVal = 1;
176      alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
177      if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution");
178
179      alglib.lrunpack(lm, out coefficients, out nFeatures);
180
181      int nFactorCoeff = binaryMatrix.GetLength(1);
182      int nVarCoeff = doubleVariables.Count();
183      return LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
184        doubleVariables.ToArray(), coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(),
185        @const: coefficients[nFeatures]);
186    }
187    private ISymbolicExpressionTree CreateCustomExpressionTree() {
188      return new InfixExpressionParser().Parse("x1*x2 - x2*x2 + x3*x3 + x4*x4 - x5*x5 + 14/12");
189    }
190    private ISymbolicExpressionTree CreateCustomExpressionTreeNoInfluenceX1() {
191      return new InfixExpressionParser().Parse("x1/x1*x2 - x2*x2 + x3*x3 + x4*x4 - x5*x5 + 14/12");
192    }
193    #endregion
194
195    #region Get Expected Values
196    private Dictionary<string, double> GetExpectedValuesForConstantModel() {
197      Dictionary<string, double> expectedImpacts = new Dictionary<string, double>();
198      expectedImpacts.Add("x1", 0);
199      expectedImpacts.Add("x10", 0);
200      expectedImpacts.Add("x11", 0);
201      expectedImpacts.Add("x12", 0);
202      expectedImpacts.Add("x13", 0);
203      expectedImpacts.Add("x14", 0);
204      expectedImpacts.Add("x15", 0);
205      expectedImpacts.Add("x16", 0);
206      expectedImpacts.Add("x17", 0);
207      expectedImpacts.Add("x18", 0);
208      expectedImpacts.Add("x19", 0);
209      expectedImpacts.Add("x2", 0);
210      expectedImpacts.Add("x20", 0);
211      expectedImpacts.Add("x21", 0);
212      expectedImpacts.Add("x22", 0);
213      expectedImpacts.Add("x23", 0);
214      expectedImpacts.Add("x24", 0);
215      expectedImpacts.Add("x25", 0);
216      expectedImpacts.Add("x3", 0);
217      expectedImpacts.Add("x4", 0);
218      expectedImpacts.Add("x5", 0);
219      expectedImpacts.Add("x6", 0);
220      expectedImpacts.Add("x7", 0);
221      expectedImpacts.Add("x8", 0);
222      expectedImpacts.Add("x9", 0);
223
224      return expectedImpacts;
225    }
226    private Dictionary<string, double> GetExpectedValuesForLRTower() {
227      Dictionary<string, double> expectedImpacts = new Dictionary<string, double>();
228      expectedImpacts.Add("x1", 0.639933657675427);
229      expectedImpacts.Add("x10", 0.0127006885259798);
230      expectedImpacts.Add("x11", 0.648236047877475);
231      expectedImpacts.Add("x12", 0.248350173524562);
232      expectedImpacts.Add("x13", 0.550889987109547);
233      expectedImpacts.Add("x14", 0.0882824237877192);
234      expectedImpacts.Add("x15", 0.0391276799061169);
235      expectedImpacts.Add("x16", 0.743632451088798);
236      expectedImpacts.Add("x17", 0.00254276857715308);
237      expectedImpacts.Add("x18", 0.0021548147614302);
238      expectedImpacts.Add("x19", 0.00513473927463037);
239      expectedImpacts.Add("x2", 0.0107583487931443);
240      expectedImpacts.Add("x20", 0.18085069746933);
241      expectedImpacts.Add("x21", 0.138053600700762);
242      expectedImpacts.Add("x22", 0.000339539790460086);
243      expectedImpacts.Add("x23", 0.362111965467117);
244      expectedImpacts.Add("x24", 0.0320167935572304);
245      expectedImpacts.Add("x25", 0.57460423230969);
246      expectedImpacts.Add("x3", 0.688142635515862);
247      expectedImpacts.Add("x4", 0.000176632348454664);
248      expectedImpacts.Add("x5", 0.0213915503114581);
249      expectedImpacts.Add("x6", 0.807976486909701);
250      expectedImpacts.Add("x7", 0.716217843319252);
251      expectedImpacts.Add("x8", 0.772701841392564);
252      expectedImpacts.Add("x9", 0.178418730050997);
253
254      return expectedImpacts;
255    }
256    private Dictionary<string, double> GetExpectedValuesForLRMiba() {
257      Dictionary<string, double> expectedImpacts = new Dictionary<string, double>();
258      expectedImpacts.Add("Grooving", 0.0380558091030508);
259      expectedImpacts.Add("Material", 0.02195836766156);
260      expectedImpacts.Add("Material_Cat", 0.000338687689067418);
261      expectedImpacts.Add("Oil", 0.363464994447857);
262      expectedImpacts.Add("x10", 0.0015309669014415);
263      expectedImpacts.Add("x11", -3.60432578908609E-05);
264      expectedImpacts.Add("x12", 0.00118953859087612);
265      expectedImpacts.Add("x13", 0.00164240977191832);
266      expectedImpacts.Add("x14", 0.000688363685380056);
267      expectedImpacts.Add("x15", -4.75067203969948E-05);
268      expectedImpacts.Add("x16", 0.00130388206125076);
269      expectedImpacts.Add("x17", 0.132351838646134);
270      expectedImpacts.Add("x2", -2.47981401556574E-05);
271      expectedImpacts.Add("x20", 0.716541716605016);
272      expectedImpacts.Add("x22", 0.174959377282835);
273      expectedImpacts.Add("x3", -2.65979754026091E-05);
274      expectedImpacts.Add("x4", -1.24764212947603E-05);
275      expectedImpacts.Add("x5", 0.001184959455798);
276      expectedImpacts.Add("x6", 0.000743336665237626);
277      expectedImpacts.Add("x7", 0.00188965927889773);
278      expectedImpacts.Add("x8", 0.00415201581536351);
279      expectedImpacts.Add("x9", 0.00365653880518491);
280
281      return expectedImpacts;
282    }
283    private Dictionary<string, double> GetExpectedValuesForCustomProblem() {
284      Dictionary<string, double> expectedImpacts = new Dictionary<string, double>();
285      expectedImpacts.Add("x1", -0.000573340275115796);
286      expectedImpacts.Add("x2", 0.000781819784095592);
287      expectedImpacts.Add("x3", -0.000390473234921058);
288      expectedImpacts.Add("x4", -0.00116083274627995);
289      expectedImpacts.Add("x5", -0.00036161186207545);
290
291      return expectedImpacts;
292    }
293    private Dictionary<string, double> GetExpectedValuesForCustomProblemNoInfluence() {
294      Dictionary<string, double> expectedImpacts = new Dictionary<string, double>();
295      expectedImpacts.Add("x1", 0);
296      expectedImpacts.Add("x2", 0.00263393690342982);
297      expectedImpacts.Add("x3", -0.00053248037514929);
298      expectedImpacts.Add("x4", 0.00450365819257568);
299      expectedImpacts.Add("x5", -0.000550911612888904);
300
301      return expectedImpacts;
302    }
303    #endregion
304
305    private void CheckDefaultAsserts(IRegressionSolution solution, Dictionary<string, double> expectedImpacts) {
306      IRegressionProblemData problemData = solution.ProblemData;
307      IEnumerable<double> estimatedValues = solution.GetEstimatedValues(solution.ProblemData.TrainingIndices);
308
309      var solutionImpacts = RegressionSolutionVariableImpactsCalculator.CalculateImpacts(solution);
310      var modelImpacts = RegressionSolutionVariableImpactsCalculator.CalculateImpacts(solution.Model, problemData, estimatedValues, problemData.TrainingIndices);
311
312      //Both ways should return equal results
313      Assert.IsTrue(solutionImpacts.SequenceEqual(modelImpacts));
314
315      //Check if impacts are as expected
316      Assert.AreEqual(modelImpacts.Count(), expectedImpacts.Count);
317      Assert.IsTrue(modelImpacts.All(v => Math.Abs(expectedImpacts[v.Item1] - v.Item2) < epsilon));
318    }
319  }
320}
Note: See TracBrowser for help on using the repository browser.