1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using HeuristicLab.Algorithms.ALPS;
5 | using HeuristicLab.Algorithms.DataAnalysis;
6 | using HeuristicLab.Data;
7 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
8 | using HeuristicLab.Problems.DataAnalysis.Symbolic;
9 | using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
10 | using HeuristicLab.Problems.Instances.DataAnalysis;
11 | using HeuristicLab.Selection;
12 | using HeuristicLab.Tests;
13 | using Microsoft.VisualStudio.TestTools.UnitTesting;
14 |
15 | namespace HeuristicLab.Problems.DataAnalysis.Tests {
16 |
/// <summary>
/// Regression tests that pin the variable impacts obtained from seeded algorithm runs
/// (ALPS genetic algorithm for symbolic regression, random forest regression and
/// linear regression) to previously recorded values.
/// </summary>
[TestClass()]
public class VariableImpactCalculationTest {
  /// <summary>Name of the result entry the ALPS tests read from the algorithm's results.</summary>
  private const string VariableImpactsResultName = "Variable impacts";

  /// <summary>
  /// Absolute tolerance used when comparing impact values. Exact equality on doubles is
  /// brittle (last-bit differences are enough to break it); this delta is far below the
  /// precision that is meaningful for the recorded expectations, so real changes are still detected.
  /// </summary>
  private const double ImpactTolerance = 1E-9;

  /// <summary>
  /// Runs the seeded ALPS sample on the Miba friction instance whose name starts with "CF1"
  /// and checks row count, variable order and values of the "Variable impacts" matrix.
  /// </summary>
  [TestMethod]
  [TestCategory("Problems.DataAnalysis")]
  [TestProperty("Time", "medium")]
  public void RunAlpsSymRegFactoryVariableMibaC1Test() {
    var expectedVariableOrder = new List<string> { "Oil", "Grooving", "Material_Cat", "x20", "Material", "x14", "x12", "x3", "x7", "x2", "x16", "x15", "x8", "x10", "x11", "x22", "x4", "x5", "x6", "x9", "x17", "x13" };
    var expectedVariableImpacts = new List<double> { 0.206, 0.179, 0.136, 0.099, 0.092, 0.07, 0.048, 0.033, 0.029, 0.026, 0.017, 0.01, 0.007, 0.007, 0.007, 0.006, 0.005, 0.005, 0.005, 0.005, 0.005, 0.003 };

    var alpsGa = CreateAlpsGaSymRegMibaC1Sample();
    alpsGa.Start();

    AssertVariableImpactMatrix(GetVariableImpactMatrix(alpsGa), 22, expectedVariableOrder, expectedVariableImpacts);
  }

  /// <summary>
  /// Runs the seeded ALPS sample on the Miba friction instance whose name starts with "Wear1"
  /// and checks row count, variable order and values of the "Variable impacts" matrix.
  /// </summary>
  [TestMethod]
  [TestCategory("Problems.DataAnalysis")]
  [TestProperty("Time", "medium")]
  public void RunAlpsSymRegFactoryVariableMibaWear1Test() {
    var expectedVariableOrder = new List<string> { "Oil", "Material_Cat", "Material", "Grooving", "x8", "x1", "x14", "x11", "x19", "x10", "x5", "x9", "x17", "x6", "x18", "x7", "Source1", "x2", "x3", "x4", "x16", "x20", "x12", "x13", "x15", "x22", "x21" };
    var expectedVariableImpacts = new List<double> { 0.194, 0.184, 0.163, 0.155, 0.022, 0.019, 0.019, 0.018, 0.017, 0.016, 0.015, 0.015, 0.015, 0.014, 0.014, 0.013, 0.012, 0.011, 0.011, 0.011, 0.011, 0.011, 0.01, 0.009, 0.009, 0.008, 0.006 };

    var alpsGa = CreateAlpsGaSymRegMibaWear1Sample();
    alpsGa.Start();

    AssertVariableImpactMatrix(GetVariableImpactMatrix(alpsGa), 27, expectedVariableOrder, expectedVariableImpacts);
  }

  /// <summary>
  /// Runs the seeded ALPS sample on the real-world instance whose name starts with "Tower"
  /// and checks row count, variable order and values of the "Variable impacts" matrix.
  /// </summary>
  [TestMethod]
  [TestCategory("Problems.DataAnalysis")]
  [TestProperty("Time", "medium")]
  public void RunAlpsSymRegFactoryVariableRealworldTowerTest() {
    var expectedVariableOrder = new List<string> { "x5", "x4", "x21", "x22", "x13", "x3", "x9", "x19", "x1", "x12", "x8", "x10", "x25", "x11", "x23", "x7", "x24", "x6", "x14", "x15", "x20", "x2", "x16", "x18", "x17" };
    var expectedVariableImpacts = new List<double> { 0.057, 0.051, 0.047, 0.047, 0.046, 0.045, 0.044, 0.044, 0.043, 0.043, 0.041, 0.04, 0.04, 0.038, 0.038, 0.037, 0.037, 0.036, 0.036, 0.034, 0.033, 0.031, 0.031, 0.031, 0.03 };

    var alpsGa = CreateAlpsGaSymReRealworldTowerSample();
    alpsGa.Start();

    AssertVariableImpactMatrix(GetVariableImpactMatrix(alpsGa), 25, expectedVariableOrder, expectedVariableImpacts);
  }

  /// <summary>
  /// Trains a seeded random forest on PolyTen data and compares the impacts computed by
  /// <c>RegressionSolutionVariableImpactsCalculator</c> with the recorded values.
  /// </summary>
  [TestMethod]
  [TestCategory("Problems.DataAnalysis")]
  [TestProperty("Time", "short")]
  public void RunRandomForestPolyTenTest() {
    var randomForestRegression = new RandomForestRegression() {
      M = 0.4,
      R = 0.3,
      NumberOfTrees = 50,
      SetSeedRandomly = false,
      Seed = 42
    };

    var symbRegProblem = new SymbolicRegressionSingleObjectiveProblem();
    symbRegProblem.ProblemData = new PolyTen(555000).GenerateRegressionData();
    randomForestRegression.Problem = symbRegProblem;
    randomForestRegression.Start();

    // Look up the entry first so that a missing result fails with an assertion message
    // instead of a NullReferenceException on ".Value".
    var solutionResult = randomForestRegression.Results.FirstOrDefault(v => v.Name == "Random forest regression solution");
    Assert.IsNotNull(solutionResult, "The algorithm produced no result named \"Random forest regression solution\".");
    var rfs = solutionResult.Value as RandomForestRegressionSolution;
    Assert.IsNotNull(rfs, "The result value is not a RandomForestRegressionSolution.");
    var result = new RegressionSolutionVariableImpactsCalculator().Calculate(rfs);

    // Expected impacts, in the order the calculator is expected to return them.
    var expectedImpacts = new List<Tuple<string, double>> {
      Tuple.Create("X6", 0.14984398650032071),
      Tuple.Create("X5", 0.14361275357221248),
      Tuple.Create("X1", 0.10607502238071009),
      Tuple.Create("X3", 0.1056849194554953),
      Tuple.Create("X4", 0.0906750779077492),
      Tuple.Create("X2", 0.089817766590950532),
      Tuple.Create("X8", 0.042797597332305481),
      Tuple.Create("X9", 0.038609311722408446),
      Tuple.Create("X10", 0.036202503161574362),
      Tuple.Create("X7", 0.033539038256832465)
    };
    AssertVariableImpacts(expectedImpacts, result);
  }

  /// <summary>
  /// Fits a linear regression on BreimanOne data and compares the impacts computed by
  /// <c>RegressionSolutionVariableImpactsCalculator</c> with the recorded values.
  /// </summary>
  [TestMethod]
  [TestCategory("Problems.DataAnalysis")]
  [TestProperty("Time", "short")]
  public void RunLinearRegressionBreimanITest() {
    var lr = new LinearRegression();
    var symbRegProblem = new SymbolicRegressionSingleObjectiveProblem();
    symbRegProblem.ProblemData = new BreimanOne(1234).GenerateRegressionData();

    lr.Problem = symbRegProblem;
    lr.Start();

    // Look up the entry first so that a missing result fails with an assertion message
    // instead of a NullReferenceException on ".Value".
    var solutionResult = lr.Results.FirstOrDefault(v => v.Name == "Linear regression solution");
    Assert.IsNotNull(solutionResult, "The algorithm produced no result named \"Linear regression solution\".");
    var lrs = solutionResult.Value as SymbolicRegressionSolution;
    Assert.IsNotNull(lrs, "The result value is not a SymbolicRegressionSolution.");
    var result = new RegressionSolutionVariableImpactsCalculator().Calculate(lrs);

    // Expected impacts, in the order the calculator is expected to return them.
    var expectedImpacts = new List<Tuple<string, double>> {
      Tuple.Create("X1", 0.43328518823918716),
      Tuple.Create("X2", 0.073284548674773631),
      Tuple.Create("X5", 0.070306657566311159),
      Tuple.Create("X3", 0.035352205426012917),
      Tuple.Create("X6", 0.031710492680145475),
      Tuple.Create("X4", 0.0081659530036176653),
      Tuple.Create("X7", 0.0070869550705255913),
      Tuple.Create("X9", 8.0546619615096127E-05),
      Tuple.Create("X8", 6.2072921147349192E-05),
      Tuple.Create("X10", 1.9113559758254794E-06)
    };
    AssertVariableImpacts(expectedImpacts, result);
  }

  /// <summary>
  /// Fetches the "Variable impacts" result of a finished ALPS run as a <c>DoubleMatrix</c>.
  /// Fails the test with a descriptive message if the entry is missing or has another type.
  /// </summary>
  private static DoubleMatrix GetVariableImpactMatrix(AlpsGeneticAlgorithm alpsGa) {
    var impactsResult = alpsGa.Results.FirstOrDefault(r => r.Name == VariableImpactsResultName);
    Assert.IsNotNull(impactsResult, string.Format("The algorithm produced no result named \"{0}\".", VariableImpactsResultName));
    var impacts = impactsResult.Value as DoubleMatrix;
    Assert.IsNotNull(impacts, string.Format("The value of result \"{0}\" is not a DoubleMatrix.", VariableImpactsResultName));
    return impacts;
  }

  /// <summary>
  /// Checks the row count, the row names (in order) and all matrix values (in enumeration
  /// order, compared within <see cref="ImpactTolerance"/>) of an impact matrix.
  /// </summary>
  private static void AssertVariableImpactMatrix(DoubleMatrix actual, int expectedRows, IList<string> expectedOrder, IList<double> expectedImpacts) {
    Assert.AreEqual(expectedRows, actual.Rows, "Unexpected number of rows in the variable impact matrix.");
    CollectionAssert.AreEqual(expectedOrder.ToList(), actual.RowNames.ToList(), "Unexpected variable order in the variable impact matrix.");

    var actualImpacts = actual.ToList<double>();
    Assert.AreEqual(expectedImpacts.Count, actualImpacts.Count, "Unexpected number of values in the variable impact matrix.");
    for (int i = 0; i < expectedImpacts.Count; i++) {
      Assert.AreEqual(expectedImpacts[i], actualImpacts[i], ImpactTolerance,
        string.Format("Unexpected impact value at position {0}.", i));
    }
  }

  /// <summary>
  /// Checks that the calculated (variable, impact) pairs match the expected ones position by
  /// position: names must be equal, impacts must agree within <see cref="ImpactTolerance"/>.
  /// </summary>
  private static void AssertVariableImpacts(IList<Tuple<string, double>> expected, IEnumerable<Tuple<string, double>> actual) {
    // Materialize once; the sequence is indexed below.
    var actualImpacts = actual.ToList();
    Assert.AreEqual(expected.Count, actualImpacts.Count, "Unexpected number of variable impacts.");
    for (int i = 0; i < expected.Count; i++) {
      Assert.AreEqual(expected[i].Item1, actualImpacts[i].Item1,
        string.Format("Unexpected variable at position {0}.", i));
      Assert.AreEqual(expected[i].Item2, actualImpacts[i].Item2, ImpactTolerance,
        string.Format("Unexpected impact for variable \"{0}\".", expected[i].Item1));
    }
  }

  /// <summary>
  /// Builds the ALPS sample for the Miba friction instance whose name starts with "CF1":
  /// tree depth/length limited to 35, Exponential and Logarithm symbols disabled, seed 1234.
  /// </summary>
  private AlpsGeneticAlgorithm CreateAlpsGaSymRegMibaC1Sample() {
    var alpsGa = new AlpsGeneticAlgorithm();

    #region Problem Configuration
    var provider = new MibaFrictionRegressionInstanceProvider();
    var instance = provider.GetDataDescriptors().Single(x => x.Name.StartsWith("CF1", StringComparison.Ordinal));
    var symbRegProblem = new SymbolicRegressionSingleObjectiveProblem();
    symbRegProblem.Load(provider.LoadData(instance));
    SetMaximumTreeSize(symbRegProblem, 35);

    var grammar = (TypeCoherentExpressionGrammar)symbRegProblem.SymbolicExpressionTreeGrammar;
    grammar.Symbols.OfType<Exponential>().Single().Enabled = false;
    grammar.Symbols.OfType<Logarithm>().Single().Enabled = false;
    #endregion

    #region Algorithm Configuration
    AssignProblem(alpsGa, symbRegProblem);
    SamplesUtils.ConfigureAlpsGeneticAlgorithmParameters<GeneralizedRankSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(alpsGa,
      numberOfLayers: 1000,
      popSize: 100,
      mutationRate: 0.25,
      elites: 1,
      plusSelection: false,
      agingScheme: AgingScheme.Polynomial,
      ageGap: 15,
      ageInheritance: 1.0,
      maxGens: 10);
    UseFixedSeed(alpsGa, 1234);
    #endregion

    return alpsGa;
  }

  /// <summary>
  /// Builds the ALPS sample for the Miba friction instance whose name starts with "Wear1":
  /// tree depth/length limited to 77, default grammar, seed 11121314.
  /// </summary>
  private AlpsGeneticAlgorithm CreateAlpsGaSymRegMibaWear1Sample() {
    var alpsGa = new AlpsGeneticAlgorithm();

    #region Problem Configuration
    var provider = new MibaFrictionRegressionInstanceProvider();
    var instance = provider.GetDataDescriptors().Single(x => x.Name.StartsWith("Wear1", StringComparison.Ordinal));
    var symbRegProblem = new SymbolicRegressionSingleObjectiveProblem();
    symbRegProblem.Load(provider.LoadData(instance));
    SetMaximumTreeSize(symbRegProblem, 77);
    #endregion

    #region Algorithm Configuration
    AssignProblem(alpsGa, symbRegProblem);
    SamplesUtils.ConfigureAlpsGeneticAlgorithmParameters<RandomSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(alpsGa,
      numberOfLayers: 1000,
      popSize: 200,
      mutationRate: 0.25,
      elites: 1,
      plusSelection: false,
      agingScheme: AgingScheme.Polynomial,
      ageGap: 15,
      ageInheritance: 1.0,
      maxGens: 10);
    UseFixedSeed(alpsGa, 11121314);
    #endregion

    return alpsGa;
  }

  /// <summary>
  /// Builds the ALPS sample for the real-world instance whose name starts with "Tower":
  /// tree depth/length limited to 77, default grammar, seed 1111.
  /// </summary>
  private AlpsGeneticAlgorithm CreateAlpsGaSymReRealworldTowerSample() {
    var alpsGa = new AlpsGeneticAlgorithm();

    #region Problem Configuration
    var provider = new RegressionRealWorldInstanceProvider();
    var instance = provider.GetDataDescriptors().Single(x => x.Name.StartsWith("Tower", StringComparison.Ordinal));
    var symbRegProblem = new SymbolicRegressionSingleObjectiveProblem();
    symbRegProblem.Load(provider.LoadData(instance));
    SetMaximumTreeSize(symbRegProblem, 77);
    #endregion

    #region Algorithm Configuration
    AssignProblem(alpsGa, symbRegProblem);
    SamplesUtils.ConfigureAlpsGeneticAlgorithmParameters<RandomSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(alpsGa,
      numberOfLayers: 1000,
      popSize: 200,
      mutationRate: 0.25,
      elites: 1,
      plusSelection: false,
      agingScheme: AgingScheme.Polynomial,
      ageGap: 15,
      ageInheritance: 1.0,
      maxGens: 10);
    UseFixedSeed(alpsGa, 1111);
    #endregion

    return alpsGa;
  }

  /// <summary>Sets both the maximum tree depth and the maximum tree length of the problem to <paramref name="size"/>.</summary>
  private static void SetMaximumTreeSize(SymbolicRegressionSingleObjectiveProblem problem, int size) {
    problem.MaximumSymbolicExpressionTreeDepth.Value = size;
    problem.MaximumSymbolicExpressionTreeLength.Value = size;
  }

  /// <summary>Applies the name and description shared by all ALPS samples and assigns the problem.</summary>
  private static void AssignProblem(AlpsGeneticAlgorithm alpsGa, SymbolicRegressionSingleObjectiveProblem problem) {
    alpsGa.Name = "ALPS Genetic Programming - Symbolic Regression";
    alpsGa.Description = "An ALPS-GP to solve a symbolic regression problem";
    alpsGa.Problem = problem;
  }

  /// <summary>Disables random seeding and sets the given seed so that runs are repeatable.</summary>
  private static void UseFixedSeed(AlpsGeneticAlgorithm alpsGa, int seed) {
    alpsGa.SetSeedRandomly.Value = false;
    alpsGa.Seed.Value = seed;
  }

  // TODO: Add function tests once the branch of #2904 is done.
}
238 | }