Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/Test/GrammarEnumerationTest.cs @ 15832

Last change on this file since 15832 was 15832, checked in by lkammere, 6 years ago

#2886: Fix Equals methods in Symbols.
Move semantical hashing of phrases to separate class.

File size: 31.3 KB
Line 
1using System;
2using System.Collections;
3using System.Collections.Generic;
4using System.Linq;
5using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration;
6using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
7using HeuristicLab.Common;
8using HeuristicLab.Core;
9using HeuristicLab.Problems.DataAnalysis;
10using HeuristicLab.Problems.Instances.DataAnalysis;
11using HeuristicLab.Random;
12using Microsoft.VisualStudio.TestTools.UnitTesting;
13
14namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
15  [TestClass]
16  public class MctsSymbolicRegressionTest {
17    private const int Seed = 1234;
18    private IRandom rand;
19
20    private const double SuccessThreshold = 0.9999999;
21
22    private GrammarEnumerationAlgorithm alg;
23    private RegressionProblem problem;
24
// Per-test setup: creates a seeded random number generator and a fresh
// algorithm instance that is wired to a new, empty regression problem.
[TestInitialize]
public void InitTest() {
  rand = new FastRandom(Seed);

  var algorithm = new GrammarEnumerationAlgorithm();
  var regressionProblem = new RegressionProblem();
  algorithm.Problem = regressionProblem;
  // NOTE(review): presumably keeps the algorithm from refreshing its GUI results
  // during a test run — confirm against GrammarEnumerationAlgorithm.
  algorithm.GuiUpdateInterval = int.MaxValue;

  // Publish the fully configured objects to the fields used by the tests.
  alg = algorithm;
  problem = regressionProblem;
}
34
// Per-test teardown: prints the best training sentence in infix notation,
// if the algorithm produced one.
[TestCleanup]
public void Cleanup() {
  var bestSentence = alg.BestTrainingSentence;
  if (bestSentence == null) {
    return; // nothing to report
  }

  var infix = alg.Grammar.ToInfixString(bestSentence);
  Console.WriteLine("Training: " + infix);
}
41
42
// Shared result check for all tests: the algorithm must have stored a
// "Best solution (Training)" result, and that solution's R² on the test
// partition must equal 1.0 within (1 - SuccessThreshold).
private void EvaluateGrammarEnumeration() {
  const string bestSolutionKey = "Best solution (Training)";
  double tolerance = 1.0 - SuccessThreshold;

  // The result entry only exists if the algorithm terminated with a solution.
  Assert.IsTrue(alg.Results.ContainsKey(bestSolutionKey), "No training solution returned!");

  // Check the quality of the returned solution.
  var bestSolution = (IRegressionSolution)alg.Results[bestSolutionKey].Value;
  Assert.AreEqual(1.0, bestSolution.TestRSquared, tolerance, "Test quality too low!");
}
53
54
// Structure search on Nguyen F1 (x³ + x² + x): the enumeration has to generate the
// target sentence and has to select it as the best training sentence.
[TestMethod]
[TestProperty("Goal", "structure search")]
public void NoConstants_Nguyen1() {
  // x³ + x² + x
  alg.MaxTreeSize = 12;
  alg.Problem.ProblemData = new NguyenFunctionOne(Seed).GenerateRegressionData();

  alg.Start();

  TerminalSymbol varSymbol = alg.Grammar.Var.VariableTerminalSymbols.First();
  TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
  TerminalSymbol addSymbol = alg.Grammar.Addition;

  // NOTE(review): the symbol order reads like postfix notation of x*x*x + x*x + x — confirm.
  SymbolString targetSolution = new SymbolString(new[] {
    varSymbol, varSymbol, varSymbol, mulSymbol, mulSymbol,
    varSymbol, varSymbol, mulSymbol, addSymbol,
    varSymbol, addSymbol
  });

  // First make sure the target sentence was enumerated at all ...
  int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
  Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");

  // ... then that a best sentence exists. Cleanup() treats a null BestTrainingSentence as
  // possible, so fail with a readable message instead of handing null to the hasher.
  Assert.IsNotNull(alg.BestTrainingSentence, "No best training sentence was found!");

  int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
  Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");

  // Evaluate
  EvaluateGrammarEnumeration();
}
84
// Too "large" target model for now...
// The test is kept out of the test run by leaving [TestMethod] commented out.
//[TestMethod]
[TestProperty("Goal", "structure search")]
public void NoConstants_Nguyen2() {
  // Target: x^4 + x³ + x² + x
  alg.MaxTreeSize = 20;

  var problemData = new NguyenFunctionTwo(Seed).GenerateRegressionData();
  alg.Problem.ProblemData = problemData;

  alg.Start();

  EvaluateGrammarEnumeration();
}
96
// Too "large" target model for now...
// The test is kept out of the test run by leaving [TestMethod] commented out.
//[TestMethod]
[TestProperty("Goal", "structure search")]
public void NoConstants_Nguyen3() {
  // Target: x^5 + x^4 + x^3 + x^2 + x
  alg.MaxTreeSize = 32;

  var problemData = new NguyenFunctionThree(Seed).GenerateRegressionData();
  alg.Problem.ProblemData = problemData;

  alg.Start();

  EvaluateGrammarEnumeration();
}
109
// Structure search on Nguyen F6 (sin(x) + sin(x + x²)): the enumeration has to generate
// the target sentence and has to select it as the best training sentence.
[TestMethod]
[TestProperty("Goal", "structure search")]
public void NoConstants_Nguyen6() {
  // sin(x) + sin(x + x²)
  alg.MaxTreeSize = 10;
  alg.Problem.ProblemData = new NguyenFunctionSix(Seed).GenerateRegressionData();

  alg.Start();

  TerminalSymbol varSymbol = alg.Grammar.Var.VariableTerminalSymbols.First();
  TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
  TerminalSymbol addSymbol = alg.Grammar.Addition;
  TerminalSymbol sinSymbol = alg.Grammar.Sin;

  // NOTE(review): the symbol order reads like postfix notation of sin(x) + sin(x*x + x) — confirm.
  SymbolString targetSolution = new SymbolString(new[] {
    varSymbol, sinSymbol,
    varSymbol, varSymbol, mulSymbol, varSymbol, addSymbol, sinSymbol, addSymbol
  });

  // First make sure the target sentence was enumerated at all ...
  int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
  Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");

  // ... then that a best sentence exists. Cleanup() treats a null BestTrainingSentence as
  // possible, so fail with a readable message instead of handing null to the hasher.
  Assert.IsNotNull(alg.BestTrainingSentence, "No best training sentence was found!");

  int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
  Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");

  EvaluateGrammarEnumeration();
}
137
// Structure search on Nguyen F9 (sin(x) + sin(y²)): the enumeration has to generate the
// target sentence and has to select it as the best training sentence.
[TestMethod]
[TestProperty("Goal", "structure search")]
public void NoConstants_Nguyen9() {
  // sin(x) + sin(y²)
  alg.MaxTreeSize = 11;
  alg.Problem.ProblemData = new NguyenFunctionNine(Seed).GenerateRegressionData();

  alg.Start();

  // The two input variables are looked up by their string representation.
  TerminalSymbol xSymbol = alg.Grammar.Var.VariableTerminalSymbols.First(v => v.StringRepresentation == "X");
  TerminalSymbol ySymbol = alg.Grammar.Var.VariableTerminalSymbols.First(v => v.StringRepresentation == "Y");
  TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
  TerminalSymbol addSymbol = alg.Grammar.Addition;
  TerminalSymbol sinSymbol = alg.Grammar.Sin;

  // NOTE(review): the symbol order reads like postfix notation of sin(x) + sin(y*y) — confirm.
  SymbolString targetSolution = new SymbolString(new[] {
    xSymbol, sinSymbol,
    ySymbol, ySymbol, mulSymbol, sinSymbol, addSymbol
  });

  // First make sure the target sentence was enumerated at all ...
  int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
  Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");

  // ... then that a best sentence exists. Cleanup() treats a null BestTrainingSentence as
  // possible, so fail with a readable message instead of handing null to the hasher.
  Assert.IsNotNull(alg.BestTrainingSentence, "No best training sentence was found!");

  int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
  Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");

  EvaluateGrammarEnumeration();
}
166
// Too many variables for now...
// The test is kept out of the test run by leaving [TestMethod] commented out.
//[TestMethod]
[TestProperty("Goal", "structure search")]
public void MctsSymbReg_NoConstants_Poly10() {
  alg.MaxTreeSize = 10;

  var problemData = new PolyTen(Seed).GenerateRegressionData();
  alg.Problem.ProblemData = problemData;

  alg.Start();

  EvaluateGrammarEnumeration();
}
177
// Structure search on a synthetic data set with 100 rows where
// y = x / (log(x)*x + x) and x is drawn from rand.NextDouble() + 1.1.
[TestMethod]
[TestProperty("Goal", "structure search")]
public void NoConstants_Inverse() {
  // x / (log(x)*x + x)
  alg.MaxTreeSize = 12;

  const int rows = 100;
  var inputs = new List<double>(rows);
  var targets = new List<double>(rows);
  for (int row = 0; row < rows; row++) {
    // One random draw per row, in row order.
    double xi = rand.NextDouble() + 1.1;
    inputs.Add(xi);
    targets.Add(xi / (Math.Log(xi) * xi + xi));
  }

  var variableNames = new List<string>() { "x", "y" };
  var columns = new List<IList>() { inputs, targets };
  var dataset = new Dataset(variableNames, columns);
  alg.Problem.ProblemData = new RegressionProblemData(dataset, "x".ToEnumerable(), "y");

  alg.Start();
  EvaluateGrammarEnumeration();
}
191
192
#if false
    // ---------------------------------------------------------------------------------
    // Disabled tests: everything between "#if false" and "#endif" is excluded from
    // compilation.
    // NOTE(review): the method names suggest that these tests were carried over from the
    // MCTS symbolic regression tests — confirm before enabling them.
    // ---------------------------------------------------------------------------------

    // Runs the algorithm on the given problem data and checks the solution quality.
    // NOTE(review): nearly all tests of this region call this method, but the class did not
    // define it. The body follows the pattern of the enabled tests (set data, start,
    // evaluate) and leaves MaxTreeSize untouched — choose a suitable size when enabling.
    private void TestGrammarEnumeration(IRegressionProblemData problemData) {
      alg.Problem.ProblemData = problemData;
      alg.Start();
      EvaluateGrammarEnumeration();
    }

    // Loads the single Keijzer instance whose name contains nameFragment.
    // A null seed uses the provider's parameterless constructor.
    private IRegressionProblemData LoadKeijzerInstance(string nameFragment, int? seed) {
      var provider = seed.HasValue ? new KeijzerInstanceProvider(seed.Value) : new KeijzerInstanceProvider();
      return provider.LoadData(provider.GetDataDescriptors().Single(d => d.Name.Contains(nameFragment)));
    }

    // Loads the single Nguyen instance whose name contains nameFragment.
    // A null seed uses the provider's parameterless constructor.
    private IRegressionProblemData LoadNguyenInstance(string nameFragment, int? seed) {
      var provider = seed.HasValue ? new NguyenInstanceProvider(seed.Value) : new NguyenInstanceProvider();
      return provider.LoadData(provider.GetDataDescriptors().Single(d => d.Name.Contains(nameFragment)));
    }

    // Loads the Poly-10 instance from the seeded VariousInstanceProvider.
    private IRegressionProblemData LoadPoly10Instance() {
      var provider = new VariousInstanceProvider(Seed);
      return provider.LoadData(provider.GetDataDescriptors().Single(d => d.Name.Contains("Poly-10")));
    }

    // Some Keijzer problem instances have very large test partitions (here we are not
    // concerned about test performance), so the test partition is cut to maxRows rows.
    private static void LimitTestPartition(IRegressionProblemData problemData, int maxRows = 1000) {
      var testPartition = problemData.TestPartition;
      if (testPartition.End - testPartition.Start > maxRows) {
        testPartition.End = testPartition.Start + maxRows;
      }
    }

    // Simplifies Poly-10 by changing the target:
    //   Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
    // Each entry of removedTerms lists the indices k of the variables Xk whose product is
    // subtracted from Y, e.g. { 1, 7, 9 } removes X1*X7*X9. The terms are subtracted one
    // after the other in the given order, so the floating point result per row is the
    // same as with a sequence of "ys[i] -= ..." statements.
    private IRegressionProblemData CreateReducedPoly10Problem(params int[][] removedTerms) {
      var regProblem = LoadPoly10Instance();

      var ds = ((Dataset)regProblem.Dataset).ToModifiable();
      var ys = ds.GetDoubleValues("Y").ToArray();
      foreach (var term in removedTerms) {
        var factors = term.Select(k => ds.GetDoubleValues("X" + k).ToArray()).ToArray();
        for (int i = 0; i < ys.Length; i++) {
          double product = 1.0; // 1.0 * x is exact, so this equals the plain product
          foreach (var factor in factors) {
            product *= factor[i];
          }
          ys[i] -= product;
        }
      }
      ds.ReplaceVariable("Y", ys.ToList());

      return new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);
    }

    [TestMethod]
    [TestProperty("Goal", "structure search")]
    public void MctsSymbReg_NoConstants_15() {
      alg.MaxTreeSize = 5;
      // Note: matches on "15" only and does not cut the test partition.
      alg.Problem.ProblemData = LoadKeijzerInstance("15", Seed);

      alg.Start();
      EvaluateGrammarEnumeration();
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_Nguyen7() {
      // log(x + 1) + log(x² + 1)
      TestGrammarEnumeration(LoadNguyenInstance("F7 ", Seed));
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_Poly10_Part1() {
      //  Y' = X1*X2 + X3*X4 + X5*X6
      TestGrammarEnumeration(CreateReducedPoly10Problem(
        new[] { 1, 7, 9 },
        new[] { 3, 6, 10 }));
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_Poly10_Part2() {
      //  Y' = X1*X7*X9 + X3*X6*X10
      TestGrammarEnumeration(CreateReducedPoly10Problem(
        new[] { 1, 2 },
        new[] { 3, 4 },
        new[] { 5, 6 }));
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_Poly10_Part3() {
      //  Y' = X1*X2 + X1*X7*X9
      TestGrammarEnumeration(CreateReducedPoly10Problem(
        new[] { 3, 4 },
        new[] { 5, 6 },
        new[] { 3, 6, 10 }));
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_Poly10_Part4() {
      //  Y' = X3*X4 + X5*X6 + X3*X6*X10
      TestGrammarEnumeration(CreateReducedPoly10Problem(
        new[] { 1, 2 },
        new[] { 1, 7, 9 }));
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_Poly10_Part5() {
      //  Y' = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9
      TestGrammarEnumeration(CreateReducedPoly10Problem(
        new[] { 3, 6, 10 }));
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_Poly10_Part6() {
      //  Y' = X1*X2 + X3*X4 + X5*X6 + X3*X6*X10
      TestGrammarEnumeration(CreateReducedPoly10Problem(
        new[] { 1, 7, 9 }));
    }


    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "long")]
    public void MctsSymbReg_NoConstants_Poly10_250rows() {
      var regProblem = LoadPoly10Instance();
      // Train on all rows of the data set.
      regProblem.TrainingPartition.Start = 0;
      regProblem.TrainingPartition.End = regProblem.Dataset.Rows;
      regProblem.TestPartition.Start = 0;
      regProblem.TestPartition.End = 2; // must not be empty
      TestGrammarEnumeration(regProblem);
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "long")]
    public void MctsSymbReg_NoConstants_Poly10_10000rows() {
      // as poly-10 but more rows
      const int rows = 10000;
      var x1 = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
      var x2 = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
      var x3 = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
      var x4 = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
      var x5 = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
      var x6 = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
      var x7 = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
      // x8 does not appear in the target, but it is part of the data set (column "h").
      var x8 = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
      var x9 = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
      var x10 = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
      var ys = new List<double>(rows);
      for (int i = 0; i < x1.Count; i++) {
        ys.Add(x1[i] * x2[i] + x3[i] * x4[i] + x5[i] * x6[i] + x1[i] * x7[i] * x9[i] + x3[i] * x6[i] * x10[i]);
      }

      var ds = new Dataset(new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "y" },
        new[] { x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, ys });

      var problemData = new RegressionProblemData(ds, new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j" }, "y");

      // Train on all rows of the data set.
      problemData.TrainingPartition.Start = 0;
      problemData.TrainingPartition.End = problemData.Dataset.Rows;
      problemData.TestPartition.Start = 0;
      problemData.TestPartition.End = 2; // must not be empty

      TestGrammarEnumeration(problemData);
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_TwoVars() {
      // y = x1 + x2 + x1*x2 + x1*x2*x2 + x1*x1*x2
      var x1 = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList();
      var x2 = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList();
      var ys = x1.Zip(x2, (x1i, x2i) => x1i + x2i + x1i * x2i + x1i * x2i * x2i + x1i * x1i * x2i).ToList();

      var ds = new Dataset(new string[] { "a", "b", "y" }, new[] { x1, x2, ys });
      var problemData = new RegressionProblemData(ds, new string[] { "a", "b" }, "y");

      TestGrammarEnumeration(problemData);
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_Misleading() {
      // y = a + baaaaa (the effect of the second term should be very small)
      // the alg will quickly find that a has big effect and will search below a
      // since we prevent a + a... the algorithm must find the correct expression via a + b...
      // however b has a small effect so the branch might not be identified as relevant
      // NOTE(review): the code below computes y = a + b + a*b*c with c scaled by 1.0e-3,
      // which does not match the formula in the comment above — confirm the intended target.
      var @as = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList();
      var bs = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList();
      var cs = Enumerable.Range(0, 100).Select(_ => rand.NextDouble() * 1.0e-3).ToList();
      var ds = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList();
      var es = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList();
      var ys = new double[@as.Count];
      for (int i = 0; i < ys.Length; i++) {
        ys[i] = @as[i] + bs[i] + @as[i] * bs[i] * cs[i];
      }

      var dataset = new Dataset(new string[] { "a", "b", "c", "d", "e", "y" }, new[] { @as, bs, cs, ds, es, ys.ToList() });

      var problemData = new RegressionProblemData(dataset, new string[] { "a", "b", "c", "d", "e" }, "y");

      TestGrammarEnumeration(problemData);
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegKeijzer7() {
      // ln(x)
      var regProblem = LoadKeijzerInstance("Keijzer 7 f(", Seed);
      LimitTestPartition(regProblem);
      TestGrammarEnumeration(regProblem);
    }


    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegBenchmarkNguyen5() {
      // sin(x²)cos(x) - 1
      // Provider is created without a seed here (kept as in the original test).
      TestGrammarEnumeration(LoadNguyenInstance("F5 ", null));
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegBenchmarkNguyen6() {
      // sin(x) + sin(x + x²)
      // Provider is created without a seed here (kept as in the original test).
      TestGrammarEnumeration(LoadNguyenInstance("F6 ", null));
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegBenchmarkNguyen7() {
      //  log(x + 1) + log(x² + 1)
      TestGrammarEnumeration(LoadNguyenInstance("F7 ", Seed));
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegBenchmarkNguyen8() {
      // Sqrt(x)
      // = x ^ 0.5
      // = exp(0.5 * log(x))
      TestGrammarEnumeration(LoadNguyenInstance("F8 ", Seed));
    }

    // [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegBenchmarkNguyen9() {
      //  sin(x) + sin(y²)
      // Provider is created without a seed here (kept as in the original test).
      TestGrammarEnumeration(LoadNguyenInstance("F9 ", null));
    }

    // [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegBenchmarkNguyen10() {
      // 2sin(x)cos(y)
      // Provider is created without a seed here (kept as in the original test).
      TestGrammarEnumeration(LoadNguyenInstance("F10 ", null));
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegBenchmarkNguyen11() {
      // x ^ y  , x > 0, y > 0
      // = exp(y * log(x))
      TestGrammarEnumeration(LoadNguyenInstance("F11 ", Seed));
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegBenchmarkNguyen12() {
      // x^4 - x³ + y²/2 - y
      TestGrammarEnumeration(LoadNguyenInstance("F12 ", Seed));
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "long")]
    public void MctsSymbRegBenchmarkKeijzer5() {
      // (30 * x * z) / ((x - 10)  * y²)
      // = 30 x z / (xy² - 10y²)
      var regProblem = LoadKeijzerInstance("Keijzer 5 f(", Seed);
      LimitTestPartition(regProblem);
      TestGrammarEnumeration(regProblem);
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegBenchmarkKeijzer6() {
      // Keijzer 6 f(x) = Sum(1 / i) From 1 to X  , x \in [0..120]
      // we can only approximate this
      var regProblem = LoadKeijzerInstance("Keijzer 6 f(", Seed);
      LimitTestPartition(regProblem);
      TestGrammarEnumeration(regProblem);
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegBenchmarkKeijzer8() {
      // sqrt(x)
      var regProblem = LoadKeijzerInstance("Keijzer 8 f(", Seed);
      LimitTestPartition(regProblem);
      TestGrammarEnumeration(regProblem);
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegBenchmarkKeijzer9() {
      // arcsinh(x)  i.e. ln(x + sqrt(x² + 1))
      var regProblem = LoadKeijzerInstance("Keijzer 9 f(", Seed);
      LimitTestPartition(regProblem);
      TestGrammarEnumeration(regProblem);
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegBenchmarkKeijzer11() {
      // xy + sin( (x-1) (y-1) )
      // Provider is created without a seed here (kept as in the original test).
      var regProblem = LoadKeijzerInstance("Keijzer 11 f(", null);
      LimitTestPartition(regProblem);
      TestGrammarEnumeration(regProblem);
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegBenchmarkKeijzer12() {
      // x^4 - x³ + y² / 2 - y,  same as Nguyen 12
      var regProblem = LoadKeijzerInstance("Keijzer 12 f(", Seed);
      LimitTestPartition(regProblem);
      TestGrammarEnumeration(regProblem);
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegBenchmarkKeijzer14() {
      // 8 / (2 + x² + y²)
      var regProblem = LoadKeijzerInstance("Keijzer 14 f(", Seed);
      LimitTestPartition(regProblem);
      TestGrammarEnumeration(regProblem);
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegBenchmarkKeijzer15() {
      // x³ / 5 + y³ / 2 - y - x
      var regProblem = LoadKeijzerInstance("Keijzer 15 f(", Seed);
      LimitTestPartition(regProblem);
      TestGrammarEnumeration(regProblem);
    }
#endif
697  }
698}
Note: See TracBrowser for help on using the repository browser.