Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/Test/GrammarEnumerationTest.cs @ 15784

Last change on this file since 15784 was 15784, checked in by lkammere, 6 years ago

#2886: Add basic implementation for inverse factors.

File size: 30.3 KB
Line 
1using System;
2using System.Collections;
3using System.Collections.Generic;
4using System.Linq;
5using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration;
6using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
7using HeuristicLab.Common;
8using HeuristicLab.Core;
9using HeuristicLab.Problems.DataAnalysis;
10using HeuristicLab.Problems.Instances.DataAnalysis;
11using HeuristicLab.Random;
12using Microsoft.VisualStudio.TestTools.UnitTesting;
13
14namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
15  [TestClass]
16  public class MctsSymbolicRegressionTest {
17    private const int Seed = 1234;
18    private IRandom rand;
19
20    private const double SuccessThreshold = 0.9999999;
21
22    private GrammarEnumerationAlgorithm alg;
23    private RegressionProblem problem;
24
// Runs before every test: creates the seeded random source and a new
// GrammarEnumerationAlgorithm that is connected to a new RegressionProblem.
[TestInitialize]
public void InitTest() {
  rand = new FastRandom(Seed);

  alg = new GrammarEnumerationAlgorithm();
  alg.Problem = problem = new RegressionProblem();

  // Use the largest possible GUI update interval for the test runs.
  alg.GuiUpdateInterval = Int32.MaxValue;
}
34
// Runs after every test: writes the best training sentence (converted from
// postfix to infix notation) to the console, if the algorithm produced one.
[TestCleanup]
public void Cleanup() {
  // alg is still null when InitTest failed before assigning it. Returning here
  // keeps a NullReferenceException in cleanup from masking the original failure.
  if (alg == null || alg.BestTrainingSentence == null) return;

  Console.WriteLine("Training: " + alg.Grammar.PostfixToInfixParser(alg.BestTrainingSentence));
}
41
42
// Shared verification step: the algorithm must have published a training
// solution, and that solution's test R² must equal 1.0 within
// (1 - SuccessThreshold).
private void EvaluateGrammarEnumeration() {
  const string trainingSolutionKey = "Best solution (Training)";
  double tolerance = 1.0 - SuccessThreshold;

  // Check that the algorithm terminated with a result.
  Assert.IsTrue(alg.Results.ContainsKey(trainingSolutionKey), "No training solution returned!");

  // Check the quality of the result.
  var solution = (IRegressionSolution)alg.Results[trainingSolutionKey].Value;
  Assert.AreEqual(1.0, solution.TestRSquared, tolerance, "Test quality too low!");
}
53
54
// Structure search on Nguyen F1 (x³ + x² + x): the best training sentence must
// hash to the same value as the hand-built postfix target sentence, and the
// resulting solution must pass EvaluateGrammarEnumeration.
[TestMethod]
[TestProperty("Goal", "structure search")]
public void NoConstants_Nguyen1() {
  // x³ + x² + x
  alg.MaxTreeSize = 12;
  alg.Problem.ProblemData = new NguyenFunctionOne(Seed).GenerateRegressionData();

  alg.Start();

  // BestTrainingSentence can be null (Cleanup checks for it); fail with a clear
  // message instead of handing null to CalcHashCode below.
  Assert.IsNotNull(alg.BestTrainingSentence, "No best training sentence was found!");

  TerminalSymbol varSymbol = alg.Grammar.Var.VariableTerminalSymbols.First();
  TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
  TerminalSymbol addSymbol = alg.Grammar.Addition;

  // Target in postfix notation: x x x * *  x x * +  x +
  SymbolString targetSolution = new SymbolString(new[] {
    varSymbol, varSymbol, varSymbol, mulSymbol, mulSymbol,
    varSymbol, varSymbol, mulSymbol, addSymbol,
    varSymbol, addSymbol
  });

  int targetSolutionHash = alg.Grammar.CalcHashCode(targetSolution);
  int actualSolutionHash = alg.Grammar.CalcHashCode(alg.BestTrainingSentence);

  // NOTE(review): this looks up the hash of the sentence that was found; if the
  // intent is to verify that the target was enumerated, targetSolutionHash may
  // have been meant here - confirm.
  Assert.IsTrue(alg.DistinctSentences.ContainsKey(actualSolutionHash), "Actual solution was not generated!");

  Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");

  // Evaluate
  EvaluateGrammarEnumeration();
}
84
// Not run at the moment (TestMethod attribute commented out): the target
// model is too "large" for now.
//[TestMethod]
[TestProperty("Goal", "structure search")]
public void NoConstants_Nguyen2() {
  // Target: x^4 + x³ + x² + x
  alg.MaxTreeSize = 20;

  var problemData = new NguyenFunctionTwo(Seed).GenerateRegressionData();
  alg.Problem.ProblemData = problemData;

  alg.Start();

  EvaluateGrammarEnumeration();
}
96
// Not run at the moment (TestMethod attribute commented out): the target
// model is too "large" for now.
//[TestMethod]
[TestProperty("Goal", "structure search")]
public void NoConstants_Nguyen3() {
  // Target: x^5 + x^4 + x^3 + x^2 + x
  alg.MaxTreeSize = 32;

  var problemData = new NguyenFunctionThree(Seed).GenerateRegressionData();
  alg.Problem.ProblemData = problemData;

  alg.Start();

  EvaluateGrammarEnumeration();
}
109
// Structure search on Nguyen F6 (sin(x) + sin(x + x²)): the best training
// sentence must hash to the same value as the hand-built postfix target
// sentence, and the resulting solution must pass EvaluateGrammarEnumeration.
[TestMethod]
[TestProperty("Goal", "structure search")]
public void NoConstants_Nguyen6() {
  // sin(x) + sin(x + x²)
  alg.MaxTreeSize = 13;
  alg.Problem.ProblemData = new NguyenFunctionSix(Seed).GenerateRegressionData();

  alg.Start();

  // BestTrainingSentence can be null (Cleanup checks for it); fail with a clear
  // message instead of handing null to CalcHashCode below.
  Assert.IsNotNull(alg.BestTrainingSentence, "No best training sentence was found!");

  TerminalSymbol varSymbol = alg.Grammar.Var.VariableTerminalSymbols.First();
  TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
  TerminalSymbol addSymbol = alg.Grammar.Addition;
  TerminalSymbol sinSymbol = alg.Grammar.Sin;

  // Target in postfix notation: x sin  x x * x + sin  +
  SymbolString targetSolution = new SymbolString(new[] {
    varSymbol, sinSymbol,
    varSymbol, varSymbol, mulSymbol, varSymbol, addSymbol, sinSymbol, addSymbol
  });

  int targetSolutionHash = alg.Grammar.CalcHashCode(targetSolution);
  int actualSolutionHash = alg.Grammar.CalcHashCode(alg.BestTrainingSentence);

  // NOTE(review): this looks up the hash of the sentence that was found; if the
  // intent is to verify that the target was enumerated, targetSolutionHash may
  // have been meant here - confirm.
  Assert.IsTrue(alg.DistinctSentences.ContainsKey(actualSolutionHash), "Actual solution was not generated!");

  Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");

  EvaluateGrammarEnumeration();
}
138
// Structure search on Nguyen F9; only the quality of the returned solution
// is checked (via EvaluateGrammarEnumeration).
[TestMethod]
[TestProperty("Goal", "structure search")]
public void NoConstants_Nguyen9() {
  // Target: sin(x) + sin(y²)
  alg.MaxTreeSize = 10;

  var problemData = new NguyenFunctionNine(Seed).GenerateRegressionData();
  alg.Problem.ProblemData = problemData;

  alg.Start();

  EvaluateGrammarEnumeration();
}
149
// Not run at the moment (TestMethod attribute commented out): too many
// variables for now.
//[TestMethod]
[TestProperty("Goal", "structure search")]
public void MctsSymbReg_NoConstants_Poly10() {
  alg.MaxTreeSize = 10;

  var problemData = new PolyTen(Seed).GenerateRegressionData();
  alg.Problem.ProblemData = problemData;

  alg.Start();

  EvaluateGrammarEnumeration();
}
160
// Structure search on a synthetic inverse target, y = 1 / (log(x)*x + x),
// sampled at 100 random points with x = rand.NextDouble() + 1.1.
[TestMethod]
[TestProperty("Goal", "structure search")]
public void NoConstants_Inverse() {
  // 1 / (log(x)*x + x)
  alg.MaxTreeSize = 12;

  const int rows = 100;
  var inputs = new List<double>(rows);
  var targets = new List<double>(rows);
  for (int row = 0; row < rows; row++) {
    double xi = rand.NextDouble() + 1.1;
    inputs.Add(xi);
    targets.Add(1 / (Math.Log(xi) * xi + xi));
  }

  var variableNames = new List<string>() { "x", "y" };
  var variableValues = new List<IList>() { inputs, targets };
  var dataset = new Dataset(variableNames, variableValues);
  alg.Problem.ProblemData = new RegressionProblemData(dataset, "x".ToEnumerable(), "y");

  alg.Start();

  EvaluateGrammarEnumeration();
}
174
175
176#if false
177
// Runs the algorithm on the Keijzer instance whose descriptor name contains "15".
[TestMethod]
[TestProperty("Goal", "structure search")]
public void MctsSymbReg_NoConstants_15() {
  alg.MaxTreeSize = 5;

  var instances = new KeijzerInstanceProvider(Seed);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("15"));
  var problemData = instances.LoadData(descriptor);
  alg.Problem.ProblemData = problemData;

  alg.Start();

  EvaluateGrammarEnumeration();
}
189
190
// Nguyen F7: log(x + 1) + log(x² + 1)
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Nguyen7() {
  var instances = new NguyenInstanceProvider(Seed);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F7 "));
  var problemData = instances.LoadData(descriptor);

  TestGrammarEnumeration(problemData);
}
200
// Poly-10 with a simplified target: both three-way product terms are
// subtracted from Y so that only the pairwise products remain.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part1() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X1*X2 + X3*X4 + X5*X6
  // simplify problem by changing target
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();

  // Only the columns that appear in the removed terms are read.
  var x1 = ds.GetDoubleValues("X1").ToArray();
  var x3 = ds.GetDoubleValues("X3").ToArray();
  var x6 = ds.GetDoubleValues("X6").ToArray();
  var x7 = ds.GetDoubleValues("X7").ToArray();
  var x9 = ds.GetDoubleValues("X9").ToArray();
  var x10 = ds.GetDoubleValues("X10").ToArray();
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x1[i] * x7[i] * x9[i];
    ys[i] -= x3[i] * x6[i] * x10[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());

  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestGrammarEnumeration(modifiedProblemData);
}
233
// Poly-10 with a simplified target: the three pairwise product terms are
// subtracted from Y so that only the three-way products remain.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part2() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X1*X7*X9 + X3*X6*X10
  // simplify problem by changing target
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();

  // Only the columns that appear in the removed terms are read.
  var x1 = ds.GetDoubleValues("X1").ToArray();
  var x2 = ds.GetDoubleValues("X2").ToArray();
  var x3 = ds.GetDoubleValues("X3").ToArray();
  var x4 = ds.GetDoubleValues("X4").ToArray();
  var x5 = ds.GetDoubleValues("X5").ToArray();
  var x6 = ds.GetDoubleValues("X6").ToArray();
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x1[i] * x2[i];
    ys[i] -= x3[i] * x4[i];
    ys[i] -= x5[i] * x6[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());

  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestGrammarEnumeration(modifiedProblemData);
}
267
// Poly-10 with a simplified target: every term that does not contain X1 is
// subtracted from Y.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part3() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X1*X2 + X1*X7*X9
  // simplify problem by changing target
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();

  // Only the columns that appear in the removed terms are read.
  var x3 = ds.GetDoubleValues("X3").ToArray();
  var x4 = ds.GetDoubleValues("X4").ToArray();
  var x5 = ds.GetDoubleValues("X5").ToArray();
  var x6 = ds.GetDoubleValues("X6").ToArray();
  var x10 = ds.GetDoubleValues("X10").ToArray();
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x3[i] * x4[i];
    ys[i] -= x5[i] * x6[i];
    ys[i] -= x3[i] * x6[i] * x10[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());

  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestGrammarEnumeration(modifiedProblemData);
}
301
// Poly-10 with a simplified target: every term that contains X1 is
// subtracted from Y.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part4() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X3*X4 + X5*X6 + X3*X6*X10
  // simplify problem by changing target
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();

  // Only the columns that appear in the removed terms are read.
  var x1 = ds.GetDoubleValues("X1").ToArray();
  var x2 = ds.GetDoubleValues("X2").ToArray();
  var x7 = ds.GetDoubleValues("X7").ToArray();
  var x9 = ds.GetDoubleValues("X9").ToArray();
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x1[i] * x2[i];
    ys[i] -= x1[i] * x7[i] * x9[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());
  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestGrammarEnumeration(modifiedProblemData);
}
334
// Poly-10 with a simplified target: only the term X3*X6*X10 is subtracted
// from Y.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part5() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9
  // simplify problem by changing target
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();

  // Only the columns that appear in the removed term are read.
  var x3 = ds.GetDoubleValues("X3").ToArray();
  var x6 = ds.GetDoubleValues("X6").ToArray();
  var x10 = ds.GetDoubleValues("X10").ToArray();
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x3[i] * x6[i] * x10[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());
  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestGrammarEnumeration(modifiedProblemData);
}
366
// Poly-10 with a simplified target: only the term X1*X7*X9 is subtracted
// from Y.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part6() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X1*X2 + X3*X4 + X5*X6 + X3*X6*X10
  // simplify problem by changing target
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();

  // Only the columns that appear in the removed term are read.
  var x1 = ds.GetDoubleValues("X1").ToArray();
  var x7 = ds.GetDoubleValues("X7").ToArray();
  var x9 = ds.GetDoubleValues("X9").ToArray();
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x1[i] * x7[i] * x9[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());
  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestGrammarEnumeration(modifiedProblemData);
}
397
398
// Poly-10 with the training partition stretched over all rows of the dataset
// and the test partition set to rows [0, 2).
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "long")]
public void MctsSymbReg_NoConstants_Poly10_250rows() {
  var instances = new VariousInstanceProvider(Seed);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Poly-10"));
  var problemData = instances.LoadData(descriptor);

  // train on every row
  problemData.TrainingPartition.Start = 0;
  problemData.TrainingPartition.End = problemData.Dataset.Rows;

  // two-row test partition
  problemData.TestPartition.Start = 0;
  problemData.TestPartition.End = 2;

  TestGrammarEnumeration(problemData);
}
411
// Same target function as Poly-10, but evaluated on 10000 randomly drawn rows.
// Locals a..j carry the dataset variable names; "h" is generated but does not
// appear in the target.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "long")]
public void MctsSymbReg_NoConstants_Poly10_10000rows() {
  const int rows = 10000;
  Func<List<double>> randomColumn = () => Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();

  // Columns are drawn one after another, in variable order.
  var a = randomColumn();
  var b = randomColumn();
  var c = randomColumn();
  var d = randomColumn();
  var e = randomColumn();
  var f = randomColumn();
  var g = randomColumn();
  var h = randomColumn();
  var i = randomColumn();
  var j = randomColumn();

  // y = a*b + c*d + e*f + a*g*i + c*f*j
  var y = Enumerable.Range(0, a.Count)
    .Select(r => a[r] * b[r] + c[r] * d[r] + e[r] * f[r] + a[r] * g[r] * i[r] + c[r] * f[r] * j[r])
    .ToList();

  var variableNames = new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "y" };
  var dataset = new Dataset(variableNames, new[] { a, b, c, d, e, f, g, h, i, j, y });

  var inputNames = new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j" };
  var problemData = new RegressionProblemData(dataset, inputNames, "y");

  problemData.TrainingPartition.Start = 0;
  problemData.TrainingPartition.End = problemData.Dataset.Rows;
  problemData.TestPartition.Start = 0;
  problemData.TestPartition.End = 2; // must not be empty

  TestGrammarEnumeration(problemData);
}
446
// Synthetic two-variable problem on 100 random rows:
// y = a + b + a*b + a*b*b + a*a*b
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_TwoVars() {
  var a = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList();
  var b = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList();

  var y = new List<double>(a.Count);
  for (int r = 0; r < a.Count; r++) {
    y.Add(a[r] + b[r] + a[r] * b[r] + a[r] * b[r] * b[r] + a[r] * a[r] * b[r]);
  }

  var dataset = new Dataset(new string[] { "a", "b", "y" }, new[] { a, b, y });
  var inputNames = new string[] { "a", "b" };
  var problemData = new RegressionProblemData(dataset, inputNames, "y");

  TestGrammarEnumeration(problemData);
}
462
// Synthetic "misleading" problem on 100 random rows with five inputs (a..e):
// y = a + b + a*b*c, where c is drawn from [0, 1) and scaled by 1.0e-3, so the
// product term contributes very little. Inputs d and e are not part of y.
// Original rationale: the algorithm quickly finds that a has a big effect and
// searches below a; the remaining term has a small effect, so its branch
// might not be identified as relevant.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Misleading() {
  const int rows = 100;
  var colA = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
  var colB = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
  var colC = Enumerable.Range(0, rows).Select(_ => rand.NextDouble() * 1.0e-3).ToList();
  var colD = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
  var colE = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();

  var target = Enumerable.Range(0, colA.Count)
    .Select(r => colA[r] + colB[r] + colA[r] * colB[r] * colC[r])
    .ToList();

  var variableNames = new string[] { "a", "b", "c", "d", "e", "y" };
  var dataset = new Dataset(variableNames, new[] { colA, colB, colC, colD, colE, target });

  var inputNames = new string[] { "a", "b", "c", "d", "e" };
  var problemData = new RegressionProblemData(dataset, inputNames, "y");

  TestGrammarEnumeration(problemData);
}
487
// Keijzer 7: ln(x)
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegKeijzer7() {
  const int maxTestRows = 1000;
  var instances = new KeijzerInstanceProvider(Seed);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Keijzer 7 f("));
  var problemData = instances.LoadData(descriptor);

  // Some Keijzer instances have very large test partitions; test performance
  // is not of interest here, so the partition is capped.
  if (problemData.TestPartition.End - problemData.TestPartition.Start > maxTestRows) {
    problemData.TestPartition.End = problemData.TestPartition.Start + maxTestRows;
  }

  TestGrammarEnumeration(problemData);
}
499
500   
// Nguyen F5: sin(x²)cos(x) - 1
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkNguyen5() {
  // provider is created without a seed here
  var instances = new NguyenInstanceProvider();
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F5 "));
  var problemData = instances.LoadData(descriptor);

  TestGrammarEnumeration(problemData);
}
510
// Nguyen F6: sin(x) + sin(x + x²)
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkNguyen6() {
  // provider is created without a seed here
  var instances = new NguyenInstanceProvider();
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F6 "));
  var problemData = instances.LoadData(descriptor);

  TestGrammarEnumeration(problemData);
}
520   
// Nguyen F7: log(x + 1) + log(x² + 1)
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkNguyen7() {
  var instances = new NguyenInstanceProvider(Seed);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F7 "));
  var problemData = instances.LoadData(descriptor);

  TestGrammarEnumeration(problemData);
}
// Nguyen F8: Sqrt(x) = x ^ 0.5 = exp(0.5 * log(x))
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkNguyen8() {
  var instances = new NguyenInstanceProvider(Seed);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F8 "));
  var problemData = instances.LoadData(descriptor);

  TestGrammarEnumeration(problemData);
}
541   
// Nguyen F9: sin(x) + sin(y²)
// Not run at the moment: the TestMethod attribute is commented out.
// [TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkNguyen9() {
  // provider is created without a seed here
  var instances = new NguyenInstanceProvider();
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F9 "));
  var problemData = instances.LoadData(descriptor);

  TestGrammarEnumeration(problemData);
}
551
// Nguyen F10: 2sin(x)cos(y)
// Not run at the moment: the TestMethod attribute is commented out.
// [TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkNguyen10() {
  // provider is created without a seed here
  var instances = new NguyenInstanceProvider();
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F10 "));
  var problemData = instances.LoadData(descriptor);

  TestGrammarEnumeration(problemData);
}
561   
// Nguyen F11: x ^ y (x > 0, y > 0) = exp(y * log(x))
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkNguyen11() {
  var instances = new NguyenInstanceProvider(Seed);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F11 "));
  var problemData = instances.LoadData(descriptor);

  TestGrammarEnumeration(problemData);
}
// Nguyen F12: x^4 - x³ + y²/2 - y
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkNguyen12() {
  var instances = new NguyenInstanceProvider(Seed);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F12 "));
  var problemData = instances.LoadData(descriptor);

  TestGrammarEnumeration(problemData);
}
581
// Keijzer 5: (30 * x * z) / ((x - 10) * y²)
// (the original note also lists the form 30 x z / (xy² - y²))
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "long")]
public void MctsSymbRegBenchmarkKeijzer5() {
  const int maxTestRows = 1000;
  var instances = new KeijzerInstanceProvider(Seed);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Keijzer 5 f("));
  var problemData = instances.LoadData(descriptor);

  // Some Keijzer instances have very large test partitions; test performance
  // is not of interest here, so the partition is capped.
  if (problemData.TestPartition.End - problemData.TestPartition.Start > maxTestRows) {
    problemData.TestPartition.End = problemData.TestPartition.Start + maxTestRows;
  }

  TestGrammarEnumeration(problemData);
}
594
// Keijzer 6: f(x) = Sum(1 / i) from 1 to x, x in [0..120].
// This target can only be approximated.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer6() {
  const int maxTestRows = 1000;
  var instances = new KeijzerInstanceProvider(Seed);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Keijzer 6 f("));
  var problemData = instances.LoadData(descriptor);

  // Some Keijzer instances have very large test partitions; test performance
  // is not of interest here, so the partition is capped.
  if (problemData.TestPartition.End - problemData.TestPartition.Start > maxTestRows) {
    problemData.TestPartition.End = problemData.TestPartition.Start + maxTestRows;
  }

  TestGrammarEnumeration(problemData);
}
607
// Keijzer 8: sqrt(x)
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer8() {
  const int maxTestRows = 1000;
  var instances = new KeijzerInstanceProvider(Seed);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Keijzer 8 f("));
  var problemData = instances.LoadData(descriptor);

  // Some Keijzer instances have very large test partitions; test performance
  // is not of interest here, so the partition is capped.
  if (problemData.TestPartition.End - problemData.TestPartition.Start > maxTestRows) {
    problemData.TestPartition.End = problemData.TestPartition.Start + maxTestRows;
  }

  TestGrammarEnumeration(problemData);
}
619
// Keijzer 9: arcsinh(x), i.e. ln(x + sqrt(x² + 1))
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer9() {
  const int maxTestRows = 1000;
  var instances = new KeijzerInstanceProvider(Seed);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Keijzer 9 f("));
  var problemData = instances.LoadData(descriptor);

  // Some Keijzer instances have very large test partitions; test performance
  // is not of interest here, so the partition is capped.
  if (problemData.TestPartition.End - problemData.TestPartition.Start > maxTestRows) {
    problemData.TestPartition.End = problemData.TestPartition.Start + maxTestRows;
  }

  TestGrammarEnumeration(problemData);
}
631
// Keijzer 11: xy + sin( (x-1) (y-1) )
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer11() {
  const int maxTestRows = 1000;
  // provider is created without a seed here
  var instances = new KeijzerInstanceProvider();
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Keijzer 11 f("));
  var problemData = instances.LoadData(descriptor);

  // Some Keijzer instances have very large test partitions; test performance
  // is not of interest here, so the partition is capped.
  if (problemData.TestPartition.End - problemData.TestPartition.Start > maxTestRows) {
    problemData.TestPartition.End = problemData.TestPartition.Start + maxTestRows;
  }

  TestGrammarEnumeration(problemData);
}
643
// Keijzer 12: x^4 - x³ + y² / 2 - y (same as Nguyen 12)
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer12() {
  const int maxTestRows = 1000;
  var instances = new KeijzerInstanceProvider(Seed);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Keijzer 12 f("));
  var problemData = instances.LoadData(descriptor);

  // Some Keijzer instances have very large test partitions; test performance
  // is not of interest here, so the partition is capped.
  if (problemData.TestPartition.End - problemData.TestPartition.Start > maxTestRows) {
    problemData.TestPartition.End = problemData.TestPartition.Start + maxTestRows;
  }

  TestGrammarEnumeration(problemData);
}
655
// Keijzer 14: 8 / (2 + x² + y²)
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer14() {
  const int maxTestRows = 1000;
  var instances = new KeijzerInstanceProvider(Seed);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Keijzer 14 f("));
  var problemData = instances.LoadData(descriptor);

  // Some Keijzer instances have very large test partitions; test performance
  // is not of interest here, so the partition is capped.
  if (problemData.TestPartition.End - problemData.TestPartition.Start > maxTestRows) {
    problemData.TestPartition.End = problemData.TestPartition.Start + maxTestRows;
  }

  TestGrammarEnumeration(problemData);
}
667
// Keijzer 15: x³ / 5 + y³ / 2 - y - x
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer15() {
  const int maxTestRows = 1000;
  var instances = new KeijzerInstanceProvider(Seed);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Keijzer 15 f("));
  var problemData = instances.LoadData(descriptor);

  // Some Keijzer instances have very large test partitions; test performance
  // is not of interest here, so the partition is capped.
  if (problemData.TestPartition.End - problemData.TestPartition.Start > maxTestRows) {
    problemData.TestPartition.End = problemData.TestPartition.Start + maxTestRows;
  }

  TestGrammarEnumeration(problemData);
}
679#endif
680  }
681}
Note: See TracBrowser for help on using the repository browser.