Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/Test/GrammarEnumerationTest.cs @ 15849

Last change on this file since 15849 was 15849, checked in by lkammere, 7 years ago

#2886: Add constants to grammar.

File size: 32.2 KB
Line 
1using System;
2using System.Collections;
3using System.Collections.Generic;
4using System.Linq;
5using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration;
6using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
7using HeuristicLab.Common;
8using HeuristicLab.Core;
9using HeuristicLab.Problems.DataAnalysis;
10using HeuristicLab.Problems.Instances.DataAnalysis;
11using HeuristicLab.Random;
12using Microsoft.VisualStudio.TestTools.UnitTesting;
13
14namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
15  [TestClass]
16  public class MctsSymbolicRegressionTest {
17    private const int Seed = 1234;
18    private IRandom rand;
19
20    private const double SuccessThreshold = 0.9999999;
21
22    private GrammarEnumerationAlgorithm alg;
23    private RegressionProblem problem;
24
25    [TestInitialize]
26    public void InitTest() {
27      rand = new FastRandom(Seed);
28
29      alg = new GrammarEnumerationAlgorithm();
30      problem = new RegressionProblem();
31      alg.Problem = problem;
32      alg.GuiUpdateInterval = int.MaxValue;
33      foreach (IGrammarEnumerationAnalyzer grammarEnumerationAnalyzer in alg.Analyzers) {
34        alg.Analyzers.SetItemCheckedState(grammarEnumerationAnalyzer, grammarEnumerationAnalyzer is RSquaredEvaluator);
35      }
36    }
37
38    [TestCleanup]
39    public void Cleanup() {
40      if (alg.BestTrainingSentence != null) {
41        Console.WriteLine("Training: " + alg.Grammar.ToInfixString(alg.BestTrainingSentence));
42      }
43    }
44
45
46    private void EvaluateGrammarEnumeration() {
47      // Evaluate results
48      var eps = 1.0 - SuccessThreshold;
49
50      // Check if algorithm terminated correctly
51      Assert.IsTrue(alg.Results.ContainsKey("Best solution (Training)"), "No training solution returned!");
52
53      // Check resultss
54      Assert.AreEqual(1.0, ((IRegressionSolution)alg.Results["Best solution (Training)"].Value).TestRSquared, eps, "Test quality too low!");
55    }
56
57
58    [TestMethod]
59    [TestProperty("Goal", "structure search")]
60    public void NoConstants_Nguyen1() {
61      // x³ + x² + x
62      alg.MaxTreeSize = 20;
63      alg.Problem.ProblemData = new NguyenFunctionOne(Seed).GenerateRegressionData();
64
65      alg.Start();
66
67      TerminalSymbol constSymbol = alg.Grammar.Const;
68      TerminalSymbol varSymbol = alg.Grammar.VarTerminals.First();
69      TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
70      TerminalSymbol addSymbol = alg.Grammar.Addition;
71
72      SymbolString targetSolution = new SymbolString(new[] {
73        constSymbol, varSymbol, varSymbol, varSymbol, mulSymbol, mulSymbol, mulSymbol,
74        constSymbol, varSymbol, varSymbol, mulSymbol, mulSymbol, addSymbol,
75        constSymbol, varSymbol, mulSymbol, addSymbol,
76        constSymbol, addSymbol
77      });
78
79      int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
80      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
81
82      Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
83
84      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
85
86      // Evaluate
87      EvaluateGrammarEnumeration();
88    }
89
90    // Too "large" target model for now...
91    //[TestMethod]
92    [TestProperty("Goal", "structure search")]
93    public void NoConstants_Nguyen2() {
94      // x^4 + x³ + x² + x
95      alg.MaxTreeSize = 30;
96      alg.Problem.ProblemData = new NguyenFunctionTwo(Seed).GenerateRegressionData();
97
98      alg.Start();
99      EvaluateGrammarEnumeration();
100    }
101
102    // Too "large" target model for now...
103    //[TestMethod]
104    [TestProperty("Goal", "structure search")]
105    public void NoConstants_Nguyen3() {
106      // x^5 + x^4 + x^3 + x^2 + x
107      alg.MaxTreeSize = 32;
108      alg.Problem.ProblemData = new NguyenFunctionThree(Seed).GenerateRegressionData();
109
110      alg.Start();
111
112      EvaluateGrammarEnumeration();
113    }
114
115    [TestMethod]
116    [TestProperty("Goal", "structure search")]
117    public void NoConstants_Nguyen6() {
118      // sin(x) + sin(x + x²)
119      alg.MaxTreeSize = 25;
120      alg.Problem.ProblemData = new NguyenFunctionSix(Seed).GenerateRegressionData();
121
122      alg.Start();
123
124      TerminalSymbol constSymbol = alg.Grammar.Const;
125      TerminalSymbol varSymbol = alg.Grammar.VarTerminals.First();
126      TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
127      TerminalSymbol addSymbol = alg.Grammar.Addition;
128      TerminalSymbol sinSymbol = alg.Grammar.Sin;
129
130      // c * sin(c x + c) + c * sin(c * x * x + c * x) + c
131      SymbolString targetSolution = new SymbolString(new[] {
132        varSymbol, constSymbol, mulSymbol, constSymbol, addSymbol, sinSymbol, constSymbol, mulSymbol,
133        varSymbol, varSymbol, mulSymbol, constSymbol, mulSymbol, varSymbol, constSymbol, mulSymbol, addSymbol, constSymbol, addSymbol, sinSymbol, constSymbol, mulSymbol, addSymbol,
134        constSymbol, addSymbol
135      });
136
137      int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
138      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
139
140      Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
141      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
142
143      EvaluateGrammarEnumeration();
144    }
145
146    [TestMethod]
147    [TestProperty("Goal", "structure search")]
148    public void NoConstants_Nguyen9() {
149      // sin(x) + sin(y²)
150      alg.MaxTreeSize = 22;
151      alg.Problem.ProblemData = new NguyenFunctionNine(Seed).GenerateRegressionData();
152
153      alg.Start();
154
155      TerminalSymbol xSymbol = alg.Grammar.VarTerminals.First(v => v.StringRepresentation == "X");
156      TerminalSymbol ySymbol = alg.Grammar.VarTerminals.First(v => v.StringRepresentation == "Y");
157      TerminalSymbol constSymbol = alg.Grammar.Const;
158      TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
159      TerminalSymbol addSymbol = alg.Grammar.Addition;
160      TerminalSymbol sinSymbol = alg.Grammar.Sin;
161
162      // c*sin(c*x + c) + c*sin(c*y*y + c) + c
163      SymbolString targetSolution = new SymbolString(new[] {
164        xSymbol, constSymbol, mulSymbol, constSymbol, addSymbol, sinSymbol, constSymbol, mulSymbol,
165        ySymbol, ySymbol, mulSymbol, constSymbol, mulSymbol, constSymbol, addSymbol, sinSymbol, constSymbol, mulSymbol, addSymbol,
166        constSymbol, addSymbol
167      });
168
169      int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
170      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);
171
172      Assert.IsTrue(alg.DistinctSentencesLength.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
173      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");
174
175      EvaluateGrammarEnumeration();
176    }
177
178    // Too much variables for now...
179    //[TestMethod]
180    [TestProperty("Goal", "structure search")]
181    public void MctsSymbReg_NoConstants_Poly10() {
182      alg.MaxTreeSize = 10;
183      alg.Problem.ProblemData = new PolyTen(Seed).GenerateRegressionData();
184
185      alg.Start();
186      EvaluateGrammarEnumeration();
187    }
188
189    [TestMethod]
190    [TestProperty("Goal", "structure search")]
191    public void NoConstants_Inverse() {
192      // x / (log(x)*x + x)
193      alg.MaxTreeSize = 23;
194
195      var x = Enumerable.Range(0, 100).Select(_ => rand.NextDouble() + 1.1).ToList();
196      var y = x.Select(xi => xi / (Math.Log(xi) * xi + xi)).ToList();
197      alg.Problem.ProblemData = new RegressionProblemData(new Dataset(new List<string>() { "x", "y" }, new List<IList>() { x, y }), "x".ToEnumerable(), "y");
198
199      alg.Start();
200      EvaluateGrammarEnumeration();
201    }
202
203
204#if false
205
206    [TestMethod]
207    [TestProperty("Goal", "structure search")]
208    public void MctsSymbReg_NoConstants_15() {
209      alg.MaxTreeSize = 5;
210      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(Seed);
211      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("15")));
212      alg.Problem.ProblemData = regProblem;
213
214      alg.Start();
215      EvaluateGrammarEnumeration();
216    }
217
218
219    [TestMethod]
220    [TestCategory("Algorithms.DataAnalysis")]
221    [TestProperty("Time", "short")]
222    public void MctsSymbReg_NoConstants_Nguyen7() {
223      // log(x + 1) + log(x² + 1)
224      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed);
225      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F7 ")));
226      TestGrammarEnumeration(regProblem);
227    }
228
229    [TestMethod]
230    [TestCategory("Algorithms.DataAnalysis")]
231    [TestProperty("Time", "short")]
232    public void MctsSymbReg_NoConstants_Poly10_Part1() {
233      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed);
234      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));
235
236      //  Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
237      //  Y' = X1*X2 + X3*X4 + X5*X6
238      // simplify problem by changing target
239      var ds = ((Dataset)regProblem.Dataset).ToModifiable();
240      var ys = ds.GetDoubleValues("Y").ToArray();
241      var x1 = ds.GetDoubleValues("X1").ToArray();
242      var x2 = ds.GetDoubleValues("X2").ToArray();
243      var x3 = ds.GetDoubleValues("X3").ToArray();
244      var x4 = ds.GetDoubleValues("X4").ToArray();
245      var x5 = ds.GetDoubleValues("X5").ToArray();
246      var x6 = ds.GetDoubleValues("X6").ToArray();
247      var x7 = ds.GetDoubleValues("X7").ToArray();
248      var x8 = ds.GetDoubleValues("X8").ToArray();
249      var x9 = ds.GetDoubleValues("X9").ToArray();
250      var x10 = ds.GetDoubleValues("X10").ToArray();
251      for (int i = 0; i < ys.Length; i++) {
252        ys[i] -= x1[i] * x7[i] * x9[i];
253        ys[i] -= x3[i] * x6[i] * x10[i];
254      }
255      ds.ReplaceVariable("Y", ys.ToList());
256
257      var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);
258
259      TestGrammarEnumeration(modifiedProblemData);
260    }
261
262    [TestMethod]
263    [TestCategory("Algorithms.DataAnalysis")]
264    [TestProperty("Time", "short")]
265    public void MctsSymbReg_NoConstants_Poly10_Part2() {
266      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed);
267      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));
268
269      //  Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
270      //  Y' = X1*X7*X9 + X3*X6*X10
271      // simplify problem by changing target
272      var ds = ((Dataset)regProblem.Dataset).ToModifiable();
273      var ys = ds.GetDoubleValues("Y").ToArray();
274      var x1 = ds.GetDoubleValues("X1").ToArray();
275      var x2 = ds.GetDoubleValues("X2").ToArray();                                           
276      var x3 = ds.GetDoubleValues("X3").ToArray();
277      var x4 = ds.GetDoubleValues("X4").ToArray();
278      var x5 = ds.GetDoubleValues("X5").ToArray();
279      var x6 = ds.GetDoubleValues("X6").ToArray();
280      var x7 = ds.GetDoubleValues("X7").ToArray();
281      var x8 = ds.GetDoubleValues("X8").ToArray();
282      var x9 = ds.GetDoubleValues("X9").ToArray();
283      var x10 = ds.GetDoubleValues("X10").ToArray();
284      for (int i = 0; i < ys.Length; i++) {
285        ys[i] -= x1[i] * x2[i];
286        ys[i] -= x3[i] * x4[i];
287        ys[i] -= x5[i] * x6[i];
288      }
289      ds.ReplaceVariable("Y", ys.ToList());
290
291      var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);
292
293      TestGrammarEnumeration(modifiedProblemData);
294    }
295
296    [TestMethod]
297    [TestCategory("Algorithms.DataAnalysis")]
298    [TestProperty("Time", "short")]
299    public void MctsSymbReg_NoConstants_Poly10_Part3() {
300      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed);
301      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));
302
303      //  Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
304      //  Y' = X1*X2 + X1*X7*X9
305      // simplify problem by changing target
306      var ds = ((Dataset)regProblem.Dataset).ToModifiable();
307      var ys = ds.GetDoubleValues("Y").ToArray();
308      var x1 = ds.GetDoubleValues("X1").ToArray();
309      var x2 = ds.GetDoubleValues("X2").ToArray();
310      var x3 = ds.GetDoubleValues("X3").ToArray();
311      var x4 = ds.GetDoubleValues("X4").ToArray();
312      var x5 = ds.GetDoubleValues("X5").ToArray();
313      var x6 = ds.GetDoubleValues("X6").ToArray();
314      var x7 = ds.GetDoubleValues("X7").ToArray();
315      var x8 = ds.GetDoubleValues("X8").ToArray();
316      var x9 = ds.GetDoubleValues("X9").ToArray();
317      var x10 = ds.GetDoubleValues("X10").ToArray();
318      for (int i = 0; i < ys.Length; i++) {
319        ys[i] -= x3[i] * x4[i];
320        ys[i] -= x5[i] * x6[i];
321        ys[i] -= x3[i] * x6[i] * x10[i];
322      }
323      ds.ReplaceVariable("Y", ys.ToList());
324
325      var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);
326
327      TestGrammarEnumeration(modifiedProblemData);
328    }
329
330    [TestMethod]
331    [TestCategory("Algorithms.DataAnalysis")]
332    [TestProperty("Time", "short")]
333    public void MctsSymbReg_NoConstants_Poly10_Part4() {
334      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed);
335      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));
336
337      //  Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
338      //  Y' = X3*X4 + X5*X6 + X3*X6*X10
339      // simplify problem by changing target
340      var ds = ((Dataset)regProblem.Dataset).ToModifiable();
341      var ys = ds.GetDoubleValues("Y").ToArray();
342      var x1 = ds.GetDoubleValues("X1").ToArray();
343      var x2 = ds.GetDoubleValues("X2").ToArray();
344      var x3 = ds.GetDoubleValues("X3").ToArray();
345      var x4 = ds.GetDoubleValues("X4").ToArray();
346      var x5 = ds.GetDoubleValues("X5").ToArray();
347      var x6 = ds.GetDoubleValues("X6").ToArray();
348      var x7 = ds.GetDoubleValues("X7").ToArray();
349      var x8 = ds.GetDoubleValues("X8").ToArray();
350      var x9 = ds.GetDoubleValues("X9").ToArray();
351      var x10 = ds.GetDoubleValues("X10").ToArray();
352      for (int i = 0; i < ys.Length; i++) {
353        ys[i] -= x1[i] * x2[i];
354        ys[i] -= x1[i] * x7[i] * x9[i];
355      }
356      ds.ReplaceVariable("Y", ys.ToList());
357      var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);
358
359
360      TestGrammarEnumeration(modifiedProblemData);
361    }
362
363    [TestMethod]
364    [TestCategory("Algorithms.DataAnalysis")]
365    [TestProperty("Time", "short")]
366    public void MctsSymbReg_NoConstants_Poly10_Part5() {
367      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed);
368      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));
369
370      //  Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
371      //  Y' = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9
372      // simplify problem by changing target
373      var ds = ((Dataset)regProblem.Dataset).ToModifiable();
374      var ys = ds.GetDoubleValues("Y").ToArray();
375      var x1 = ds.GetDoubleValues("X1").ToArray();
376      var x2 = ds.GetDoubleValues("X2").ToArray();
377      var x3 = ds.GetDoubleValues("X3").ToArray();
378      var x4 = ds.GetDoubleValues("X4").ToArray();
379      var x5 = ds.GetDoubleValues("X5").ToArray();
380      var x6 = ds.GetDoubleValues("X6").ToArray();
381      var x7 = ds.GetDoubleValues("X7").ToArray();
382      var x8 = ds.GetDoubleValues("X8").ToArray();
383      var x9 = ds.GetDoubleValues("X9").ToArray();
384      var x10 = ds.GetDoubleValues("X10").ToArray();
385      for (int i = 0; i < ys.Length; i++) {
386        ys[i] -= x3[i] * x6[i] * x10[i];
387      }
388      ds.ReplaceVariable("Y", ys.ToList());
389      var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);
390
391
392      TestGrammarEnumeration(modifiedProblemData);
393    }
394
395    [TestMethod]
396    [TestCategory("Algorithms.DataAnalysis")]
397    [TestProperty("Time", "short")]
398    public void MctsSymbReg_NoConstants_Poly10_Part6() {
399      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed);
400      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));
401
402      //  Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
403      //  Y' = X1*X2 + X3*X4 + X5*X6 + X3*X6*X10
404      // simplify problem by changing target
405      var ds = ((Dataset)regProblem.Dataset).ToModifiable();
406      var ys = ds.GetDoubleValues("Y").ToArray();
407      var x1 = ds.GetDoubleValues("X1").ToArray();
408      var x2 = ds.GetDoubleValues("X2").ToArray();
409      var x3 = ds.GetDoubleValues("X3").ToArray();
410      var x4 = ds.GetDoubleValues("X4").ToArray();
411      var x5 = ds.GetDoubleValues("X5").ToArray();
412      var x6 = ds.GetDoubleValues("X6").ToArray();
413      var x7 = ds.GetDoubleValues("X7").ToArray();
414      var x8 = ds.GetDoubleValues("X8").ToArray();
415      var x9 = ds.GetDoubleValues("X9").ToArray();
416      var x10 = ds.GetDoubleValues("X10").ToArray();
417      for (int i = 0; i < ys.Length; i++) {
418        ys[i] -= x1[i] * x7[i] * x9[i];
419      }
420      ds.ReplaceVariable("Y", ys.ToList());
421      var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);
422
423      TestGrammarEnumeration(modifiedProblemData);
424    }
425
426
427    [TestMethod]
428    [TestCategory("Algorithms.DataAnalysis")]
429    [TestProperty("Time", "long")]
430    public void MctsSymbReg_NoConstants_Poly10_250rows() {
431      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed);
432      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));
433      regProblem.TrainingPartition.Start = 0;
434      regProblem.TrainingPartition.End = regProblem.Dataset.Rows;
435      regProblem.TestPartition.Start = 0;
436      regProblem.TestPartition.End = 2;
437      TestGrammarEnumeration(regProblem);
438    }
439
440    [TestMethod]
441    [TestCategory("Algorithms.DataAnalysis")]
442    [TestProperty("Time", "long")]
443    public void MctsSymbReg_NoConstants_Poly10_10000rows() {
444      // as poly-10 but more rows
445      var x1 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList();
446      var x2 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList();
447      var x3 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList();
448      var x4 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList();
449      var x5 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList();
450      var x6 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList();
451      var x7 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList();
452      var x8 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList();
453      var x9 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList();
454      var x10 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList();
455      var ys = new List<double>();
456      for (int i = 0; i < x1.Count; i++) {
457        ys.Add(x1[i] * x2[i] + x3[i] * x4[i] + x5[i] * x6[i] + x1[i] * x7[i] * x9[i] + x3[i] * x6[i] * x10[i]);
458      }
459
460      var ds = new Dataset(new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "y" },
461        new[] { x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, ys });
462
463
464      var problemData = new RegressionProblemData(ds, new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j" }, "y");
465
466      problemData.TrainingPartition.Start = 0;
467      problemData.TrainingPartition.End = problemData.Dataset.Rows;
468      problemData.TestPartition.Start = 0;
469      problemData.TestPartition.End = 2; // must not be empty
470
471
472      TestGrammarEnumeration(problemData);
473    }
474
475    [TestMethod]
476    [TestCategory("Algorithms.DataAnalysis")]
477    [TestProperty("Time", "short")]
478    public void MctsSymbReg_NoConstants_TwoVars() {
479
480      // y = x1 + x2 + x1*x2 + x1*x2*x2 + x1*x1*x2
481      var x1 = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList();
482      var x2 = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList();
483      var ys = x1.Zip(x2, (x1i, x2i) => x1i + x2i + x1i * x2i + x1i * x2i * x2i + x1i * x1i * x2i).ToList();
484
485      var ds = new Dataset(new string[] { "a", "b", "y" }, new[] { x1, x2, ys });
486      var problemData = new RegressionProblemData(ds, new string[] { "a", "b" }, "y");
487
488      TestGrammarEnumeration(problemData);
489    }
490
491    [TestMethod]
492    [TestCategory("Algorithms.DataAnalysis")]
493    [TestProperty("Time", "short")]
494    public void MctsSymbReg_NoConstants_Misleading() {
495
496      // y = a + baaaaa (the effect of the second term should be very small)
497      // the alg will quickly find that a has big effect and will search below a
498      // since we prevent a + a... the algorithm must find the correct expression via a + b...
499      // however b has a small effect so the branch might not be identified as relevant
500      var @as = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList();
501      var bs = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList();
502      var cs = Enumerable.Range(0, 100).Select(_ => rand.NextDouble() * 1.0e-3).ToList();
503      var ds = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList();
504      var es = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList();
505      var ys = new double[@as.Count];
506      for (int i = 0; i < ys.Length; i++)
507        ys[i] = @as[i] + bs[i] + @as[i] * bs[i] * cs[i];
508
509      var dataset = new Dataset(new string[] { "a", "b", "c", "d", "e", "y" }, new[] { @as, bs, cs, ds, es, ys.ToList() });
510
511      var problemData = new RegressionProblemData(dataset, new string[] { "a", "b", "c", "d", "e" }, "y");
512
513      TestGrammarEnumeration(problemData);
514    }
515
516    [TestMethod]
517    [TestCategory("Algorithms.DataAnalysis")]
518    [TestProperty("Time", "short")]
519    public void MctsSymbRegKeijzer7() {
520      // ln(x)
521      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(Seed);
522      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 7 f(")));
523      // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
524      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
525      TestGrammarEnumeration(regProblem);
526    }
527
528   
529    [TestMethod]
530    [TestCategory("Algorithms.DataAnalysis")]
531    [TestProperty("Time", "short")]
532    public void MctsSymbRegBenchmarkNguyen5() {
533      // sin(x²)cos(x) - 1
534      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
535      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F5 ")));
536      TestGrammarEnumeration(regProblem);
537    }
538
539    [TestMethod]
540    [TestCategory("Algorithms.DataAnalysis")]
541    [TestProperty("Time", "short")]
542    public void MctsSymbRegBenchmarkNguyen6() {
543      // sin(x) + sin(x + x²)
544      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
545      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F6 ")));
546      TestGrammarEnumeration(regProblem);
547    }
548   
549    [TestMethod]
550    [TestCategory("Algorithms.DataAnalysis")]
551    [TestProperty("Time", "short")]
552    public void MctsSymbRegBenchmarkNguyen7() {
553      //  log(x + 1) + log(x² + 1)
554      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed);
555      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F7 ")));
556      TestGrammarEnumeration(regProblem);
557    }
558    [TestMethod]
559    [TestCategory("Algorithms.DataAnalysis")]
560    [TestProperty("Time", "short")]
561    public void MctsSymbRegBenchmarkNguyen8() {
562      // Sqrt(x)
563      // = x ^ 0.5
564      // = exp(0.5 * log(x))
565      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed);
566      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F8 ")));
567      TestGrammarEnumeration(regProblem);
568    }
569   
570    // [TestMethod]
571    [TestCategory("Algorithms.DataAnalysis")]
572    [TestProperty("Time", "short")]
573    public void MctsSymbRegBenchmarkNguyen9() {
574      //  sin(x) + sin(y²)
575      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
576      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F9 ")));
577      TestGrammarEnumeration(regProblem);
578    }
579
580    // [TestMethod]
581    [TestCategory("Algorithms.DataAnalysis")]
582    [TestProperty("Time", "short")]
583    public void MctsSymbRegBenchmarkNguyen10() {
584      // 2sin(x)cos(y)
585      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
586      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F10 ")));
587      TestGrammarEnumeration(regProblem);
588    }
589   
590    [TestMethod]
591    [TestCategory("Algorithms.DataAnalysis")]
592    [TestProperty("Time", "short")]
593    public void MctsSymbRegBenchmarkNguyen11() {
594      // x ^ y  , x > 0, y > 0   
595      // = exp(y * log(x))
596      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed);
597      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F11 ")));
598      TestGrammarEnumeration(regProblem);
599    }
600    [TestMethod]
601    [TestCategory("Algorithms.DataAnalysis")]
602    [TestProperty("Time", "short")]
603    public void MctsSymbRegBenchmarkNguyen12() {
604      // x^4 - x³ + y²/2 - y
605      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed);
606      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F12 ")));
607      TestGrammarEnumeration(regProblem);
608    }
609
610    [TestMethod]
611    [TestCategory("Algorithms.DataAnalysis")]
612    [TestProperty("Time", "long")]
613    public void MctsSymbRegBenchmarkKeijzer5() {
614      // (30 * x * z) / ((x - 10)  * y²)
615      // = 30 x z / (xy² - y²)
616      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(Seed);
617      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 5 f(")));
618      // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
619      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
620      TestGrammarEnumeration(regProblem);
621    }
622
623    [TestMethod]
624    [TestCategory("Algorithms.DataAnalysis")]
625    [TestProperty("Time", "short")]
626    public void MctsSymbRegBenchmarkKeijzer6() {
627      // Keijzer 6 f(x) = Sum(1 / i) From 1 to X  , x \in [0..120]
628      // we can only approximate this
629      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(Seed);
630      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 6 f(")));
631      // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
632      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
633      TestGrammarEnumeration(regProblem);
634    }
635
636    [TestMethod]
637    [TestCategory("Algorithms.DataAnalysis")]
638    [TestProperty("Time", "short")]
639    public void MctsSymbRegBenchmarkKeijzer8() {
640      // sqrt(x)
641      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(Seed);
642      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 8 f(")));
643      // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
644      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
645      TestGrammarEnumeration(regProblem);
646    }
647
648    [TestMethod]
649    [TestCategory("Algorithms.DataAnalysis")]
650    [TestProperty("Time", "short")]
651    public void MctsSymbRegBenchmarkKeijzer9() {
652      // arcsinh(x)  i.e. ln(x + sqrt(x² + 1))
653      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(Seed);
654      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 9 f(")));
655      // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
656      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
657      TestGrammarEnumeration(regProblem);
658    }
659
660    [TestMethod]
661    [TestCategory("Algorithms.DataAnalysis")]
662    [TestProperty("Time", "short")]
663    public void MctsSymbRegBenchmarkKeijzer11() {
664      // xy + sin( (x-1) (y-1) )
665      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
666      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 11 f(")));
667      // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
668      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
669      TestGrammarEnumeration(regProblem);
670    }
671
672    [TestMethod]
673    [TestCategory("Algorithms.DataAnalysis")]
674    [TestProperty("Time", "short")]
675    public void MctsSymbRegBenchmarkKeijzer12() {
676      // x^4 - x³ + y² / 2 - y,  same as Nguyen 12             
677      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(Seed);
678      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 12 f(")));
679      // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
680      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
681      TestGrammarEnumeration(regProblem);
682    }
683
684    [TestMethod]
685    [TestCategory("Algorithms.DataAnalysis")]
686    [TestProperty("Time", "short")]
687    public void MctsSymbRegBenchmarkKeijzer14() {
688      // 8 / (2 + x² + y²)
689      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(Seed);
690      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 14 f(")));
691      // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
692      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
693      TestGrammarEnumeration(regProblem);
694    }
695
696    [TestMethod]
697    [TestCategory("Algorithms.DataAnalysis")]
698    [TestProperty("Time", "short")]
699    public void MctsSymbRegBenchmarkKeijzer15() {
700      // x³ / 5 + y³ / 2 - y - x
701      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(Seed);
702      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 15 f(")));
703      // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
704      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
705      TestGrammarEnumeration(regProblem);
706    }
707#endif
708  }
709}
Note: See TracBrowser for help on using the repository browser.