Free cookie consent management tool by TermsFeed Policy Generator

source: branches/MCTS-SymbReg-2796/Tests/HeuristicLab.Algorithms.DataAnalysis-3.4/MctsSymbolicRegressionTest.cs @ 15438

Last change on this file since 15438 was 15438, checked in by gkronber, 7 years ago

#2796 refactoring to simplify the code

File size: 67.5 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Threading;
5using HeuristicLab.Algorithms.DataAnalysis.MCTSSymbReg;
6using HeuristicLab.Data;
7using HeuristicLab.Optimization;
8using HeuristicLab.Problems.DataAnalysis;
9using HeuristicLab.Problems.Instances.DataAnalysis;
10using HeuristicLab.Random;
11using Microsoft.VisualStudio.TestTools.UnitTesting;
12
13namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
14  [TestClass()]
15  public class MctsSymbolicRegressionTest {
16    #region heuristics
17    [TestMethod]
18    [TestCategory("Algorithms.DataAnalysis")]
19    [TestProperty("Time", "short")]
20    public void TestSimple2dInteractions() {
21      {
22        // a, b ~ U(0, 1) should be trivial
23        var nRand = new MersenneTwister(1234);
24
25        int n = 10000; // large sample so that we can use the thresholds below
26        var a = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
27        var b = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
28        var x = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
29        var y = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
30
31        var z = a.Zip(b, (ai, bi) => ai * bi).ToArray();
32
33        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); // should be detected as relevant
34        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) > 0.05); // a and b > 0 so these should be detected as well
35        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) > 0.05);
36        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) > 0.05);
37        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) > 0.05);
38        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
39      }
40      {
41        // a, b ~ U(1000, 2000) also trivial
42        var nRand = new UniformDistributedRandom(new MersenneTwister(1234), 1000, 2000);
43
44        int n = 10000; // large sample so that we can use the thresholds below
45        var a = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
46        var b = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
47        var x = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
48        var y = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
49
50        var z = a.Zip(b, (ai, bi) => ai * bi).ToArray();
51
52        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); // should be detected as relevant
53        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) > 0.05);
54        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) > 0.05);
55        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) > 0.05);
56        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) > 0.05);
57        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
58      }
59      {
60        // a, b ~ U(-1, 1)
61        var nRand = new UniformDistributedRandom(new MersenneTwister(1234), -1, 1);
62
63        int n = 10000; // large sample so that we can use the thresholds below
64        var a = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
65        var b = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
66        var x = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
67        var y = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
68
69        var z = a.Zip(b, (ai, bi) => ai * bi).ToArray();
70
71        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); // should be detected as relevant
72        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05);
73        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05);
74        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05);
75        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05);
76        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
77      }
78      {
79        // a, b ~ N(0, 1)
80        var nRand = new NormalDistributedRandom(new MersenneTwister(1234), 0, 1);
81
82        int n = 10000; // large sample so that we can use the thresholds below
83        var a = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
84        var b = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
85        var x = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
86        var y = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
87
88        var z = a.Zip(b, (ai, bi) => ai * bi).ToArray();
89
90        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); // should be detected as relevant
91        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05);
92        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05);
93        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05);
94        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05);
95        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
96      }
97      {
98        var rand = new MersenneTwister(1234);
99        // a ~ N(100, 1), b ~ N(-100, 1)
100        var nRand = new NormalDistributedRandom(rand, 0, 1);
101        var aRand = new NormalDistributedRandom(rand, 100, 1);
102        var bRand = new NormalDistributedRandom(rand, -100, 1);
103
104        int n = 10000; // large sample so that we can use the thresholds below
105        var a = Enumerable.Range(0, n).Select(_ => aRand.NextDouble()).ToArray();
106        var b = Enumerable.Range(0, n).Select(_ => bRand.NextDouble()).ToArray();
107        var x = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
108        var y = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
109
110        var z = a.Zip(b, (ai, bi) => ai * bi).ToArray();
111
112        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); // should be detected as relevant
113        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) > 0.05); // a > 0
114        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) > 0.05);
115        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) > 0.05); // b < 0
116        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) > 0.05);
117        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05); // random variables are not correlated
118      }
119    }
120
121    [TestMethod]
122    [TestCategory("Algorithms.DataAnalysis")]
123    [TestProperty("Time", "short")]
124    public void TestGeneral2dInteractions() {
125      {
126        // we should be able to reliably detect when a product of two variables is correlated with the target variable       
127
128        // the test samples x from a two dimensional normal distribution
129        // the covariance matrix for the normal distribution is randomly sampled
130        // this means x_1 and x_2 might be highly correlated
131        // the mean of the normal distribution is randomly sampled (most critical are probably zero-mean distributions)
132        // y is calculated as x_1*x_2
133
134        var conditionNumber = 10000;
135        for (int iter = 0; iter < 100; iter++) {
136          double m0 = 0.0; double m1 = 0.0;
137          alglib.hqrndstate randState;
138          alglib.hqrndseed(1234, 31415, out randState);
139
140          // uncomment if non-zero mean distributions should be tested
141          //alglib.hqrndnormal2(randState, out m0, out m1);
142
143          double[,] cov_ab = new double[2, 2];
144          double[,] cov_xy = new double[2, 2];
145          alglib.matgen.spdmatrixrndcond(2, conditionNumber, ref cov_ab);
146          alglib.spdmatrixcholesky(ref cov_ab, 2, true);
147
148          alglib.matgen.spdmatrixrndcond(2, conditionNumber, ref cov_xy);
149          alglib.spdmatrixcholesky(ref cov_xy, 2, true);
150
151          // generate a, b by sampling from a 2dim multivariate normal distribution
152          // generate x, y by sampling from another 2dim multivariate normal distribution
153          // a,b and x,y might be correlated but x,y are not correlated to a,b
154          int N = 1000; // large sample size to make sure the test thresholds hold
155          double[] a = new double[N];
156          double[] b = new double[N];
157          double[] x = new double[N];
158          double[] y = new double[N];
159          double[] z = new double[N];
160
161          for (int i = 0; i < N; i++) {
162            double r1, r2, r3, r4;
163            alglib.hqrndnormal2(randState, out r1, out r2);
164            alglib.hqrndnormal2(randState, out r3, out r4);
165
166            var r_ab = new double[] { r1, r2 };
167            var r_xy = new double[] { r3, r4 };
168            double[] s_ab = new double[2];
169            double[] s_xy = new double[2];
170            alglib.ablas.rmatrixmv(2, 2, cov_ab, 0, 0, 0, r_ab, 0, ref s_ab, 0);
171            alglib.ablas.rmatrixmv(2, 2, cov_xy, 0, 0, 0, r_xy, 0, ref s_xy, 0);
172
173            a[i] = s_ab[0] + m0;
174            b[i] = s_ab[1] + m1;
175            x[i] = s_xy[0] + m0; // use same mean (just for the sake of it)
176            y[i] = s_xy[1] + m1;
177
178            z[i] = a[i] * b[i];
179          }
180
181          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05);
182          Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
183
184          /* we might see correlations when only using one of the two relevant factors.
185           * this depends on the distribution / location of a and b
186          // for zero-mean distributions the following should all be quasi-zero
187          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05);
188          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05);
189          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05);
190          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05);
191          */
192          Console.WriteLine("a,b: {0:N3}\tx,y: {1:N3}\ta,x: {2:N3}\tb,x: {3:N3}\ta,y: {4:N3}\tb,y: {5:N3}\tcov(a,b): {6:N3}",
193            Heuristics.CorrelationForInteraction(a, b, z),
194            Heuristics.CorrelationForInteraction(x, y, z),
195            Heuristics.CorrelationForInteraction(a, x, z),
196            Heuristics.CorrelationForInteraction(b, x, z),
197            Heuristics.CorrelationForInteraction(a, y, z),
198            Heuristics.CorrelationForInteraction(b, y, z),
199            alglib.cov2(a, b)
200            );
201        }
202      }
203    }
204    [TestMethod]
205    [TestCategory("Algorithms.DataAnalysis")]
206    [TestProperty("Time", "short")]
207    public void TestGeneral3dInteractions() {
208      {
209        // same as TestGeneral2dInteractions but for terms with three variables
210
211        var conditionNumber = 100;
212        for (int iter = 0; iter < 100; iter++) {
213          double m0 = 0.0; double m1 = 0.0; double m2 = 0.0;
214          alglib.hqrndstate randState;
215          alglib.hqrndseed(1234, 31415, out randState);
216
217          // uncomment if non-zero mean distributions should be tested
218          //alglib.hqrndnormal2(randState, out m0, out m1);
219          //alglib.hqrndnormal2(randState, out m1, out m2);
220
221          double[,] cov_abc = new double[3, 3];
222          double[,] cov_xyz = new double[3, 3];
223          alglib.matgen.spdmatrixrndcond(3, conditionNumber, ref cov_abc);
224          alglib.spdmatrixcholesky(ref cov_abc, 3, true);
225
226          alglib.matgen.spdmatrixrndcond(3, conditionNumber, ref cov_xyz);
227          alglib.spdmatrixcholesky(ref cov_xyz, 3, true);
228
229          int N = 1000; // large sample size to make sure the test thresholds hold
230          double[] a = new double[N];
231          double[] b = new double[N];
232          double[] c = new double[N];
233          double[] x = new double[N];
234          double[] y = new double[N];
235          double[] z = new double[N];
236          double[] t = new double[N];
237
238          for (int i = 0; i < N; i++) {
239            double r1, r2, r3, r4, r5, r6;
240            alglib.hqrndnormal2(randState, out r1, out r2);
241            alglib.hqrndnormal2(randState, out r3, out r4);
242            alglib.hqrndnormal2(randState, out r5, out r6);
243
244            var r_abc = new double[] { r1, r2, r3 };
245            var r_xyz = new double[] { r4, r5, r6 };
246            double[] s_abc = new double[3];
247            double[] s_xyz = new double[3];
248            alglib.ablas.rmatrixmv(3, 3, cov_abc, 0, 0, 0, r_abc, 0, ref s_abc, 0);
249            alglib.ablas.rmatrixmv(3, 3, cov_xyz, 0, 0, 0, r_xyz, 0, ref s_xyz, 0);
250
251            a[i] = s_abc[0] + m0;
252            b[i] = s_abc[1] + m1;
253            c[i] = s_abc[2] + m2;
254            x[i] = s_xyz[0] + m0; // use same mean (just for the sake of it)
255            y[i] = s_xyz[1] + m1;
256            z[i] = s_xyz[2] + m2;
257
258            t[i] = a[i] * b[i] * c[i];
259          }
260
261          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, c, t) > 0.05);
262          Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z, t) < 0.05);
263
264          /* we might see correlations when only using one of the two relevant factors.
265           * this depends on the distribution / location of a and b
266          // for zero-mean distributions the following should all be quasi-zero
267          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05);
268          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05);
269          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05);
270          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05);
271          */
272          Console.WriteLine("a,b,c: {0:N3}\tx,y,z: {1:N3}\ta,b,x: {2:N3}\tb,c,x: {3:N3}",
273            Heuristics.CorrelationForInteraction(a, b, c, t),
274            Heuristics.CorrelationForInteraction(x, y, z, t),
275            Heuristics.CorrelationForInteraction(a, b, x, t),
276            Heuristics.CorrelationForInteraction(b, c, x, t)
277            );
278        }
279      }
280    }
281
282    [TestMethod]
283    [TestCategory("Algorithms.DataAnalysis")]
284    [TestProperty("Time", "short")]
285    public void TestPoly10Interactions() {
286      {
287        alglib.hqrndstate randState;
288        alglib.hqrndseed(1234, 31415, out randState);
289
290        int N = 25000; // large sample size to make sure the test thresholds hold
291        double[] a = new double[N];
292        double[] b = new double[N];
293        double[] c = new double[N];
294        double[] d = new double[N];
295        double[] e = new double[N];
296        double[] f = new double[N];
297        double[] g = new double[N];
298        double[] h = new double[N];
299        double[] i = new double[N];
300        double[] j = new double[N];
301        double[] y = new double[N];
302
303        for(int k=0;k<N;k++) {
304          a[k] = alglib.hqrnduniformr(randState) * 2 - 1;
305          b[k] = alglib.hqrnduniformr(randState) * 2 - 1;
306          c[k] = alglib.hqrnduniformr(randState) * 2 - 1;
307          d[k] = alglib.hqrnduniformr(randState) * 2 - 1;
308          e[k] = alglib.hqrnduniformr(randState) * 2 - 1;
309          f[k] = alglib.hqrnduniformr(randState) * 2 - 1;
310          g[k] = alglib.hqrnduniformr(randState) * 2 - 1;
311          h[k] = alglib.hqrnduniformr(randState) * 2 - 1;
312          i[k] = alglib.hqrnduniformr(randState) * 2 - 1;
313          j[k] = alglib.hqrnduniformr(randState) * 2 - 1;
314          y[k] = a[k] * b[k] + c[k] * d[k] + e[k] * f[k] + a[k] * g[k] * i[k] + c[k] * f[k] * j[k];
315        }
316
317        var x = new[] { a, b, c, d, e, f, g, h, i, j };
318        var all2Combinations = HeuristicLab.Common.EnumerableExtensions.Combinations(new[] {1,2,3,4,5,6,7,8,9,10}, 2);
319
320        var resultList = new List<Tuple<string, double>>();
321        foreach(var entry in all2Combinations) {
322          var aIdx = entry.First();
323          var bIdx = entry.Skip(1).First();
324          resultList.Add(Tuple.Create(aIdx + " " + bIdx, Heuristics.CorrelationForInteraction(x[aIdx - 1], x[bIdx - 1], y)));
325        }
326
327        foreach(var entry in resultList.OrderByDescending(t => t.Item2)) {
328          Console.WriteLine("{0} {1:N3}", entry.Item1, entry.Item2);
329        }
330
331        var all3Combinations = HeuristicLab.Common.EnumerableExtensions.Combinations(new[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, 3);
332
333        resultList = new List<Tuple<string, double>>();
334        foreach (var entry in all3Combinations) {
335          var aIdx = entry.First();
336          var bIdx = entry.Skip(1).First();
337          var cIdx = entry.Skip(2).First();
338          resultList.Add(Tuple.Create(aIdx + " " + bIdx + " " + cIdx, Heuristics.CorrelationForInteraction(x[aIdx - 1], x[bIdx - 1], x[cIdx - 1], y)));
339        }
340
341        //  Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
342
343        foreach (var entry in resultList.OrderByDescending(t => t.Item2)) {
344          Console.WriteLine("{0} {1:N3}", entry.Item1, entry.Item2);
345        }
346
347
348        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, y) > 0.01);
349        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, a, y) > 0.01);
350        Assert.IsTrue(Heuristics.CorrelationForInteraction(c, d, y) > 0.01);
351        Assert.IsTrue(Heuristics.CorrelationForInteraction(d, c, y) > 0.01);
352        Assert.IsTrue(Heuristics.CorrelationForInteraction(e, f, y) > 0.01);
353        Assert.IsTrue(Heuristics.CorrelationForInteraction(f, e, y) > 0.01);
354        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, g, i, y) > 0.01);
355        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, i, g, y) > 0.01);
356        Assert.IsTrue(Heuristics.CorrelationForInteraction(g, a, i, y) > 0.01);
357        Assert.IsTrue(Heuristics.CorrelationForInteraction(g, i, a, y) > 0.01);
358        Assert.IsTrue(Heuristics.CorrelationForInteraction(i, g, a, y) > 0.01);
359        Assert.IsTrue(Heuristics.CorrelationForInteraction(i, a, g, y) > 0.01);
360
361        Assert.IsTrue(Heuristics.CorrelationForInteraction(c, f, j, y) > 0.01);
362        Assert.IsTrue(Heuristics.CorrelationForInteraction(c, j, f, y) > 0.01);
363        Assert.IsTrue(Heuristics.CorrelationForInteraction(f, c, j, y) > 0.01);
364        Assert.IsTrue(Heuristics.CorrelationForInteraction(f, j, c, y) > 0.01);
365        Assert.IsTrue(Heuristics.CorrelationForInteraction(j, c, f, y) > 0.01);
366        Assert.IsTrue(Heuristics.CorrelationForInteraction(j, f, c, y) > 0.01);
367      }
368    }
369    #endregion
370
371
372    #region expression hashing
373    [TestMethod]
374    [TestCategory("Algorithms.DataAnalysis")]
375    [TestProperty("Time", "short")]
376    public void ExprHashTest() {
377      int nParams;
378      byte[] code;
379
380      {
381        // addition of variables
382        var codeGen = new CodeGenerator();
383        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
384        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
385        codeGen.Emit1(OpCodes.Add);
386        codeGen.Emit1(OpCodes.Exit);
387        codeGen.GetCode(out code, out nParams);
388        var h1 = ExprHash.GetHash(code, nParams);
389
390        codeGen = new CodeGenerator();
391        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
392        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
393        codeGen.Emit1(OpCodes.Add);
394        codeGen.Emit1(OpCodes.Exit);
395        codeGen.GetCode(out code, out nParams);
396        var h2 = ExprHash.GetHash(code, nParams);
397
398        Assert.AreEqual(h1, h2);
399      }
400
401      {
402        // multiplication of variables
403        var codeGen = new CodeGenerator();
404        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
405        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
406        codeGen.Emit1(OpCodes.Mul);
407        codeGen.Emit1(OpCodes.Exit);
408        codeGen.GetCode(out code, out nParams);
409        var h1 = ExprHash.GetHash(code, nParams);
410
411        codeGen = new CodeGenerator();
412        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
413        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
414        codeGen.Emit1(OpCodes.Mul);
415        codeGen.Emit1(OpCodes.Exit);
416        codeGen.GetCode(out code, out nParams);
417        var h2 = ExprHash.GetHash(code, nParams);
418
419        Assert.AreEqual(h1, h2);
420      }
421
422      {
423        // distributivity
424        var codeGen = new CodeGenerator();
425        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
426        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
427        codeGen.Emit1(OpCodes.Add);
428        codeGen.Emit2(OpCodes.LoadVar, 3);
429        codeGen.Emit1(OpCodes.Mul);
430        codeGen.Emit1(OpCodes.Exit);
431        codeGen.GetCode(out code, out nParams);
432        var h1 = ExprHash.GetHash(code, nParams);
433
434        codeGen = new CodeGenerator();
435        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
436        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 3);
437        codeGen.Emit1(OpCodes.Mul);
438        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
439        codeGen.Emit2(OpCodes.LoadVar, 3);
440        codeGen.Emit1(OpCodes.Mul);
441        codeGen.Emit1(OpCodes.Add);
442        codeGen.Emit1(OpCodes.Exit);
443        codeGen.GetCode(out code, out nParams);
444        var h2 = ExprHash.GetHash(code, nParams);
445
446        Assert.AreEqual(h1, h2);
447      }
448
449
450      {
451        // div
452        var codeGen = new CodeGenerator();
453        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
454        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
455        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
456        codeGen.Emit1(OpCodes.Inv);
457        codeGen.Emit1(OpCodes.Exit);
458        codeGen.GetCode(out code, out nParams);
459        var h1 = ExprHash.GetHash(code, nParams);
460
461        codeGen = new CodeGenerator();
462        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
463        codeGen.Emit1(OpCodes.Inv);
464        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
465        codeGen.Emit1(OpCodes.Inv);
466        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
467        codeGen.Emit1(OpCodes.Exit);
468        codeGen.GetCode(out code, out nParams);
469        var h2 = ExprHash.GetHash(code, nParams);
470
471        Assert.AreEqual(h1, h2);
472      }
473      {
474        // exp
475        var codeGen = new CodeGenerator();
476        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
477        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
478        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
479        codeGen.Emit1(OpCodes.Exp);
480        codeGen.Emit1(OpCodes.Exit);
481        codeGen.GetCode(out code, out nParams);
482        var h1 = ExprHash.GetHash(code, nParams);
483
484        codeGen = new CodeGenerator();
485        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
486        codeGen.Emit1(OpCodes.Exp);
487        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
488        codeGen.Emit1(OpCodes.Exp);
489        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
490        codeGen.GetCode(out code, out nParams);
491        codeGen.Emit1(OpCodes.Exit);
492        var h2 = ExprHash.GetHash(code, nParams);
493
494        Assert.AreEqual(h1, h2);
495      }
496      {
497        // log
498        var codeGen = new CodeGenerator();
499        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
500        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
501        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
502        codeGen.Emit1(OpCodes.Log);
503        codeGen.Emit1(OpCodes.Exit);
504        codeGen.GetCode(out code, out nParams);
505        var h1 = ExprHash.GetHash(code, nParams);
506
507        codeGen = new CodeGenerator();
508        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
509        codeGen.Emit1(OpCodes.Log);
510        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
511        codeGen.Emit1(OpCodes.Log);
512        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
513        codeGen.Emit1(OpCodes.Exit);
514        codeGen.GetCode(out code, out nParams);
515        var h2 = ExprHash.GetHash(code, nParams);
516
517        Assert.AreEqual(h1, h2);
518      }
519
520      {
521        // x1 + x1 is equivalent to x1
522        var codeGen = new CodeGenerator();
523        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
524        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
525        codeGen.Emit1(OpCodes.Add);
526        codeGen.Emit1(OpCodes.Exit);
527        codeGen.GetCode(out code, out nParams);
528        var h1 = ExprHash.GetHash(code, nParams);
529
530        codeGen = new CodeGenerator();
531        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
532        codeGen.Emit1(OpCodes.Exit);
533        codeGen.GetCode(out code, out nParams);
534        var h2 = ExprHash.GetHash(code, nParams);
535
536        Assert.AreEqual(h1, h2);
537      }
538      {
539        // c1*x1 + c2*x1 is equivalent to c3*x1
540        var codeGen = new CodeGenerator();
541        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
542        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
543        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
544
545        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
546        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
547        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
548
549        codeGen.Emit1(OpCodes.Add);
550        codeGen.Emit1(OpCodes.Exit);
551        codeGen.GetCode(out code, out nParams);
552        var h1 = ExprHash.GetHash(code, nParams);
553
554        codeGen = new CodeGenerator();
555        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
556        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
557        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
558        codeGen.Emit1(OpCodes.Exit);
559        codeGen.GetCode(out code, out nParams);
560        var h2 = ExprHash.GetHash(code, nParams);
561
562        Assert.AreEqual(h1, h2);
563      }
564
565      {
566        var codeGen = new CodeGenerator();
567        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst0);
568        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst1);
569        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
570        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
571        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
572
573        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
574        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
575        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
576
577        codeGen.Emit1(OpCodes.Add);
578
579        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
580        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
581
582        codeGen.Emit1(OpCodes.Exit);
583        codeGen.GetCode(out code, out nParams);
584        var h1 = ExprHash.GetHash(code, nParams);
585
586        codeGen = new CodeGenerator();
587        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst0);
588        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst1);
589        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
590        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
591        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
592        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
593        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
594        codeGen.Emit1(OpCodes.Exit);
595        codeGen.GetCode(out code, out nParams);
596        var h2 = ExprHash.GetHash(code, nParams);
597
598        Assert.AreEqual(h1, h2);
599      }
600    }
601    #endregion
602
603    #region number of solutions
604    // the algorithm should visits each solution only once
605    [TestMethod]
606    [TestCategory("Algorithms.DataAnalysis")]
607    [TestProperty("Time", "short")]
608    public void MctsSymbRegNumberOfSolutionsOneVariable() {
609      // this problem has only one variable
610      var provider = new NguyenInstanceProvider();
611      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F1 ")));
612      {
613        // possible solutions with max one variable reference:
614        // x
615        // log(x)
616        // exp(x)
617        // 1/x
618        TestMctsNumberOfSolutions(regProblem, 1, 4);
619      }
620      {
621        // possible solutions with max 4 variable references:
622        // without exp, log and inv
623        // x       
624        // x*x
625        // x*x*x
626        // x+x*x
627        // x+x*x*x
628        // x*x*x*x
629
630        TestMctsNumberOfSolutions(regProblem, 4, 6, allowLog: false, allowInv: false, allowExp: false);
631      }
632      {
633        // possible solutions with max 5 variable references:
634        // without exp, log and inv
635        // x       
636        // xx
637        // xxx
638        // x+xx
639        // xxxx
640        // x+xxx
641        // xxxxx
642        // x+xxxx
643        // xx+xxx
644        TestMctsNumberOfSolutions(regProblem, 5, 9, allowLog: false, allowInv: false, allowExp: false);
645      }
646      {
647        // possible solutions with max two variable references:
648        // x
649        // log(x+c)
650        // exp(x)
651        // 1/(x+c)
652        //              -- 4
653        // x * x
654        // x * log(x+c)
655        // x * exp(x)
656        // x * 1/(x + c)
657        // x + log(x+c)
658        // x + exp(x)
659        // x + 1/(x+c)
660        //              -- 7
661        // log(x + c) * log(x + c)
662        // log(x + c) * exp(x)
663        // log(x + c) * 1/(x + c)
664        // log(x + c) + log(x + c)                        // TODO
665        // log(x + c) + exp(x)
666        // log(x + c) + 1/(x+c)
667        //              -- 6
668        // exp(x) * 1/(x+c)
669        // exp(cx) + exp(cx)                              // TODO
670        // exp(cx) + 1/(x+c)
671        //              -- 3
672        // 1/(x+c) * 1/(x+c)                              // TODO
673        // 1/(x+c) + 1/(x+c)                              // TODO
674        //              -- 2
675        // log(x*x)
676        // exp(x*x)
677        // inv(x*x+c)
678        //              -- 3
679
680
681        TestMctsNumberOfSolutions(regProblem, 2, 27);
682      }
683      {
684        // possible solutions with max three variable references:
685        // without log and inv
686        // x
687        // exp(x)
688        //              -- 2
689        // x * x
690        // x + x                                            ?
691        // x * exp(x)
692        // x + exp(x)
693        // exp(x) * exp(x)
694        // exp(x) + exp(x)                                  ?
695        // exp(x*x)
696        //              -- 7
697        // x * x * x
698        // x + x * x                                       
699        // x + x + x                                        ?
700        // x * x * exp(x)
701        // x + x * exp(x)                                   
702        // x + x + exp(x)                                   ?
703        // exp(x) + x*x
704        // exp(x) + x*exp(x)                               
705        // x + exp(x) * exp(x)                             
706        // x + exp(x) + exp(x)                              ?
707        // x * exp(x) * exp(x)
708        // x * exp(x*x)
709        // x + exp(x*x)
710        //              -- 13
711
712        // exp(x) * exp(x) * exp(x)
713        // exp(x) + exp(x) * exp(x)                         
714        // exp(x) + exp(x) + exp(x)                         ?
715        //              -- 3
716
717        // exp(x)   * exp(x*x)
718        // exp(x)   + exp(x*x)
719        //              -- 2
720        // exp(x*x*x)
721        //              -- 1
722        TestMctsNumberOfSolutions(regProblem, 3, 2 + 7 + 13 + 3 + 2 + 1, allowLog: false, allowInv: false);
723      }     
724    }
725
// Passes hand-derived expected solution counts to TestMctsNumberOfSolutions for the
// Nguyen F9 instance (a problem with only two input variables, x and y), using
// different limits on the number of variable references and different symbol sets.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegNumberOfSolutionsTwoVariables() {
  var instances = new NguyenInstanceProvider();
  var f9 = instances.GetDataDescriptors().Single(d => d.Name.Contains("F9 "));
  var problemData = instances.LoadData(f9);

  // ---- max one variable reference, all symbols ----
  // x
  // log(x)
  // exp(x)
  // 1/x
  // y
  // log(y)
  // exp(y)
  // 1/y
  TestMctsNumberOfSolutions(problemData, 1, 8);

  // ---- max one variable reference, without log and inv ----
  // x
  // exp(x)
  // y
  // exp(y)
  TestMctsNumberOfSolutions(problemData, 1, 4, allowLog: false, allowInv: false);

  // ---- max two variable references, without log and inv ----
  // x
  // y
  // exp(x)
  // exp(y)
  //                  -- 4
  // x (*|+) x
  // x (*|+) exp(x)
  // x (*|+) y
  // x (*|+) exp(y)
  //                  -- 8
  // exp(x) (*|+) exp(x)
  // exp(x) (*|+) exp(y)
  //                  -- 4
  // y (*|+) y
  // y (*|+) exp(x)
  // y (*|+) exp(y)
  //                  -- 6
  // exp(y) (*|+) exp(y)
  //                  -- 2
  //
  // exp(x*x)
  // exp(x*y)
  // exp(y*y)
  //                  -- 3
  int expectedWithoutLogAndInv = 4 + 8 + 4 + 6 + 2 + 3;
  TestMctsNumberOfSolutions(problemData, 2, expectedWithoutLogAndInv, allowLog: false, allowInv: false);

  // ---- max two variable references, without exp and sum ----
  // x
  // y
  // log(x)
  // log(y)
  // inv(x)
  // inv(y)
  //              -- 6
  // x * x
  // x * y
  // x * log(x)
  // x * log(y)
  // x * inv(x)
  // x * inv(y)
  //              -- 6
  // log(x) * log(x)
  // log(x) * log(y)
  // log(x) * inv(x)
  // log(x) * inv(y)
  //              -- 4
  // inv(x) * inv(x)
  // inv(x) * inv(y)
  //              -- 2
  // y * y
  // y * log(x)
  // y * log(y)
  // y * inv(x)
  // y * inv(y)
  //              -- 5
  // log(y) * log(y)
  // log(y) * inv(x)
  // log(y) * inv(y)
  //              -- 3
  // inv(y) * inv(y)
  //              -- 1
  // log(x*x)
  // log(x*y)
  // log(y*y)
  //
  // inv(x*x)
  // inv(x*y)
  // inv(y*y)
  //             -- 6
  // log(x+x)
  // log(x+y)
  // log(y+y)
  //
  // inv(x+x)
  // inv(x+y)
  // inv(y+y)
  //             -- 6
  int expectedWithoutExpAndSum = 6 + 6 + 4 + 2 + 5 + 3 + 1 + 6 + 6;
  TestMctsNumberOfSolutions(problemData, 2, expectedWithoutExpAndSum, allowExp: false, allowSum: false);
}
843    #endregion
844
845
846    #region test structure search (no constants)
// Structure search without numeric constants on Nguyen F1.
// Target: x³ + x² + x
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Nguyen1() {
  var instances = new NguyenInstanceProvider(seed: 1234);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F1 "));
  var problemData = instances.LoadData(descriptor);

  // exp, log and inv are switched off for this polynomial target
  TestMctsWithoutConstants(problemData, nVarRefs: 10, allowExp: false, allowLog: false, allowInv: false);
}
// Structure search without numeric constants on Nguyen F2.
// Target: x^4 + x³ + x² + x
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Nguyen2() {
  var instances = new NguyenInstanceProvider(seed: 1234);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F2 "));
  var problemData = instances.LoadData(descriptor);

  // the helper's defaults are used for everything except the disabled symbols
  TestMctsWithoutConstants(problemData, allowExp: false, allowLog: false, allowInv: false);
}
// Structure search without numeric constants on Nguyen F3.
// Target: x^5 + x^4 + x³ + x² + x
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Nguyen3() {
  var instances = new NguyenInstanceProvider(seed: 1234);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F3 "));
  var problemData = instances.LoadData(descriptor);

  // exp, log and inv are switched off for this polynomial target
  TestMctsWithoutConstants(problemData, nVarRefs: 15, iterations: 1000000, allowExp: false, allowLog: false, allowInv: false);
}
// Structure search without numeric constants on Nguyen F4.
// Target: x^6 + x^5 + x^4 + x³ + x² + x
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Nguyen4() {
  var instances = new NguyenInstanceProvider(seed: 1234);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F4 "));
  var problemData = instances.LoadData(descriptor);

  // exp, log and inv are switched off for this polynomial target
  TestMctsWithoutConstants(problemData, nVarRefs: 25, iterations: 1000000, allowExp: false, allowLog: false, allowInv: false);
}
883
// Structure search without numeric constants on Nguyen F7.
// Target: log(x + 1) + log(x² + 1)
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Nguyen7() {
  var instances = new NguyenInstanceProvider(seed: 1234);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F7 "));
  var problemData = instances.LoadData(descriptor);

  // log is the only non-polynomial symbol that is enabled
  TestMctsWithoutConstants(problemData, nVarRefs: 10, iterations: 100000, allowExp: false, allowLog: true, allowInv: false);
}
893
// Structure search without numeric constants on a simplified Poly-10 problem.
// The target of the loaded instance is rewritten so that only the three
// pairwise products remain (see Y' below).
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part1() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X1*X2 + X3*X4 + X5*X6
  // simplify problem by changing target
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();

  // only the columns that occur in the removed terms are read
  var x1 = ds.GetDoubleValues("X1").ToArray();
  var x3 = ds.GetDoubleValues("X3").ToArray();
  var x6 = ds.GetDoubleValues("X6").ToArray();
  var x7 = ds.GetDoubleValues("X7").ToArray();
  var x9 = ds.GetDoubleValues("X9").ToArray();
  var x10 = ds.GetDoubleValues("X10").ToArray();

  // subtract X1*X7*X9 and X3*X6*X10 from every target value
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x1[i] * x7[i] * x9[i];
    ys[i] -= x3[i] * x6[i] * x10[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());

  // same allowed inputs and target name as the original problem, but with the modified data
  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false);
}
927
// Structure search without numeric constants on a simplified Poly-10 problem.
// The target of the loaded instance is rewritten so that only the two
// three-way products remain (see Y' below).
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part2() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X1*X7*X9 + X3*X6*X10
  // simplify problem by changing target
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();

  // only the columns that occur in the removed terms are read
  var x1 = ds.GetDoubleValues("X1").ToArray();
  var x2 = ds.GetDoubleValues("X2").ToArray();
  var x3 = ds.GetDoubleValues("X3").ToArray();
  var x4 = ds.GetDoubleValues("X4").ToArray();
  var x5 = ds.GetDoubleValues("X5").ToArray();
  var x6 = ds.GetDoubleValues("X6").ToArray();

  // subtract X1*X2, X3*X4 and X5*X6 from every target value
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x1[i] * x2[i];
    ys[i] -= x3[i] * x4[i];
    ys[i] -= x5[i] * x6[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());

  // same allowed inputs and target name as the original problem, but with the modified data
  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false);
}
962
// Structure search without numeric constants on a simplified Poly-10 problem.
// The target of the loaded instance is rewritten so that only the two terms
// containing X1 remain (see Y' below).
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part3() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X1*X2 + X1*X7*X9
  // simplify problem by changing target
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();

  // only the columns that occur in the removed terms are read
  var x3 = ds.GetDoubleValues("X3").ToArray();
  var x4 = ds.GetDoubleValues("X4").ToArray();
  var x5 = ds.GetDoubleValues("X5").ToArray();
  var x6 = ds.GetDoubleValues("X6").ToArray();
  var x10 = ds.GetDoubleValues("X10").ToArray();

  // subtract X3*X4, X5*X6 and X3*X6*X10 from every target value
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x3[i] * x4[i];
    ys[i] -= x5[i] * x6[i];
    ys[i] -= x3[i] * x6[i] * x10[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());

  // same allowed inputs and target name as the original problem, but with the modified data
  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false);
}
997
// Structure search without numeric constants on a simplified Poly-10 problem.
// The target of the loaded instance is rewritten so that the two terms
// containing X1 are removed (see Y' below).
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part4() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X3*X4 + X5*X6 + X3*X6*X10
  // simplify problem by changing target
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();

  // only the columns that occur in the removed terms are read
  var x1 = ds.GetDoubleValues("X1").ToArray();
  var x2 = ds.GetDoubleValues("X2").ToArray();
  var x7 = ds.GetDoubleValues("X7").ToArray();
  var x9 = ds.GetDoubleValues("X9").ToArray();

  // subtract X1*X2 and X1*X7*X9 from every target value
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x1[i] * x2[i];
    ys[i] -= x1[i] * x7[i] * x9[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());

  // same allowed inputs and target name as the original problem, but with the modified data
  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false);
}
1030
// Structure search without numeric constants on a simplified Poly-10 problem.
// The target of the loaded instance is rewritten so that the term X3*X6*X10
// is removed (see Y' below).
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part5() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9
  // simplify problem by changing target
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();

  // only the columns that occur in the removed term are read
  var x3 = ds.GetDoubleValues("X3").ToArray();
  var x6 = ds.GetDoubleValues("X6").ToArray();
  var x10 = ds.GetDoubleValues("X10").ToArray();

  // subtract X3*X6*X10 from every target value
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x3[i] * x6[i] * x10[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());

  // same allowed inputs and target name as the original problem, but with the modified data
  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false);
}
1062
// Structure search without numeric constants on a simplified Poly-10 problem.
// The target of the loaded instance is rewritten so that the term X1*X7*X9
// is removed (see Y' below).
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part6() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X1*X2 + X3*X4 + X5*X6 + X3*X6*X10
  // simplify problem by changing target
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();

  // only the columns that occur in the removed term are read
  var x1 = ds.GetDoubleValues("X1").ToArray();
  var x7 = ds.GetDoubleValues("X7").ToArray();
  var x9 = ds.GetDoubleValues("X9").ToArray();

  // subtract X1*X7*X9 from every target value
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x1[i] * x7[i] * x9[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());

  // same allowed inputs and target name as the original problem, but with the modified data
  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  // Y' contains 2 + 2 + 2 + 3 = 9 variable references
  TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 9, iterations: 100000, allowExp: false, allowLog: false, allowInv: false);
}
1094
1095
// Structure search without numeric constants on the unmodified Poly-10 instance,
// using every row of the dataset for training.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "long")]
public void MctsSymbReg_NoConstants_Poly10_250rows() {
  var instances = new VariousInstanceProvider(seed: 1234);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Poly-10"));
  var problemData = instances.LoadData(descriptor);

  // train on all rows of the dataset
  problemData.TrainingPartition.Start = 0;
  problemData.TrainingPartition.End = problemData.Dataset.Rows;

  // the test partition is reduced to the first two rows
  problemData.TestPartition.Start = 0;
  problemData.TestPartition.End = 2;

  TestMctsWithoutConstants(problemData, nVarRefs: 15, iterations: 200000, allowExp: false, allowLog: false, allowInv: false);
}
// Same target expression as Poly-10, but evaluated on 10000 freshly generated rows:
//   y = x1*x2 + x3*x4 + x5*x6 + x1*x7*x9 + x3*x6*x10
// x8 is generated and offered as an input although it does not occur in the target.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "long")]
public void MctsSymbReg_NoConstants_Poly10_10000rows() {
  const int nRows = 10000;
  var rand = new FastRandom(1234);

  // every call draws nRows values from rand; the columns are drawn one after another (x1 first, x10 last)
  Func<List<double>> randomColumn = () => Enumerable.Range(0, nRows).Select(_ => rand.NextDouble()).ToList();
  var x1 = randomColumn();
  var x2 = randomColumn();
  var x3 = randomColumn();
  var x4 = randomColumn();
  var x5 = randomColumn();
  var x6 = randomColumn();
  var x7 = randomColumn();
  var x8 = randomColumn();
  var x9 = randomColumn();
  var x10 = randomColumn();

  var ys = Enumerable.Range(0, x1.Count)
    .Select(i => x1[i] * x2[i] + x3[i] * x4[i] + x5[i] * x6[i] + x1[i] * x7[i] * x9[i] + x3[i] * x6[i] * x10[i])
    .ToList();

  var inputNames = new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j" };
  var ds = new Dataset(
    new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "y" },
    new[] { x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, ys });
  var problemData = new RegressionProblemData(ds, inputNames, "y");

  // train on all rows
  problemData.TrainingPartition.Start = 0;
  problemData.TrainingPartition.End = problemData.Dataset.Rows;
  // the test partition must not be empty
  problemData.TestPartition.Start = 0;
  problemData.TestPartition.End = 2;

  TestMctsWithoutConstants(problemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false);
}
1143
// Structure search without numeric constants on a generated two-variable problem:
//   y = x1 + x2 + x1*x2 + x1*x2*x2 + x1*x1*x2
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_TwoVars() {
  const int nRows = 100;
  var rand = new FastRandom(1234);

  // x1 is drawn completely before x2
  var x1 = Enumerable.Range(0, nRows).Select(_ => rand.NextDouble()).ToList();
  var x2 = Enumerable.Range(0, nRows).Select(_ => rand.NextDouble()).ToList();

  var ys = Enumerable.Range(0, nRows)
    .Select(i => x1[i] + x2[i] + x1[i] * x2[i] + x1[i] * x2[i] * x2[i] + x1[i] * x1[i] * x2[i])
    .ToList();

  var ds = new Dataset(new string[] { "a", "b", "y" }, new[] { x1, x2, ys });
  var problemData = new RegressionProblemData(ds, new string[] { "a", "b" }, "y");

  TestMctsWithoutConstants(problemData, nVarRefs: 10, iterations: 10000, allowExp: false, allowLog: false, allowInv: false);
}
1162
// Structure search without numeric constants on a generated problem in which one
// term contributes very little to the target:
//   y = a + b + a*b*c   with c scaled by 1.0e-3
// Rationale given by the original author: the algorithm quickly finds that a has a
// big effect and searches below a; since a + a... is prevented, the correct expression
// must be found via a + b..., but a branch with a small effect might not be
// identified as relevant.
// NOTE(review): the original comment stated the target as "y = a + baaaaa", which
// does not match the expression computed below - confirm which one is intended.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Misleading() {
  const int nRows = 100;
  var rand = new FastRandom(1234);

  // the columns are drawn one after another in this order: a, b, c, d, e
  var aVals = Enumerable.Range(0, nRows).Select(_ => rand.NextDouble()).ToList();
  var bVals = Enumerable.Range(0, nRows).Select(_ => rand.NextDouble()).ToList();
  var cVals = Enumerable.Range(0, nRows).Select(_ => rand.NextDouble() * 1.0e-3).ToList();
  var dVals = Enumerable.Range(0, nRows).Select(_ => rand.NextDouble()).ToList();
  var eVals = Enumerable.Range(0, nRows).Select(_ => rand.NextDouble()).ToList();

  // d and e do not occur in the target
  var target = Enumerable.Range(0, aVals.Count)
    .Select(i => aVals[i] + bVals[i] + aVals[i] * bVals[i] * cVals[i])
    .ToList();

  var dataset = new Dataset(
    new string[] { "a", "b", "c", "d", "e", "y" },
    new[] { aVals, bVals, cVals, dVals, eVals, target });
  var problemData = new RegressionProblemData(dataset, new string[] { "a", "b", "c", "d", "e" }, "y");

  TestMctsWithoutConstants(problemData, nVarRefs: 10, iterations: 10000, allowExp: false, allowLog: false, allowInv: false);
}
1190    #endregion
1191
1192    #region restricted structure but including numeric constants
1193
// Search including numeric constants on Keijzer 7.
// Target: ln(x)
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegKeijzer7() {
  const int maxTestRows = 1000;
  var instances = new KeijzerInstanceProvider(seed: 1234);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Keijzer 7 f("));
  var problemData = instances.LoadData(descriptor);

  // some Keijzer instances have very large test partitions; test performance is not
  // of concern here, so the partition is cut to at most maxTestRows rows
  if (problemData.TestPartition.End - problemData.TestPartition.Start > maxTestRows) {
    problemData.TestPartition.End = problemData.TestPartition.Start + maxTestRows;
  }

  TestMcts(problemData, allowExp: false, allowLog: true, allowInv: false);
}
1205
1206    /*
1207    // [TestMethod]
1208    [TestCategory("Algorithms.DataAnalysis")]
1209    [TestProperty("Time", "short")]
1210    public void MctsSymbRegBenchmarkNguyen5() {
1211      // sin(x²)cos(x) - 1
1212      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
1213      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F5 ")));
1214      TestMcts(regProblem);
1215    }
1216    // [TestMethod]
1217    [TestCategory("Algorithms.DataAnalysis")]
1218    [TestProperty("Time", "short")]
1219    public void MctsSymbRegBenchmarkNguyen6() {
1220      // sin(x) + sin(x + x²)
1221      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
1222      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F6 ")));
1223      TestMcts(regProblem);
1224    }
1225    */
// Search including numeric constants on Nguyen F7.
// Target: log(x + 1) + log(x² + 1)
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkNguyen7() {
  var instances = new NguyenInstanceProvider(seed: 1234);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F7 "));
  var problemData = instances.LoadData(descriptor);

  TestMcts(problemData, maxVariableReferences: 5, allowExp: false, allowLog: true, allowInv: false);
}
// Search including numeric constants on Nguyen F8.
// Target: Sqrt(x)
//       = x ^ 0.5
//       = exp(0.5 * log(x))
// which is why both exp and log are enabled.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkNguyen8() {
  var instances = new NguyenInstanceProvider(seed: 1234);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F8 "));
  var problemData = instances.LoadData(descriptor);

  TestMcts(problemData, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false);
}
1246    /*
1247    // [TestMethod]
1248    [TestCategory("Algorithms.DataAnalysis")]
1249    [TestProperty("Time", "short")]
1250    public void MctsSymbRegBenchmarkNguyen9() {
1251      //  sin(x) + sin(y²)
1252      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
1253      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F9 ")));
1254      TestMcts(regProblem);
1255    }
1256    // [TestMethod]
1257    [TestCategory("Algorithms.DataAnalysis")]
1258    [TestProperty("Time", "short")]
1259    public void MctsSymbRegBenchmarkNguyen10() {
1260      // 2sin(x)cos(y)
1261      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
1262      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F10 ")));
1263      TestMcts(regProblem);
1264    }
1265    */
// Search including numeric constants on Nguyen F11.
// Target: x ^ y  , x > 0, y > 0
//       = exp(y * log(x))
// which is why both exp and log are enabled.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkNguyen11() {
  var instances = new NguyenInstanceProvider(seed: 1234);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F11 "));
  var problemData = instances.LoadData(descriptor);

  TestMcts(problemData, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false);
}
// Search including numeric constants on Nguyen F12.
// Target: x^4 - x³ + y²/2 - y
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkNguyen12() {
  var instances = new NguyenInstanceProvider(seed: 1234);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("F12 "));
  var problemData = instances.LoadData(descriptor);

  // exp, log and inv are switched off for this polynomial target
  TestMcts(problemData, maxVariableReferences: 20, allowExp: false, allowLog: false, allowInv: false);
}
1285
1286    #endregion
1287
1288    #region keijzer
// Search including numeric constants on Keijzer 5.
// Target: (30 * x * z) / ((x - 10) * y²)
//       = 30 x z / (xy² - 10y²)
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "long")]
public void MctsSymbRegBenchmarkKeijzer5() {
  const int maxTestRows = 1000;
  var instances = new KeijzerInstanceProvider(seed: 1234);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Keijzer 5 f("));
  var problemData = instances.LoadData(descriptor);

  // some Keijzer instances have very large test partitions; test performance is not
  // of concern here, so the partition is cut to at most maxTestRows rows
  if (problemData.TestPartition.End - problemData.TestPartition.Start > maxTestRows) {
    problemData.TestPartition.End = problemData.TestPartition.Start + maxTestRows;
  }

  TestMcts(problemData, maxVariableReferences: 20, allowExp: false, allowLog: false, allowInv: true);
}
1301
// Search including numeric constants on Keijzer 6.
// Target: f(x) = Sum(1 / i) From 1 to X  , x \in [0..120]
// This target can only be approximated.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer6() {
  const int maxTestRows = 1000;
  var instances = new KeijzerInstanceProvider(seed: 1234);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Keijzer 6 f("));
  var problemData = instances.LoadData(descriptor);

  // some Keijzer instances have very large test partitions; test performance is not
  // of concern here, so the partition is cut to at most maxTestRows rows
  if (problemData.TestPartition.End - problemData.TestPartition.Start > maxTestRows) {
    problemData.TestPartition.End = problemData.TestPartition.Start + maxTestRows;
  }

  TestMcts(problemData, maxVariableReferences: 20, allowExp: false, allowLog: false, allowInv: true);
}
1314
1315
// Search including numeric constants on Keijzer 8.
// Target: sqrt(x)
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer8() {
  const int maxTestRows = 1000;
  var instances = new KeijzerInstanceProvider(seed: 1234);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Keijzer 8 f("));
  var problemData = instances.LoadData(descriptor);

  // some Keijzer instances have very large test partitions; test performance is not
  // of concern here, so the partition is cut to at most maxTestRows rows
  if (problemData.TestPartition.End - problemData.TestPartition.Start > maxTestRows) {
    problemData.TestPartition.End = problemData.TestPartition.Start + maxTestRows;
  }

  TestMcts(problemData, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false);
}
1327
// Search including numeric constants on Keijzer 9.
// Target: arcsinh(x)  i.e. ln(x + sqrt(x² + 1))
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer9() {
  const int maxTestRows = 1000;
  var instances = new KeijzerInstanceProvider(seed: 1234);
  var descriptor = instances.GetDataDescriptors().Single(d => d.Name.Contains("Keijzer 9 f("));
  var problemData = instances.LoadData(descriptor);

  // some Keijzer instances have very large test partitions; test performance is not
  // of concern here, so the partition is cut to at most maxTestRows rows
  if (problemData.TestPartition.End - problemData.TestPartition.Start > maxTestRows) {
    problemData.TestPartition.End = problemData.TestPartition.Start + maxTestRows;
  }

  TestMcts(problemData, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false);
}
1339
1340    /*
1341    [TestMethod]
1342    [TestCategory("Algorithms.DataAnalysis")]
1343    [TestProperty("Time", "short")]
1344    public void MctsSymbRegBenchmarkKeijzer11() {
1345      // xy + sin( (x-1) (y-1) )
1346      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
1347      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 11 f(")));
1348      // some Keijzer problem instances have very large test partitions (here we are not concerned about test performance)
1349      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
1350      TestMcts(regProblem, successThreshold: 0.99); // cannot solve this yet
1351    }
1352     */
1353    [TestMethod]
1354    [TestCategory("Algorithms.DataAnalysis")]
1355    [TestProperty("Time", "short")]
1356    public void MctsSymbRegBenchmarkKeijzer12() {
1357      // x^4 - x³ + y² / 2 - y,  same as Nguyen 12             
1358      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
1359      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 12 f(")));
1360      // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
1361      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
1362      TestMcts(regProblem, maxVariableReferences: 15, allowExp: false, allowLog: false, allowInv: false);
1363    }
1364    [TestMethod]
1365    [TestCategory("Algorithms.DataAnalysis")]
1366    [TestProperty("Time", "short")]
1367    public void MctsSymbRegBenchmarkKeijzer14() {
1368      // 8 / (2 + x² + y²)
1369      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
1370      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 14 f(")));
1371      // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
1372      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
1373      TestMcts(regProblem, maxVariableReferences: 10, allowExp: false, allowLog: false, allowInv: true);
1374    }
1375    [TestMethod]
1376    [TestCategory("Algorithms.DataAnalysis")]
1377    [TestProperty("Time", "short")]
1378    public void MctsSymbRegBenchmarkKeijzer15() {
1379      // x³ / 5 + y³ / 2 - y - x
1380      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
1381      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 15 f(")));
1382      // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance)
1383      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
1384      TestMcts(regProblem, maxVariableReferences: 10, allowExp: false, allowLog: false, allowInv: false);
1385    }
1386    #endregion
1387
1388    private void TestMcts(IRegressionProblemData problemData,
1389      int iterations = 20000,
1390      double successThreshold = 0.99999,
1391      int maxVariableReferences = 5,
1392      bool allowExp = true,
1393      bool allowLog = true,
1394      bool allowInv = true,
1395      bool allowSum = true
1396      ) {
1397      var mctsSymbReg = new MctsSymbolicRegressionAlgorithm();
1398      var regProblem = new RegressionProblem();
1399      regProblem.ProblemDataParameter.Value = problemData;
1400      #region Algorithm Configuration
1401      mctsSymbReg.Problem = regProblem;
1402      mctsSymbReg.Iterations = iterations;
1403      mctsSymbReg.MaxVariableReferences = maxVariableReferences;
1404
1405      mctsSymbReg.SetSeedRandomly = false;
1406      mctsSymbReg.Seed = 1234;
1407      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("exp")), allowExp);
1408      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log")), allowLog);
1409      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 /")), allowInv);
1410      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("t1(x) + t2(x) + ... ")), allowSum);
1411
1412      mctsSymbReg.ScaleVariables = true;
1413      mctsSymbReg.ConstantOptimizationIterations = 0;
1414
1415      #endregion
1416      RunAlgorithm(mctsSymbReg);
1417
1418      Console.WriteLine(mctsSymbReg.ExecutionTime);
1419      var eps = 1.0 - successThreshold;
1420      Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (train)"].Value).Value, eps);
1421      Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (test)"].Value).Value, eps);
1422    }
1423
1424
1425    private void TestMctsWithoutConstants(IRegressionProblemData problemData,
1426      int nVarRefs = 10,
1427      int iterations = 200000, double successThreshold = 0.99999,
1428      bool allowExp = true,
1429      bool allowLog = true,
1430      bool allowInv = true,
1431      bool allowSum = true
1432      ) {
1433      var mctsSymbReg = new MctsSymbolicRegressionAlgorithm();
1434      var regProblem = new RegressionProblem();
1435      regProblem.ProblemDataParameter.Value = problemData;
1436      #region Algorithm Configuration
1437      mctsSymbReg.Problem = regProblem;
1438      mctsSymbReg.Iterations = iterations;
1439      mctsSymbReg.MaxVariableReferences = nVarRefs;
1440      mctsSymbReg.SetSeedRandomly = false;
1441      mctsSymbReg.Seed = 1234;
1442      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("exp")), allowExp);
1443      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log")), allowLog);
1444      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 /")), allowInv);
1445      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("t1(x) + t2(x) + ... ")), allowSum);
1446
1447      // no constants
1448      mctsSymbReg.ScaleVariables = false;
1449      mctsSymbReg.ConstantOptimizationIterations = -1;
1450
1451
1452      #endregion
1453      RunAlgorithm(mctsSymbReg);
1454
1455      Console.WriteLine(mctsSymbReg.ExecutionTime);
1456      var eps = 1.0 - successThreshold;
1457      Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (train)"].Value).Value, eps);
1458      Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (test)"].Value).Value, eps);
1459    }
1460
1461    private void TestMctsNumberOfSolutions(IRegressionProblemData problemData, int maxNumberOfVariables, int expectedNumberOfSolutions,
1462      bool allowProd = true,
1463      bool allowExp = true,
1464      bool allowLog = true,
1465      bool allowInv = true,
1466      bool allowSum = true
1467  ) {
1468      var mctsSymbReg = new MctsSymbolicRegressionAlgorithm();
1469      var regProblem = new RegressionProblem();
1470      regProblem.ProblemDataParameter.Value = problemData;
1471      #region Algorithm Configuration
1472
1473      mctsSymbReg.SetSeedRandomly = false;
1474      mctsSymbReg.Seed = 1234;
1475      mctsSymbReg.Problem = regProblem;
1476      mctsSymbReg.Iterations = int.MaxValue; // stopping when all solutions have been enumerated
1477      mctsSymbReg.MaxVariableReferences = maxNumberOfVariables;
1478      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.StartsWith("x * y * ...")), allowProd);
1479      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("exp(c * x * y ...)")), allowExp);
1480      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log(c + c1 x + c2 x + ...)")), allowLog);
1481      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 / (1 + c1 x + c2 x + ...)")), allowInv);
1482      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("t1(x) + t2(x) + ... ")), allowSum);
1483      #endregion
1484      RunAlgorithm(mctsSymbReg);
1485
1486      Console.WriteLine(mctsSymbReg.ExecutionTime);
1487      Assert.AreEqual(expectedNumberOfSolutions, ((IntValue)mctsSymbReg.Results["Iterations"].Value).Value);
1488    }
1489
1490
1491    // same as in SamplesUtil
1492    private void RunAlgorithm(IAlgorithm a) {
1493      var trigger = new EventWaitHandle(false, EventResetMode.ManualReset);
1494      Exception ex = null;
1495      a.Stopped += (src, e) => { trigger.Set(); };
1496      a.ExceptionOccurred += (src, e) => { ex = e.Value; trigger.Set(); };
1497      a.Prepare();
1498      a.Start();
1499      trigger.WaitOne();
1500
1501      Assert.AreEqual(ex, null);
1502    }
1503
1504  }
1505}
Note: See TracBrowser for help on using the repository browser.