Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2796_SymbReg/Tests/HeuristicLab.Algorithms.DataAnalysis-3.4/MctsSymbolicRegressionTest.cs @ 16300

Last change on this file since 16300 was 15441, checked in by gkronber, 7 years ago

#2796 more bug fixing

File size: 74.3 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Threading;
5using HeuristicLab.Algorithms.DataAnalysis.MCTSSymbReg;
6using HeuristicLab.Data;
7using HeuristicLab.Optimization;
8using HeuristicLab.Problems.DataAnalysis;
9using HeuristicLab.Problems.Instances.DataAnalysis;
10using HeuristicLab.Random;
11using Microsoft.VisualStudio.TestTools.UnitTesting;
12
13namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
14  [TestClass()]
15  public class MctsSymbolicRegressionTest {
16    #region heuristics
17    [TestMethod]
18    [TestCategory("Algorithms.DataAnalysis")]
19    [TestProperty("Time", "short")]
20    public void TestSimple2dInteractions() {
21      {
22        // a, b ~ U(0, 1) should be trivial
23        var nRand = new MersenneTwister(1234);
24
25        int n = 10000; // large sample so that we can use the thresholds below
26        var a = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
27        var b = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
28        var x = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
29        var y = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
30
31        var z = a.Zip(b, (ai, bi) => ai * bi).ToArray();
32
33        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); // should be detected as relevant
34        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) > 0.05); // a and b > 0 so these should be detected as well
35        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) > 0.05);
36        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) > 0.05);
37        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) > 0.05);
38        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
39      }
40      {
41        // a, b ~ U(1000, 2000) also trivial
42        var nRand = new UniformDistributedRandom(new MersenneTwister(1234), 1000, 2000);
43
44        int n = 10000; // large sample so that we can use the thresholds below
45        var a = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
46        var b = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
47        var x = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
48        var y = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
49
50        var z = a.Zip(b, (ai, bi) => ai * bi).ToArray();
51
52        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); // should be detected as relevant
53        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) > 0.05);
54        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) > 0.05);
55        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) > 0.05);
56        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) > 0.05);
57        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
58      }
59      {
60        // a, b ~ U(-1, 1)
61        var nRand = new UniformDistributedRandom(new MersenneTwister(1234), -1, 1);
62
63        int n = 10000; // large sample so that we can use the thresholds below
64        var a = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
65        var b = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
66        var x = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
67        var y = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
68
69        var z = a.Zip(b, (ai, bi) => ai * bi).ToArray();
70
71        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); // should be detected as relevant
72        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05);
73        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05);
74        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05);
75        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05);
76        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
77      }
78      {
79        // a, b ~ N(0, 1)
80        var nRand = new NormalDistributedRandom(new MersenneTwister(1234), 0, 1);
81
82        int n = 10000; // large sample so that we can use the thresholds below
83        var a = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
84        var b = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
85        var x = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
86        var y = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
87
88        var z = a.Zip(b, (ai, bi) => ai * bi).ToArray();
89
90        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); // should be detected as relevant
91        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05);
92        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05);
93        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05);
94        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05);
95        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
96      }
97      {
98        var rand = new MersenneTwister(1234);
99        // a ~ N(100, 1), b ~ N(-100, 1)
100        var nRand = new NormalDistributedRandom(rand, 0, 1);
101        var aRand = new NormalDistributedRandom(rand, 100, 1);
102        var bRand = new NormalDistributedRandom(rand, -100, 1);
103
104        int n = 10000; // large sample so that we can use the thresholds below
105        var a = Enumerable.Range(0, n).Select(_ => aRand.NextDouble()).ToArray();
106        var b = Enumerable.Range(0, n).Select(_ => bRand.NextDouble()).ToArray();
107        var x = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
108        var y = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
109
110        var z = a.Zip(b, (ai, bi) => ai * bi).ToArray();
111
112        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); // should be detected as relevant
113        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) > 0.05); // a > 0
114        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) > 0.05);
115        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) > 0.05); // b < 0
116        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) > 0.05);
117        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05); // random variables are not correlated
118      }
119    }
120
121    [TestMethod]
122    [TestCategory("Algorithms.DataAnalysis")]
123    [TestProperty("Time", "short")]
124    public void TestGeneral2dInteractions() {
125      {
126        // we should be able to reliably detect when a product of two variables is correlated with the target variable       
127
128        // the test samples x from a two dimensional normal distribution
129        // the covariance matrix for the normal distribution is randomly sampled
130        // this means x_1 and x_2 might be highly correlated
131        // the mean of the normal distribution is randomly sampled (most critical are probably zero-mean distributions)
132        // y is calculated as x_1*x_2
133
134        var conditionNumber = 10000;
135        for (int iter = 0; iter < 100; iter++) {
136          double m0 = 0.0; double m1 = 0.0;
137          alglib.hqrndstate randState;
138          alglib.hqrndseed(1234, 31415, out randState);
139
140          // uncomment if non-zero mean distributions should be tested
141          //alglib.hqrndnormal2(randState, out m0, out m1);
142
143          double[,] cov_ab = new double[2, 2];
144          double[,] cov_xy = new double[2, 2];
145          alglib.matgen.spdmatrixrndcond(2, conditionNumber, ref cov_ab);
146          alglib.spdmatrixcholesky(ref cov_ab, 2, true);
147
148          alglib.matgen.spdmatrixrndcond(2, conditionNumber, ref cov_xy);
149          alglib.spdmatrixcholesky(ref cov_xy, 2, true);
150
151          // generate a, b by sampling from a 2dim multivariate normal distribution
152          // generate x, y by sampling from another 2dim multivariate normal distribution
153          // a,b and x,y might be correlated but x,y are not correlated to a,b
154          int N = 1000; // large sample size to make sure the test thresholds hold
155          double[] a = new double[N];
156          double[] b = new double[N];
157          double[] x = new double[N];
158          double[] y = new double[N];
159          double[] z = new double[N];
160
161          for (int i = 0; i < N; i++) {
162            double r1, r2, r3, r4;
163            alglib.hqrndnormal2(randState, out r1, out r2);
164            alglib.hqrndnormal2(randState, out r3, out r4);
165
166            var r_ab = new double[] { r1, r2 };
167            var r_xy = new double[] { r3, r4 };
168            double[] s_ab = new double[2];
169            double[] s_xy = new double[2];
170            alglib.ablas.rmatrixmv(2, 2, cov_ab, 0, 0, 0, r_ab, 0, ref s_ab, 0);
171            alglib.ablas.rmatrixmv(2, 2, cov_xy, 0, 0, 0, r_xy, 0, ref s_xy, 0);
172
173            a[i] = s_ab[0] + m0;
174            b[i] = s_ab[1] + m1;
175            x[i] = s_xy[0] + m0; // use same mean (just for the sake of it)
176            y[i] = s_xy[1] + m1;
177
178            z[i] = a[i] * b[i];
179          }
180
181          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05);
182          Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
183
184          /* we might see correlations when only using one of the two relevant factors.
185           * this depends on the distribution / location of a and b
186          // for zero-mean distributions the following should all be quasi-zero
187          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05);
188          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05);
189          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05);
190          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05);
191          */
192          Console.WriteLine("a,b: {0:N3}\tx,y: {1:N3}\ta,x: {2:N3}\tb,x: {3:N3}\ta,y: {4:N3}\tb,y: {5:N3}\tcov(a,b): {6:N3}",
193            Heuristics.CorrelationForInteraction(a, b, z),
194            Heuristics.CorrelationForInteraction(x, y, z),
195            Heuristics.CorrelationForInteraction(a, x, z),
196            Heuristics.CorrelationForInteraction(b, x, z),
197            Heuristics.CorrelationForInteraction(a, y, z),
198            Heuristics.CorrelationForInteraction(b, y, z),
199            alglib.cov2(a, b)
200            );
201        }
202      }
203    }
204    [TestMethod]
205    [TestCategory("Algorithms.DataAnalysis")]
206    [TestProperty("Time", "short")]
207    public void TestGeneral3dInteractions() {
208      {
209        // same as TestGeneral2dInteractions but for terms with three variables
210
211        var conditionNumber = 100;
212        for (int iter = 0; iter < 100; iter++) {
213          double m0 = 0.0; double m1 = 0.0; double m2 = 0.0;
214          alglib.hqrndstate randState;
215          alglib.hqrndseed(1234, 31415, out randState);
216
217          // uncomment if non-zero mean distributions should be tested
218          //alglib.hqrndnormal2(randState, out m0, out m1);
219          //alglib.hqrndnormal2(randState, out m1, out m2);
220
221          double[,] cov_abc = new double[3, 3];
222          double[,] cov_xyz = new double[3, 3];
223          alglib.matgen.spdmatrixrndcond(3, conditionNumber, ref cov_abc);
224          alglib.spdmatrixcholesky(ref cov_abc, 3, true);
225
226          alglib.matgen.spdmatrixrndcond(3, conditionNumber, ref cov_xyz);
227          alglib.spdmatrixcholesky(ref cov_xyz, 3, true);
228
229          int N = 1000; // large sample size to make sure the test thresholds hold
230          double[] a = new double[N];
231          double[] b = new double[N];
232          double[] c = new double[N];
233          double[] x = new double[N];
234          double[] y = new double[N];
235          double[] z = new double[N];
236          double[] t = new double[N];
237
238          for (int i = 0; i < N; i++) {
239            double r1, r2, r3, r4, r5, r6;
240            alglib.hqrndnormal2(randState, out r1, out r2);
241            alglib.hqrndnormal2(randState, out r3, out r4);
242            alglib.hqrndnormal2(randState, out r5, out r6);
243
244            var r_abc = new double[] { r1, r2, r3 };
245            var r_xyz = new double[] { r4, r5, r6 };
246            double[] s_abc = new double[3];
247            double[] s_xyz = new double[3];
248            alglib.ablas.rmatrixmv(3, 3, cov_abc, 0, 0, 0, r_abc, 0, ref s_abc, 0);
249            alglib.ablas.rmatrixmv(3, 3, cov_xyz, 0, 0, 0, r_xyz, 0, ref s_xyz, 0);
250
251            a[i] = s_abc[0] + m0;
252            b[i] = s_abc[1] + m1;
253            c[i] = s_abc[2] + m2;
254            x[i] = s_xyz[0] + m0; // use same mean (just for the sake of it)
255            y[i] = s_xyz[1] + m1;
256            z[i] = s_xyz[2] + m2;
257
258            t[i] = a[i] * b[i] * c[i];
259          }
260
261          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, c, t) > 0.05);
262          Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z, t) < 0.05);
263
264          /* we might see correlations when only using one of the two relevant factors.
265           * this depends on the distribution / location of a and b
266          // for zero-mean distributions the following should all be quasi-zero
267          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05);
268          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05);
269          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05);
270          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05);
271          */
272          Console.WriteLine("a,b,c: {0:N3}\tx,y,z: {1:N3}\ta,b,x: {2:N3}\tb,c,x: {3:N3}",
273            Heuristics.CorrelationForInteraction(a, b, c, t),
274            Heuristics.CorrelationForInteraction(x, y, z, t),
275            Heuristics.CorrelationForInteraction(a, b, x, t),
276            Heuristics.CorrelationForInteraction(b, c, x, t)
277            );
278        }
279      }
280    }
281
282    [TestMethod]
283    [TestCategory("Algorithms.DataAnalysis")]
284    [TestProperty("Time", "short")]
285    public void TestPoly10Interactions() {
286      {
287        alglib.hqrndstate randState;
288        alglib.hqrndseed(1234, 31415, out randState);
289
290        int N = 25000; // large sample size to make sure the test thresholds hold
291        double[] a = new double[N];
292        double[] b = new double[N];
293        double[] c = new double[N];
294        double[] d = new double[N];
295        double[] e = new double[N];
296        double[] f = new double[N];
297        double[] g = new double[N];
298        double[] h = new double[N];
299        double[] i = new double[N];
300        double[] j = new double[N];
301        double[] y = new double[N];
302
303        for(int k=0;k<N;k++) {
304          a[k] = alglib.hqrnduniformr(randState) * 2 - 1;
305          b[k] = alglib.hqrnduniformr(randState) * 2 - 1;
306          c[k] = alglib.hqrnduniformr(randState) * 2 - 1;
307          d[k] = alglib.hqrnduniformr(randState) * 2 - 1;
308          e[k] = alglib.hqrnduniformr(randState) * 2 - 1;
309          f[k] = alglib.hqrnduniformr(randState) * 2 - 1;
310          g[k] = alglib.hqrnduniformr(randState) * 2 - 1;
311          h[k] = alglib.hqrnduniformr(randState) * 2 - 1;
312          i[k] = alglib.hqrnduniformr(randState) * 2 - 1;
313          j[k] = alglib.hqrnduniformr(randState) * 2 - 1;
314          y[k] = a[k] * b[k] + c[k] * d[k] + e[k] * f[k] + a[k] * g[k] * i[k] + c[k] * f[k] * j[k];
315        }
316
317        var x = new[] { a, b, c, d, e, f, g, h, i, j };
318        var all2Combinations = HeuristicLab.Common.EnumerableExtensions.Combinations(new[] {1,2,3,4,5,6,7,8,9,10}, 2);
319
320        var resultList = new List<Tuple<string, double>>();
321        foreach(var entry in all2Combinations) {
322          var aIdx = entry.First();
323          var bIdx = entry.Skip(1).First();
324          resultList.Add(Tuple.Create(aIdx + " " + bIdx, Heuristics.CorrelationForInteraction(x[aIdx - 1], x[bIdx - 1], y)));
325        }
326
327        foreach(var entry in resultList.OrderByDescending(t => t.Item2)) {
328          Console.WriteLine("{0} {1:N3}", entry.Item1, entry.Item2);
329        }
330
331        var all3Combinations = HeuristicLab.Common.EnumerableExtensions.Combinations(new[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, 3);
332
333        resultList = new List<Tuple<string, double>>();
334        foreach (var entry in all3Combinations) {
335          var aIdx = entry.First();
336          var bIdx = entry.Skip(1).First();
337          var cIdx = entry.Skip(2).First();
338          resultList.Add(Tuple.Create(aIdx + " " + bIdx + " " + cIdx, Heuristics.CorrelationForInteraction(x[aIdx - 1], x[bIdx - 1], x[cIdx - 1], y)));
339        }
340
341        //  Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
342
343        foreach (var entry in resultList.OrderByDescending(t => t.Item2)) {
344          Console.WriteLine("{0} {1:N3}", entry.Item1, entry.Item2);
345        }
346
347
348        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, y) > 0.01);
349        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, a, y) > 0.01);
350        Assert.IsTrue(Heuristics.CorrelationForInteraction(c, d, y) > 0.01);
351        Assert.IsTrue(Heuristics.CorrelationForInteraction(d, c, y) > 0.01);
352        Assert.IsTrue(Heuristics.CorrelationForInteraction(e, f, y) > 0.01);
353        Assert.IsTrue(Heuristics.CorrelationForInteraction(f, e, y) > 0.01);
354        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, g, i, y) > 0.01);
355        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, i, g, y) > 0.01);
356        Assert.IsTrue(Heuristics.CorrelationForInteraction(g, a, i, y) > 0.01);
357        Assert.IsTrue(Heuristics.CorrelationForInteraction(g, i, a, y) > 0.01);
358        Assert.IsTrue(Heuristics.CorrelationForInteraction(i, g, a, y) > 0.01);
359        Assert.IsTrue(Heuristics.CorrelationForInteraction(i, a, g, y) > 0.01);
360
361        Assert.IsTrue(Heuristics.CorrelationForInteraction(c, f, j, y) > 0.01);
362        Assert.IsTrue(Heuristics.CorrelationForInteraction(c, j, f, y) > 0.01);
363        Assert.IsTrue(Heuristics.CorrelationForInteraction(f, c, j, y) > 0.01);
364        Assert.IsTrue(Heuristics.CorrelationForInteraction(f, j, c, y) > 0.01);
365        Assert.IsTrue(Heuristics.CorrelationForInteraction(j, c, f, y) > 0.01);
366        Assert.IsTrue(Heuristics.CorrelationForInteraction(j, f, c, y) > 0.01);
367      }
368    }
369    #endregion
370
371
372    #region expression hashing
373    [TestMethod]
374    [TestCategory("Algorithms.DataAnalysis")]
375    [TestProperty("Time", "short")]
376    public void ExprHashSymbolicTest() {
377      int nParams;
378      byte[] code;
379
380      {
381        // addition of variables
382        var codeGen = new CodeGenerator();
383        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
384        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
385        codeGen.Emit1(OpCodes.Add);
386        codeGen.Emit1(OpCodes.Exit);
387        codeGen.GetCode(out code, out nParams);
388        var h1 = ExprHashSymbolic.GetHash(code, nParams);
389
390        codeGen = new CodeGenerator();
391        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
392        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
393        codeGen.Emit1(OpCodes.Add);
394        codeGen.Emit1(OpCodes.Exit);
395        codeGen.GetCode(out code, out nParams);
396        var h2 = ExprHashSymbolic.GetHash(code, nParams);
397
398        Assert.AreEqual(h1, h2);
399      }
400
401      {
402        // multiplication of variables
403        var codeGen = new CodeGenerator();
404        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
405        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
406        codeGen.Emit1(OpCodes.Mul);
407        codeGen.Emit1(OpCodes.Exit);
408        codeGen.GetCode(out code, out nParams);
409        var h1 = ExprHashSymbolic.GetHash(code, nParams);
410
411        codeGen = new CodeGenerator();
412        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
413        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
414        codeGen.Emit1(OpCodes.Mul);
415        codeGen.Emit1(OpCodes.Exit);
416        codeGen.GetCode(out code, out nParams);
417        var h2 = ExprHashSymbolic.GetHash(code, nParams);
418
419        Assert.AreEqual(h1, h2);
420      }
421
422      {
423        // distributivity
424        var codeGen = new CodeGenerator();
425        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
426        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
427        codeGen.Emit1(OpCodes.Add);
428        codeGen.Emit2(OpCodes.LoadVar, 3);
429        codeGen.Emit1(OpCodes.Mul);
430        codeGen.Emit1(OpCodes.Exit);
431        codeGen.GetCode(out code, out nParams);
432        var h1 = ExprHashSymbolic.GetHash(code, nParams);
433
434        codeGen = new CodeGenerator();
435        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
436        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 3);
437        codeGen.Emit1(OpCodes.Mul);
438        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
439        codeGen.Emit2(OpCodes.LoadVar, 3);
440        codeGen.Emit1(OpCodes.Mul);
441        codeGen.Emit1(OpCodes.Add);
442        codeGen.Emit1(OpCodes.Exit);
443        codeGen.GetCode(out code, out nParams);
444        var h2 = ExprHashSymbolic.GetHash(code, nParams);
445
446        Assert.AreEqual(h1, h2);
447      }
448
449
450      { // 1/(x1x2) = 1/x1 * 1/x2
451        var codeGen = new CodeGenerator();
452        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
453        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
454        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
455        codeGen.Emit1(OpCodes.Inv);
456        codeGen.Emit1(OpCodes.Exit);
457        codeGen.GetCode(out code, out nParams);
458        var h1 = ExprHashSymbolic.GetHash(code, nParams);
459
460        codeGen = new CodeGenerator();
461        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
462        codeGen.Emit1(OpCodes.Inv);
463        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
464        codeGen.Emit1(OpCodes.Inv);
465        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
466        codeGen.Emit1(OpCodes.Exit);
467        codeGen.GetCode(out code, out nParams);
468        var h2 = ExprHashSymbolic.GetHash(code, nParams);
469
470        Assert.AreEqual(h1, h2);
471      }
472      {
473        // exp
474        var codeGen = new CodeGenerator();
475        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
476        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
477        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
478        codeGen.Emit1(OpCodes.Exp);
479        codeGen.Emit1(OpCodes.Exit);
480        codeGen.GetCode(out code, out nParams);
481        var h1 = ExprHashSymbolic.GetHash(code, nParams);
482
483        codeGen = new CodeGenerator();
484        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
485        codeGen.Emit1(OpCodes.Exp);
486        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
487        codeGen.Emit1(OpCodes.Exp);
488        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
489        codeGen.GetCode(out code, out nParams);
490        codeGen.Emit1(OpCodes.Exit);
491        var h2 = ExprHashSymbolic.GetHash(code, nParams);
492
493        Assert.AreEqual(h1, h2);
494      }
495      {
496        // log
497        var codeGen = new CodeGenerator();
498        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
499        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
500        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
501        codeGen.Emit1(OpCodes.Log);
502        codeGen.Emit1(OpCodes.Exit);
503        codeGen.GetCode(out code, out nParams);
504        var h1 = ExprHashSymbolic.GetHash(code, nParams);
505
506        codeGen = new CodeGenerator();
507        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
508        codeGen.Emit1(OpCodes.Log);
509        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 2);
510        codeGen.Emit1(OpCodes.Log);
511        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
512        codeGen.Emit1(OpCodes.Exit);
513        codeGen.GetCode(out code, out nParams);
514        var h2 = ExprHashSymbolic.GetHash(code, nParams);
515
516        Assert.AreEqual(h1, h2);
517      }
518
519      {
520        // x1 + x1 is equivalent to x1
521        var codeGen = new CodeGenerator();
522        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
523        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
524        codeGen.Emit1(OpCodes.Add);
525        codeGen.Emit1(OpCodes.Exit);
526        codeGen.GetCode(out code, out nParams);
527        var h1 = ExprHashSymbolic.GetHash(code, nParams);
528
529        codeGen = new CodeGenerator();
530        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
531        codeGen.Emit1(OpCodes.Exit);
532        codeGen.GetCode(out code, out nParams);
533        var h2 = ExprHashSymbolic.GetHash(code, nParams);
534
535        Assert.AreEqual(h1, h2);
536      }
537      {
538        // c1*x1 + c2*x1 is equivalent to c3*x1
539        var codeGen = new CodeGenerator();
540        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
541        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
542        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
543
544        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
545        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
546        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
547
548        codeGen.Emit1(OpCodes.Add);
549        codeGen.Emit1(OpCodes.Exit);
550        codeGen.GetCode(out code, out nParams);
551        var h1 = ExprHashSymbolic.GetHash(code, nParams);
552
553        codeGen = new CodeGenerator();
554        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
555        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
556        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
557        codeGen.Emit1(OpCodes.Exit);
558        codeGen.GetCode(out code, out nParams);
559        var h2 = ExprHashSymbolic.GetHash(code, nParams);
560
561        Assert.AreEqual(h1, h2);
562      }
563
564      { // c1 x1 + c2 x1 = c3 x1 (extended version)
565        var codeGen = new CodeGenerator();
566        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst0);
567        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst1);
568        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
569        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
570        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
571
572        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
573        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
574        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
575
576        codeGen.Emit1(OpCodes.Add);
577
578        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
579        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
580
581        codeGen.Emit1(OpCodes.Exit);
582        codeGen.GetCode(out code, out nParams);
583        var h1 = ExprHashSymbolic.GetHash(code, nParams);
584
585        codeGen = new CodeGenerator();
586        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst0);
587        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst1);
588        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
589        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
590        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
591        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
592        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
593        codeGen.Emit1(OpCodes.Exit);
594        codeGen.GetCode(out code, out nParams);
595        var h2 = ExprHashSymbolic.GetHash(code, nParams);
596
597        Assert.AreEqual(h1, h2);
598      }
599      {
600        // exp(x1) * exp(x1) = exp(x1)
601        var codeGen = new CodeGenerator();
602        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst0);
603        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst1);
604        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
605        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
606        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
607        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Exp);
608
609        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
610        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
611        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
612        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Exp);
613
614        codeGen.Emit1(OpCodes.Mul);
615
616        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
617        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
618
619        codeGen.Emit1(OpCodes.Exit);
620        codeGen.GetCode(out code, out nParams);
621        var h1 = ExprHashSymbolic.GetHash(code, nParams);
622
623        codeGen = new CodeGenerator();
624        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst0);
625        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst1);
626        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
627        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
628        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
629        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Exp);
630        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
631        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
632        codeGen.Emit1(OpCodes.Exit);
633        codeGen.GetCode(out code, out nParams);
634        var h2 = ExprHashSymbolic.GetHash(code, nParams);
635
636        Assert.AreEqual(h1, h2);
637      }
638      {
639        // inv(x1) + inv(x1) != inv(x1)
640        var codeGen = new CodeGenerator();
641        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst0);
642        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst1);
643        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
644        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
645        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
646        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Inv);
647
648        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
649        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
650        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
651        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Inv);
652
653        codeGen.Emit1(OpCodes.Add);
654
655        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
656        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
657
658        codeGen.Emit1(OpCodes.Exit);
659        codeGen.GetCode(out code, out nParams);
660        var h1 = ExprHashSymbolic.GetHash(code, nParams);
661
662        codeGen = new CodeGenerator();
663        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst0);
664        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst1);
665        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
666        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
667        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
668        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Inv);
669        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
670        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
671        codeGen.Emit1(OpCodes.Exit);
672        codeGen.GetCode(out code, out nParams);
673        var h2 = ExprHashSymbolic.GetHash(code, nParams);
674
675        Assert.AreNotEqual(h1, h2);
676      }
677
678      {
679        // exp(x1) + exp(x1) != exp(x1)
680        var codeGen = new CodeGenerator();
681        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst0);
682        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst1);
683        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
684        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
685        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
686        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Exp);
687        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
688
689        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst1);
690        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
691        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
692        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
693        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Exp);
694        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
695
696        codeGen.Emit1(OpCodes.Add);
697        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
698
699        codeGen.Emit1(OpCodes.Exit);
700        codeGen.GetCode(out code, out nParams);
701        var h1 = ExprHashSymbolic.GetHash(code, nParams);
702
703        codeGen = new CodeGenerator();
704        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst0);
705        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst1);
706        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
707        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
708        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
709        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Exp);
710        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
711        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
712        codeGen.Emit1(OpCodes.Exit);
713        codeGen.GetCode(out code, out nParams);
714        var h2 = ExprHashSymbolic.GetHash(code, nParams);
715
716        Assert.AreNotEqual(h1, h2);
717      }
718      {
719        // log(x1) + log(x1) != log(x1)
720        var codeGen = new CodeGenerator();
721        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst0);
722        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst1);
723        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
724        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
725        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
726        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Log);
727
728        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
729        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
730        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
731        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Log);
732
733        codeGen.Emit1(OpCodes.Add);
734
735        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
736        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
737
738        codeGen.Emit1(OpCodes.Exit);
739        codeGen.GetCode(out code, out nParams);
740        var h1 = ExprHashSymbolic.GetHash(code, nParams);
741
742        codeGen = new CodeGenerator();
743        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst0);
744        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadConst1);
745        codeGen.Emit1(MctsSymbolicRegression.OpCodes.LoadParamN);
746        codeGen.Emit2(MctsSymbolicRegression.OpCodes.LoadVar, 1);
747        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
748        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Log);
749        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Mul);
750        codeGen.Emit1(MctsSymbolicRegression.OpCodes.Add);
751        codeGen.Emit1(OpCodes.Exit);
752        codeGen.GetCode(out code, out nParams);
753        var h2 = ExprHashSymbolic.GetHash(code, nParams);
754
755        Assert.AreNotEqual(h1, h2);
756      }
757
758    }
759    #endregion
760
761    #region number of solutions
// the algorithm should visit each solution only once
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegNumberOfSolutionsOneVariable() {
  // Nguyen F1 is used because it has only one input variable (x)
  var instanceProvider = new NguyenInstanceProvider();
  var descriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("F1 "));
  var problemData = instanceProvider.LoadData(descriptor);

  // Every case below passes the maximum number of variable references followed by the
  // number of solutions that were enumerated by hand in the comment preceding the call.

  // ---- max. one variable reference ----
  //   x
  //   log(x)
  //   exp(x)
  //   1/x
  const int oneRefCount = 4;
  TestMctsNumberOfSolutions(problemData, 1, oneRefCount);

  // ---- max. 4 variable references, without exp, log and inv ----
  //   x
  //   x*x
  //   x*x*x
  //   x+x*x
  //   x+x*x*x
  //   x*x*x*x
  const int fourRefsPolyCount = 6;
  TestMctsNumberOfSolutions(problemData, 4, fourRefsPolyCount, allowExp: false, allowLog: false, allowInv: false);

  // ---- max. 5 variable references, without exp, log and inv ----
  //   x
  //   xx
  //   xxx
  //   x+xx
  //   xxxx
  //   x+xxx
  //   xxxxx
  //   x+xxxx
  //   xx+xxx
  const int fiveRefsPolyCount = 9;
  TestMctsNumberOfSolutions(problemData, 5, fiveRefsPolyCount, allowExp: false, allowLog: false, allowInv: false);

  // ---- max. two variable references ----
  //   x
  //   log(x+c)
  //   exp(x)
  //   1/(x+c)
  //                                 -- 4
  //   x * x
  //   x * log(x+c)
  //   x * exp(x)
  //   x * 1/(x + c)
  //   x + log(x+c)
  //   x + exp(x)
  //   x + 1/(x+c)
  //                                 -- 7
  //   log(x + c) * log(x + c)
  //   log(x + c) * exp(x)
  //   log(x + c) * 1/(x + c)
  //   log(x + c) + log(x + c)
  //   log(x + c) + exp(x)
  //   log(x + c) + 1/(x+c)
  //                                 -- 6
  //   exp(cx) * 1/(x+c)
  //   exp(cx) + exp(cx)
  //   exp(cx) + 1/(x+c)
  //                                 -- 3
  //   1/(x+c) * 1/(x+c)
  //   1/(x+c) + 1/(x+c)
  //                                 -- 2
  //   log(x*x)
  //   exp(x*x)
  //   inv(x*x+c)
  //                                 -- 3
  const int twoRefsCount = 4 + 7 + 6 + 3 + 2 + 3; // = 25
  TestMctsNumberOfSolutions(problemData, 2, twoRefsCount);

  // ---- max. three variable references, without log and inv ----
  //   x
  //   exp(x)
  //                                 -- 2
  //   x * x
  //   x * exp(x)
  //   x + exp(x)
  //   exp(x) + exp(x)
  //   exp(x*x)
  //                                 -- 5
  //   x * x * x
  //   x + x * x
  //   x * x * exp(x)
  //   x + x * exp(x)
  //   exp(x) + x*x
  //   exp(x) + x*exp(x)
  //   x + exp(x) + exp(x)
  //   x * exp(x*x)
  //   x + exp(x*x)
  //                                 -- 9
  //   exp(x) + exp(x) + exp(x)
  //                                 -- 1
  //   exp(x) * exp(x*x)
  //   exp(x) + exp(x*x)
  //                                 -- 2
  //   exp(x*x*x)
  //                                 -- 1
  const int threeRefsCount = 2 + 5 + 9 + 1 + 2 + 1;
  TestMctsNumberOfSolutions(problemData, 3, threeRefsCount, allowLog: false, allowInv: false);
}
875
// Counts the solutions visited for a problem with two input variables (x and y).
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegNumberOfSolutionsTwoVariables() {
  // Nguyen F9 is used because it has only two input variables
  var instanceProvider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
  var descriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("F9 "));
  var problemData = instanceProvider.LoadData(descriptor);

  // Every case below passes the maximum number of variable references followed by the
  // number of solutions that were enumerated by hand in the comment preceding the call.

  // ---- max. one variable reference ----
  //   x
  //   log(x)
  //   exp(x)
  //   1/x
  //   y
  //   log(y)
  //   exp(y)
  //   1/y
  const int oneRefCount = 8;
  TestMctsNumberOfSolutions(problemData, 1, oneRefCount);

  // ---- max. one variable reference, without log and inv ----
  //   x
  //   exp(x)
  //   y
  //   exp(y)
  const int oneRefExpOnlyCount = 4;
  TestMctsNumberOfSolutions(problemData, 1, oneRefExpOnlyCount, allowLog: false, allowInv: false);

  // ---- max. two variable references, without log and inv ----
  //   x
  //   y
  //   exp(x)
  //   exp(y)
  //                                 -- 4
  //   x (*) x
  //   x (*|+) exp(x)
  //   x (*|+) y
  //   x (*|+) exp(y)
  //                                 -- 7
  //   exp(x) (+) exp(x)
  //   exp(x) (*|+) exp(y)
  //                                 -- 3
  //   y (*) y
  //   y (*|+) exp(x)
  //   y (*|+) exp(y)
  //                                 -- 5
  //   exp(y) (+) exp(y)
  //                                 -- 1
  //   exp(x*x)
  //   exp(x*y)
  //   exp(y*y)
  //                                 -- 3
  const int twoRefsExpOnlyCount = 4 + 7 + 3 + 5 + 1 + 3;
  TestMctsNumberOfSolutions(problemData, 2, twoRefsExpOnlyCount, allowLog: false, allowInv: false);

  // ---- max. two variable references, without exp and sum ----
  //   x
  //   y
  //   log(x)
  //   log(y)
  //   inv(x)
  //   inv(y)
  //                                 -- 6
  //   x * x
  //   x * y
  //   x * log(x)
  //   x * log(y)
  //   x * inv(x)
  //   x * inv(y)
  //                                 -- 6
  //   log(x) * log(x)
  //   log(x) * log(y)
  //   log(x) * inv(x)
  //   log(x) * inv(y)
  //                                 -- 4
  //   inv(x) * inv(x)
  //   inv(x) * inv(y)
  //                                 -- 2
  //   y * y
  //   y * log(x)
  //   y * log(y)
  //   y * inv(x)
  //   y * inv(y)
  //                                 -- 5
  //   log(y) * log(y)
  //   log(y) * inv(x)
  //   log(y) * inv(y)
  //                                 -- 3
  //   inv(y) * inv(y)
  //                                 -- 1
  //   log(x*x)
  //   log(x*y)
  //   log(y*y)
  //   inv(x*x)
  //   inv(x*y)
  //   inv(y*y)
  //                                 -- 6
  //   log(x+y)
  //   inv(x+y)
  //                                 -- 2
  const int twoRefsNoExpNoSumCount = 6 + 6 + 4 + 2 + 5 + 3 + 1 + 6 + 2;
  TestMctsNumberOfSolutions(problemData, 2, twoRefsNoExpNoSumCount, allowExp: false, allowSum: false);
}
988    #endregion
989
990
991    #region test structure search (no constants)
// Structure search without constants on Nguyen F1.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Nguyen1() {
  // target function: x³ + x² + x
  var instanceProvider = new NguyenInstanceProvider(seed: 1234);
  var descriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("F1 "));
  var problemData = instanceProvider.LoadData(descriptor);

  // only sums and products of x are allowed
  TestMctsWithoutConstants(problemData, nVarRefs: 10, allowExp: false, allowLog: false, allowInv: false);
}
// Structure search without constants on Nguyen F2.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Nguyen2() {
  // target function: x^4 + x³ + x² + x
  var instanceProvider = new NguyenInstanceProvider(seed: 1234);
  var descriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("F2 "));
  var problemData = instanceProvider.LoadData(descriptor);

  // nVarRefs and iterations are left at the defaults of TestMctsWithoutConstants
  TestMctsWithoutConstants(problemData, allowExp: false, allowLog: false, allowInv: false);
}
// Structure search without constants on Nguyen F3.
// NOTE(review): runs 1,000,000 iterations but is tagged Time=short - confirm the category.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Nguyen3() {
  // target function: x^5 + x^4 + x³ + x² + x (15 variable references)
  var instanceProvider = new NguyenInstanceProvider(seed: 1234);
  var descriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("F3 "));
  var problemData = instanceProvider.LoadData(descriptor);

  TestMctsWithoutConstants(problemData, nVarRefs: 15, iterations: 1000000, allowExp: false, allowLog: false, allowInv: false);
}
// Structure search without constants on Nguyen F4.
// NOTE(review): runs 1,000,000 iterations but is tagged Time=short - confirm the category.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Nguyen4() {
  // target function: x^6 + x^5 + x^4 + x³ + x² + x
  var instanceProvider = new NguyenInstanceProvider(seed: 1234);
  var descriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("F4 "));
  var problemData = instanceProvider.LoadData(descriptor);

  TestMctsWithoutConstants(problemData, nVarRefs: 25, iterations: 1000000, allowExp: false, allowLog: false, allowInv: false);
}
1028
// Structure search without constants on Nguyen F7 (log is the only allowed function).
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Nguyen7() {
  // target function: log(x + 1) + log(x² + 1)
  var instanceProvider = new NguyenInstanceProvider(seed: 1234);
  var descriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("F7 "));
  var problemData = instanceProvider.LoadData(descriptor);

  TestMctsWithoutConstants(problemData, nVarRefs: 10, iterations: 100000, allowExp: false, allowLog: true, allowInv: false);
}
1038
// Structure search without constants on a reduced Poly-10 target
// that keeps only the three pairwise product terms.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part1() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X1*X2 + X3*X4 + X5*X6
  // simplify problem by changing target: subtract the terms that are not part of Y'
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();
  // only the variables occurring in the subtracted terms are read
  var x1 = ds.GetDoubleValues("X1").ToArray();
  var x3 = ds.GetDoubleValues("X3").ToArray();
  var x6 = ds.GetDoubleValues("X6").ToArray();
  var x7 = ds.GetDoubleValues("X7").ToArray();
  var x9 = ds.GetDoubleValues("X9").ToArray();
  var x10 = ds.GetDoubleValues("X10").ToArray();
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x1[i] * x7[i] * x9[i];
    ys[i] -= x3[i] * x6[i] * x10[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());

  // same input variables and target name as the original problem, but with the modified target values
  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false);
}
1072
// Structure search without constants on a reduced Poly-10 target
// that keeps only the two three-way product terms.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part2() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X1*X7*X9 + X3*X6*X10
  // simplify problem by changing target: subtract the terms that are not part of Y'
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();
  // only the variables occurring in the subtracted terms are read
  var x1 = ds.GetDoubleValues("X1").ToArray();
  var x2 = ds.GetDoubleValues("X2").ToArray();
  var x3 = ds.GetDoubleValues("X3").ToArray();
  var x4 = ds.GetDoubleValues("X4").ToArray();
  var x5 = ds.GetDoubleValues("X5").ToArray();
  var x6 = ds.GetDoubleValues("X6").ToArray();
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x1[i] * x2[i];
    ys[i] -= x3[i] * x4[i];
    ys[i] -= x5[i] * x6[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());

  // same input variables and target name as the original problem, but with the modified target values
  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false);
}
1107
// Structure search without constants on a reduced Poly-10 target
// that keeps only the two terms containing X1.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part3() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X1*X2 + X1*X7*X9
  // simplify problem by changing target: subtract the terms that are not part of Y'
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();
  // only the variables occurring in the subtracted terms are read
  var x3 = ds.GetDoubleValues("X3").ToArray();
  var x4 = ds.GetDoubleValues("X4").ToArray();
  var x5 = ds.GetDoubleValues("X5").ToArray();
  var x6 = ds.GetDoubleValues("X6").ToArray();
  var x10 = ds.GetDoubleValues("X10").ToArray();
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x3[i] * x4[i];
    ys[i] -= x5[i] * x6[i];
    ys[i] -= x3[i] * x6[i] * x10[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());

  // same input variables and target name as the original problem, but with the modified target values
  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false);
}
1142
// Structure search without constants on a reduced Poly-10 target
// that drops the two terms containing X1.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part4() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X3*X4 + X5*X6 + X3*X6*X10
  // simplify problem by changing target: subtract the terms that are not part of Y'
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();
  // only the variables occurring in the subtracted terms are read
  var x1 = ds.GetDoubleValues("X1").ToArray();
  var x2 = ds.GetDoubleValues("X2").ToArray();
  var x7 = ds.GetDoubleValues("X7").ToArray();
  var x9 = ds.GetDoubleValues("X9").ToArray();
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x1[i] * x2[i];
    ys[i] -= x1[i] * x7[i] * x9[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());

  // same input variables and target name as the original problem, but with the modified target values
  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false);
}
1175
// Structure search without constants on a reduced Poly-10 target
// that drops only the term X3*X6*X10.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part5() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9
  // simplify problem by changing target: subtract the term that is not part of Y'
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();
  // only the variables occurring in the subtracted term are read
  var x3 = ds.GetDoubleValues("X3").ToArray();
  var x6 = ds.GetDoubleValues("X6").ToArray();
  var x10 = ds.GetDoubleValues("X10").ToArray();
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x3[i] * x6[i] * x10[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());

  // same input variables and target name as the original problem, but with the modified target values
  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false);
}
1207
// Structure search without constants on a reduced Poly-10 target
// that drops only the term X1*X7*X9.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Poly10_Part6() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

  //  Y  = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
  //  Y' = X1*X2 + X3*X4 + X5*X6 + X3*X6*X10
  // simplify problem by changing target: subtract the term that is not part of Y'
  var ds = ((Dataset)regProblem.Dataset).ToModifiable();
  var ys = ds.GetDoubleValues("Y").ToArray();
  // only the variables occurring in the subtracted term are read
  var x1 = ds.GetDoubleValues("X1").ToArray();
  var x7 = ds.GetDoubleValues("X7").ToArray();
  var x9 = ds.GetDoubleValues("X9").ToArray();
  for (int i = 0; i < ys.Length; i++) {
    ys[i] -= x1[i] * x7[i] * x9[i];
  }
  ds.ReplaceVariable("Y", ys.ToList());

  // same input variables and target name as the original problem, but with the modified target values
  var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

  // Y' contains exactly 9 variable references
  TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 9, iterations: 100000, allowExp: false, allowLog: false, allowInv: false);
}
1239
1240
// Structure search without constants on the unmodified Poly-10 instance,
// using every row of the data set for training.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "long")]
public void MctsSymbReg_NoConstants_Poly10_250rows() {
  var instanceProvider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
  var descriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("Poly-10"));
  var problemData = instanceProvider.LoadData(descriptor);

  // training partition: all rows
  problemData.TrainingPartition.Start = 0;
  problemData.TrainingPartition.End = problemData.Dataset.Rows;

  // test partition: only the first two rows
  problemData.TestPartition.Start = 0;
  problemData.TestPartition.End = 2;

  TestMctsWithoutConstants(problemData, nVarRefs: 15, iterations: 200000, allowExp: false, allowLog: false, allowInv: false);
}
// Structure search without constants on a synthetic data set that uses the
// Poly-10 generating function but has 10000 rows.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "long")]
public void MctsSymbReg_NoConstants_Poly10_10000rows() {
  const int rows = 10000;
  var rand = new FastRandom(1234);

  // Each invocation draws one complete column from rand. The columns are sampled strictly
  // one after the other (a first, j last) so the random stream is consumed column by column.
  Func<List<double>> sampleColumn = () => Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
  var a = sampleColumn();
  var b = sampleColumn();
  var c = sampleColumn();
  var d = sampleColumn();
  var e = sampleColumn();
  var f = sampleColumn();
  var g = sampleColumn();
  var h = sampleColumn(); // does not occur in the target but is offered as an input
  var i9 = sampleColumn();
  var j = sampleColumn();

  // y = a*b + c*d + e*f + a*g*i + c*f*j
  var target = Enumerable.Range(0, a.Count)
    .Select(r => a[r] * b[r] + c[r] * d[r] + e[r] * f[r] + a[r] * g[r] * i9[r] + c[r] * f[r] * j[r])
    .ToList();

  var inputNames = new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j" };
  var dataset = new Dataset(
    new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "y" },
    new[] { a, b, c, d, e, f, g, h, i9, j, target });

  var problemData = new RegressionProblemData(dataset, inputNames, "y");

  // train on all rows
  problemData.TrainingPartition.Start = 0;
  problemData.TrainingPartition.End = problemData.Dataset.Rows;
  // the test partition must not be empty, so two rows are assigned to it
  problemData.TestPartition.Start = 0;
  problemData.TestPartition.End = 2;

  TestMctsWithoutConstants(problemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false);
}
1288
// Structure search without constants on a synthetic problem with two input variables.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_TwoVars() {
  const int rows = 100;
  var rand = new FastRandom(1234);

  // column a is sampled completely before column b
  var a = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
  var b = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();

  // y = a + b + a*b + a*b*b + a*a*b
  var target = new List<double>();
  for (int r = 0; r < rows; r++) {
    var ai = a[r];
    var bi = b[r];
    target.Add(ai + bi + ai * bi + ai * bi * bi + ai * ai * bi);
  }

  var dataset = new Dataset(new string[] { "a", "b", "y" }, new[] { a, b, target });
  var problemData = new RegressionProblemData(dataset, new string[] { "a", "b" }, "y");

  TestMctsWithoutConstants(problemData, nVarRefs: 10, iterations: 10000, allowExp: false, allowLog: false, allowInv: false);
}
1307
// Structure search without constants on a synthetic problem in which one term
// of the target has only a very small effect.
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbReg_NoConstants_Misleading() {
  // The target generated below is y = a + b + a*b*c, where c is scaled by 1.0e-3 so the
  // product term contributes very little.
  // NOTE(review): the previous description of this test read "y = a + baaaaa", which does
  // not match the generated target - confirm which of the two was intended.
  // Intent as stated by the original author: the algorithm quickly finds that a has a big
  // effect and searches below a; since a + a... is prevented, it must find the correct
  // expression via a + b...; the weak term might not be identified as relevant.
  const int rows = 100;
  var rand = new FastRandom(1234);

  // columns are sampled one after the other in the order a, b, c, d, e
  var colA = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
  var colB = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList();
  var colC = Enumerable.Range(0, rows).Select(_ => rand.NextDouble() * 1.0e-3).ToList();
  var colD = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList(); // not part of the target
  var colE = Enumerable.Range(0, rows).Select(_ => rand.NextDouble()).ToList(); // not part of the target

  var target = Enumerable.Range(0, colA.Count)
    .Select(r => colA[r] + colB[r] + colA[r] * colB[r] * colC[r])
    .ToList();

  var dataset = new Dataset(
    new string[] { "a", "b", "c", "d", "e", "y" },
    new[] { colA, colB, colC, colD, colE, target });

  var problemData = new RegressionProblemData(dataset, new string[] { "a", "b", "c", "d", "e" }, "y");

  TestMctsWithoutConstants(problemData, nVarRefs: 10, iterations: 10000, allowExp: false, allowLog: false, allowInv: false);
}
1335    #endregion
1336
1337    #region restricted structure but including numeric constants
1338
// Search including numeric constants on Keijzer 7 (log is the only allowed function).
[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegKeijzer7() {
  // target function: ln(x)
  var instanceProvider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234);
  var descriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Contains("Keijzer 7 f("));
  var problemData = instanceProvider.LoadData(descriptor);

  // some Keijzer problem instances have very large test partitions; test performance is
  // not a concern here, so the test partition is capped at 1000 rows
  const int maxTestRows = 1000;
  var testRows = problemData.TestPartition.End - problemData.TestPartition.Start;
  if (testRows > maxTestRows) {
    problemData.TestPartition.End = problemData.TestPartition.Start + maxTestRows;
  }

  TestMcts(problemData, allowExp: false, allowLog: true, allowInv: false);
}
1350
1351    /*
1352    // [TestMethod]
1353    [TestCategory("Algorithms.DataAnalysis")]
1354    [TestProperty("Time", "short")]
1355    public void MctsSymbRegBenchmarkNguyen5() {
1356      // sin(x²)cos(x) - 1
1357      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
1358      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F5 ")));
1359      TestMcts(regProblem);
1360    }
1361    // [TestMethod]
1362    [TestCategory("Algorithms.DataAnalysis")]
1363    [TestProperty("Time", "short")]
1364    public void MctsSymbRegBenchmarkNguyen6() {
1365      // sin(x) + sin(x + x²)
1366      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
1367      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F6 ")));
1368      TestMcts(regProblem);
1369    }
1370    */
1371    [TestMethod]
1372    [TestCategory("Algorithms.DataAnalysis")]
1373    [TestProperty("Time", "short")]
1374    public void MctsSymbRegBenchmarkNguyen7() {
1375      // Runs the MCTS helper on the Nguyen F7 instance; target function: log(x + 1) + log(x² + 1)
1376      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234); // fixed seed for the instance provider
1377      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F7 "))); // trailing space in "F7 " is part of the match text; Single() requires exactly one match
1378      TestMcts(regProblem, maxVariableReferences: 5, allowExp: false, allowLog: true, allowInv: false); // of exp/log/inverse only log is enabled
1379    }
1380    [TestMethod]
1381    [TestCategory("Algorithms.DataAnalysis")]
1382    [TestProperty("Time", "short")]
1383    public void MctsSymbRegBenchmarkNguyen8() {
1384      // Runs the MCTS helper on the Nguyen F8 instance; target function: Sqrt(x)
1385      // = x ^ 0.5
1386      // = exp(0.5 * log(x)), which is why both exp and log are enabled below
1387      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234); // fixed seed for the instance provider
1388      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F8 "))); // Single() requires exactly one descriptor whose name contains "F8 "
1389      TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false); // exp and log enabled, inverse disabled
1390    }
1391    /*
1392    // [TestMethod]
1393    [TestCategory("Algorithms.DataAnalysis")]
1394    [TestProperty("Time", "short")]
1395    public void MctsSymbRegBenchmarkNguyen9() {
1396      //  sin(x) + sin(y²)
1397      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
1398      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F9 ")));
1399      TestMcts(regProblem);
1400    }
1401    // [TestMethod]
1402    [TestCategory("Algorithms.DataAnalysis")]
1403    [TestProperty("Time", "short")]
1404    public void MctsSymbRegBenchmarkNguyen10() {
1405      // 2sin(x)cos(y)
1406      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
1407      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F10 ")));
1408      TestMcts(regProblem);
1409    }
1410    */
1411    [TestMethod]
1412    [TestCategory("Algorithms.DataAnalysis")]
1413    [TestProperty("Time", "short")]
1414    public void MctsSymbRegBenchmarkNguyen11() {
1415      // Runs the MCTS helper on the Nguyen F11 instance; target function: x ^ y, x > 0, y > 0
1416      // = exp(y * log(x)), which is why both exp and log are enabled below
1417      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234); // fixed seed for the instance provider
1418      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F11 "))); // Single() requires exactly one descriptor whose name contains "F11 "
1419      TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false); // exp and log enabled, inverse disabled
1420    }
1421    [TestMethod]
1422    [TestCategory("Algorithms.DataAnalysis")]
1423    [TestProperty("Time", "short")]
1424    public void MctsSymbRegBenchmarkNguyen12() {
1425      // Runs the MCTS helper on the Nguyen F12 instance; target function: x^4 - x³ + y²/2 - y
1426      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234); // fixed seed for the instance provider
1427      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F12 "))); // Single() requires exactly one descriptor whose name contains "F12 "
1428      TestMcts(regProblem, maxVariableReferences: 20, allowExp: false, allowLog: false, allowInv: false); // exp, log and inverse all disabled (target is a polynomial)
1429    }
1430
1431    #endregion
1432
1433    #region keijzer
1434    [TestMethod]
1435    [TestCategory("Algorithms.DataAnalysis")]
1436    [TestProperty("Time", "long")]
1437    public void MctsSymbRegBenchmarkKeijzer5() {
1438      // Runs the MCTS helper on the Keijzer 5 instance; target function: (30 * x * z) / ((x - 10) * y²)
1439      // = 30 x z / (xy² - 10 y²)
1440      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234); // fixed seed for the instance provider
1441      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 5 f("))); // Single() requires exactly one descriptor whose name contains this text
1442      // some Keijzer problem instances have very large test partitions; cap the length at 1000 (here we are not concerned about test performance)
1443      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
1444      TestMcts(regProblem, maxVariableReferences: 10, allowExp: false, allowLog: false, allowInv: true); // of exp/log/inverse only inverse is enabled
1445    }
1446
1447    [TestMethod]
1448    [TestCategory("Algorithms.DataAnalysis")]
1449    [TestProperty("Time", "short")]
1450    public void MctsSymbRegBenchmarkKeijzer6() {
1451      // Runs the MCTS helper on the Keijzer 6 instance; f(x) = Sum(1 / i) for i from 1 to x, x \in [0..120]
1452      // we can only approximate this
1453      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234); // fixed seed for the instance provider
1454      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 6 f("))); // Single() requires exactly one descriptor whose name contains this text
1455      // some Keijzer problem instances have very large test partitions; cap the length at 1000 (here we are not concerned about test performance)
1456      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
1457      TestMcts(regProblem, maxVariableReferences: 20, allowExp: false, allowLog: false, allowInv: true); // of exp/log/inverse only inverse is enabled
1458    }
1459
1460
1461    [TestMethod]
1462    [TestCategory("Algorithms.DataAnalysis")]
1463    [TestProperty("Time", "short")]
1464    public void MctsSymbRegBenchmarkKeijzer8() {
1465      // Runs the MCTS helper on the Keijzer 8 instance; target function: sqrt(x)
1466      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234); // fixed seed for the instance provider
1467      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 8 f("))); // Single() requires exactly one descriptor whose name contains this text
1468      // some Keijzer problem instances have very large test partitions; cap the length at 1000 (here we are not concerned about test performance)
1469      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
1470      TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false); // exp and log enabled, inverse disabled
1471    }
1472
1473    [TestMethod]
1474    [TestCategory("Algorithms.DataAnalysis")]
1475    [TestProperty("Time", "short")]
1476    public void MctsSymbRegBenchmarkKeijzer9() {
1477      // Runs the MCTS helper on the Keijzer 9 instance; target function: arcsinh(x), i.e. ln(x + sqrt(x² + 1))
1478      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234); // fixed seed for the instance provider
1479      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 9 f("))); // Single() requires exactly one descriptor whose name contains this text
1480      // some Keijzer problem instances have very large test partitions; cap the length at 1000 (here we are not concerned about test performance)
1481      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
1482      TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false); // exp and log enabled, inverse disabled
1483    }
1484
1485    /*
1486    [TestMethod]
1487    [TestCategory("Algorithms.DataAnalysis")]
1488    [TestProperty("Time", "short")]
1489    public void MctsSymbRegBenchmarkKeijzer11() {
1490      // xy + sin( (x-1) (y-1) )
1491      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider();
1492      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 11 f(")));
1493      // some Keijzer problem instances have very large test partitions (here we are not concerned about test performance)
1494      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
1495      TestMcts(regProblem, successThreshold: 0.99); // cannot solve this yet
1496    }
1497     */
1498    [TestMethod]
1499    [TestCategory("Algorithms.DataAnalysis")]
1500    [TestProperty("Time", "short")]
1501    public void MctsSymbRegBenchmarkKeijzer12() {
1502      // Runs the MCTS helper on the Keijzer 12 instance; target function: x^4 - x³ + y² / 2 - y, same as Nguyen 12
1503      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234); // fixed seed for the instance provider
1504      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 12 f("))); // Single() requires exactly one descriptor whose name contains this text
1505      // some Keijzer problem instances have very large test partitions; cap the length at 1000 (here we are not concerned about test performance)
1506      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
1507      TestMcts(regProblem, maxVariableReferences: 15, allowExp: false, allowLog: false, allowInv: false); // exp, log and inverse all disabled (target is a polynomial)
1508    }
1509    [TestMethod]
1510    [TestCategory("Algorithms.DataAnalysis")]
1511    [TestProperty("Time", "short")]
1512    public void MctsSymbRegBenchmarkKeijzer14() {
1513      // Runs the MCTS helper on the Keijzer 14 instance; target function: 8 / (2 + x² + y²)
1514      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234); // fixed seed for the instance provider
1515      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 14 f("))); // Single() requires exactly one descriptor whose name contains this text
1516      // some Keijzer problem instances have very large test partitions; cap the length at 1000 (here we are not concerned about test performance)
1517      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
1518      TestMcts(regProblem, maxVariableReferences: 10, allowExp: false, allowLog: false, allowInv: true); // of exp/log/inverse only inverse is enabled
1519    }
1520    [TestMethod]
1521    [TestCategory("Algorithms.DataAnalysis")]
1522    [TestProperty("Time", "short")]
1523    public void MctsSymbRegBenchmarkKeijzer15() {
1524      // Runs the MCTS helper on the Keijzer 15 instance; target function: x³ / 5 + y³ / 2 - y - x
1525      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234); // fixed seed for the instance provider
1526      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 15 f("))); // Single() requires exactly one descriptor whose name contains this text
1527      // some Keijzer problem instances have very large test partitions; cap the length at 1000 (here we are not concerned about test performance)
1528      if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000;
1529      TestMcts(regProblem, maxVariableReferences: 10, allowExp: false, allowLog: false, allowInv: false); // exp, log and inverse all disabled (target is a polynomial)
1530    }
1531    #endregion
1532
1533    private void TestMcts(IRegressionProblemData problemData, // helper: configures and runs the MCTS algorithm on problemData, then asserts train and test quality are within (1 - successThreshold) of 1.0
1534      int iterations = 20000, // assigned to the algorithm's Iterations property
1535      double successThreshold = 0.99999, // tolerance for the quality asserts is 1.0 - successThreshold
1536      int maxVariableReferences = 5, // assigned to the algorithm's MaxVariableReferences property
1537      bool allowExp = true, // checked state for the allowed-factor entry containing "exp"
1538      bool allowLog = true, // checked state for the allowed-factor entry containing "log"
1539      bool allowInv = true, // checked state for the allowed-factor entry containing "1 /"
1540      bool allowSum = true // checked state for the allowed-factor entry containing "t1(x) + t2(x) + ... "
1541      ) {
1542      var mctsSymbReg = new MctsSymbolicRegressionAlgorithm();
1543      var regProblem = new RegressionProblem();
1544      regProblem.ProblemDataParameter.Value = problemData;
1545      #region Algorithm Configuration
1546      mctsSymbReg.Problem = regProblem;
1547      mctsSymbReg.Iterations = iterations;
1548      mctsSymbReg.MaxVariableReferences = maxVariableReferences;
1549      // fixed seed so that repeated runs use the same random seed
1550      mctsSymbReg.SetSeedRandomly = false;
1551      mctsSymbReg.Seed = 1234;
1552      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("exp")), allowExp); // each Single() requires exactly one factor entry containing the given text
1553      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log")), allowLog);
1554      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 /")), allowInv);
1555      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("t1(x) + t2(x) + ... ")), allowSum);
1556      // NOTE(review): presumably 0 iterations keeps numeric constants but skips their iterative optimization (TestMctsWithoutConstants uses -1 for "no constants") - confirm against the algorithm
1557      mctsSymbReg.ScaleVariables = true;
1558      mctsSymbReg.ConstantOptimizationIterations = 0;
1559
1560      #endregion
1561      RunAlgorithm(mctsSymbReg); // returns after the algorithm has stopped; asserts that no exception was reported
1562
1563      Console.WriteLine(mctsSymbReg.ExecutionTime);
1564      var eps = 1.0 - successThreshold;
1565      Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (train)"].Value).Value, eps);
1566      Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (test)"].Value).Value, eps);
1567    }
1568
1569
1570    private void TestMctsWithoutConstants(IRegressionProblemData problemData, // helper: like TestMcts but with ScaleVariables = false and ConstantOptimizationIterations = -1 ("no constants")
1571      int nVarRefs = 10, // assigned to the algorithm's MaxVariableReferences property
1572      int iterations = 200000, double successThreshold = 0.99999, // tolerance for the quality asserts is 1.0 - successThreshold
1573      bool allowExp = true, // checked state for the allowed-factor entry containing "exp"
1574      bool allowLog = true, // checked state for the allowed-factor entry containing "log"
1575      bool allowInv = true, // checked state for the allowed-factor entry containing "1 /"
1576      bool allowSum = true // checked state for the allowed-factor entry containing "t1(x) + t2(x) + ... "
1577      ) {
1578      var mctsSymbReg = new MctsSymbolicRegressionAlgorithm();
1579      var regProblem = new RegressionProblem();
1580      regProblem.ProblemDataParameter.Value = problemData;
1581      #region Algorithm Configuration
1582      mctsSymbReg.Problem = regProblem;
1583      mctsSymbReg.Iterations = iterations;
1584      mctsSymbReg.MaxVariableReferences = nVarRefs;
1585      mctsSymbReg.SetSeedRandomly = false; // fixed seed so that repeated runs use the same random seed
1586      mctsSymbReg.Seed = 1234;
1587      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("exp")), allowExp); // each Single() requires exactly one factor entry containing the given text
1588      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log")), allowLog);
1589      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 /")), allowInv);
1590      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("t1(x) + t2(x) + ... ")), allowSum);
1591
1592      // no constants: variable scaling is off and constant optimization is set to -1
1593      mctsSymbReg.ScaleVariables = false;
1594      mctsSymbReg.ConstantOptimizationIterations = -1;
1595
1596
1597      #endregion
1598      RunAlgorithm(mctsSymbReg); // returns after the algorithm has stopped; asserts that no exception was reported
1599
1600      Console.WriteLine(mctsSymbReg.ExecutionTime);
1601      var eps = 1.0 - successThreshold;
1602      Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (train)"].Value).Value, eps);
1603      Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (test)"].Value).Value, eps);
1604    }
1605
1606    private void TestMctsNumberOfSolutions(IRegressionProblemData problemData, int maxNumberOfVariables, int expectedNumberOfSolutions,
1607      bool allowProd = true, // checked state for the allowed-factor entry starting with "x * y * ..."
1608      bool allowExp = true, // checked state for the entry containing "exp(c * x * y ...)"
1609      bool allowLog = true, // checked state for the entry containing "log(c + c1 x + c2 x + ...)"
1610      bool allowInv = true, // checked state for the entry containing "1 / (1 + c1 x + c2 x + ...)"
1611      bool allowSum = true // checked state for the entry containing "t1(x) + t2(x) + ... "
1612      ) { // helper: runs the algorithm without a practical iteration limit and asserts that "Effective rollouts" equals expectedNumberOfSolutions
1613      var mctsSymbReg = new MctsSymbolicRegressionAlgorithm();
1614      var regProblem = new RegressionProblem();
1615      regProblem.ProblemDataParameter.Value = problemData;
1616      #region Algorithm Configuration
1617
1618      mctsSymbReg.SetSeedRandomly = false; // fixed seed so that repeated runs use the same random seed
1619      mctsSymbReg.Seed = 1234;
1620      mctsSymbReg.Problem = regProblem;
1621      mctsSymbReg.Iterations = int.MaxValue; // stopping when all solutions have been enumerated
1622      mctsSymbReg.MaxVariableReferences = maxNumberOfVariables;
1623      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.StartsWith("x * y * ...", StringComparison.Ordinal)), allowProd); // ordinal match, consistent with the Contains() lookups below (StartsWith(string) alone is culture-sensitive)
1624      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("exp(c * x * y ...)")), allowExp);
1625      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log(c + c1 x + c2 x + ...)")), allowLog);
1626      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 / (1 + c1 x + c2 x + ...)")), allowInv);
1627      mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("t1(x) + t2(x) + ... ")), allowSum);
1628      #endregion
1629      RunAlgorithm(mctsSymbReg); // returns after the algorithm has stopped; asserts that no exception was reported
1630
1631      Console.WriteLine(mctsSymbReg.ExecutionTime);
1632      Assert.AreEqual(expectedNumberOfSolutions, ((IntValue)mctsSymbReg.Results["Effective rollouts"].Value).Value);
1633    }
1634
1635
1636    // Runs the algorithm until it stops and fails the test if it reported an exception (same as in SamplesUtil)
1637    private void RunAlgorithm(IAlgorithm a) {
1638      var trigger = new EventWaitHandle(false, EventResetMode.ManualReset);
1639      Exception ex = null;
1640      a.Stopped += (src, e) => { trigger.Set(); };
1641      a.ExceptionOccurred += (src, e) => { ex = e.Value; trigger.Set(); }; // remember the exception and release the waiting test thread
1642      a.Prepare();
1643      a.Start();
1644      trigger.WaitOne(); // block until Stopped or ExceptionOccurred has signalled
1645      // IsNull instead of AreEqual(ex, null): the old call had expected/actual swapped and hid the exception details on failure
1646      Assert.IsNull(ex, "Algorithm reported an exception: {0}", ex);
1647    }
1648
1649  }
1650}
Note: See TracBrowser for help on using the repository browser.