using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading;
using HeuristicLab.Algorithms.DataAnalysis.MCTSSymbReg;
using HeuristicLab.Data;
using HeuristicLab.Optimization;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.Instances.DataAnalysis;
using HeuristicLab.Random;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
  /// <summary>
  /// Unit tests for the MCTS symbolic regression implementation: the interaction heuristics,
  /// symbolic expression hashing, enumeration of the solution space and structure search.
  /// </summary>
  [TestClass()]
  public class MctsSymbolicRegressionTest {
    #region heuristics
    /// <summary>
    /// Checks Heuristics.CorrelationForInteraction against the 0.05 threshold for z = a * b,
    /// where a and b are drawn from several distributions and x, y are unrelated variables.
    /// </summary>
    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void TestSimple2dInteractions() {
      {
        // a, b ~ U(0, 1) should be trivial
        var nRand = new MersenneTwister(1234);

        int n = 10000; // large sample so that we can use the thresholds below
        var a = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
        var b = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
        var x = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
        var y = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();

        var z = a.Zip(b, (ai, bi) => ai * bi).ToArray();

        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); // should be detected as relevant
        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) > 0.05); // a and b > 0 so these should be detected as well
        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) > 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) > 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) > 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
      }
      {
        // a, b ~ U(1000, 2000) also trivial
        var nRand = new UniformDistributedRandom(new MersenneTwister(1234), 1000, 2000);

        int n = 10000; // large sample so that we can use the thresholds below
        var a = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
        var b = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
        var x = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
        var y = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();

        var z = a.Zip(b, (ai, bi) => ai * bi).ToArray();

        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); // should be detected as relevant
        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) > 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) > 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) > 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) > 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
      }
      {
        // a, b ~ U(-1, 1)
        var nRand = new UniformDistributedRandom(new MersenneTwister(1234), -1, 1);

        int n = 10000; // large sample so that we can use the thresholds below
        var a = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
        var b = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
        var x = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
        var y = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();

        var z = a.Zip(b, (ai, bi) => ai * bi).ToArray();

        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); // should be detected as relevant
        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
      }
      {
        // a, b ~ N(0, 1)
        var nRand = new NormalDistributedRandom(new MersenneTwister(1234), 0, 1);

        int n = 10000; // large sample so that we can use the thresholds below
        var a = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
        var b = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
        var x = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
        var y = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();

        var z = a.Zip(b, (ai, bi) => ai * bi).ToArray();

        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); // should be detected as relevant
        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);
      }
      {
        var rand = new MersenneTwister(1234);
        // a ~ N(100, 1), b ~ N(-100, 1)
        // all three generators draw from the same underlying MersenneTwister instance
        var nRand = new NormalDistributedRandom(rand, 0, 1);
        var aRand = new NormalDistributedRandom(rand, 100, 1);
        var bRand = new NormalDistributedRandom(rand, -100, 1);

        int n = 10000; // large sample so that we can use the thresholds below
        var a = Enumerable.Range(0, n).Select(_ => aRand.NextDouble()).ToArray();
        var b = Enumerable.Range(0, n).Select(_ => bRand.NextDouble()).ToArray();
        var x = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();
        var y = Enumerable.Range(0, n).Select(_ => nRand.NextDouble()).ToArray();

        var z = a.Zip(b, (ai, bi) => ai * bi).ToArray();

        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05); // should be detected as relevant
        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) > 0.05); // a > 0
        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) > 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) > 0.05); // b < 0
        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) > 0.05);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05); // random variables are not correlated
      }
    }

    /// <summary>
    /// Checks that the product of two (possibly correlated) normally distributed variables is detected
    /// as relevant for z = a * b while the product of two unrelated variables x, y is not.
    /// </summary>
    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void TestGeneral2dInteractions() {
      {
        // we should be able to reliably detect when a product of two variables is correlated with the target variable
        // the test samples x from a two dimensional normal distribution
        // the covariance matrix for the normal distribution is randomly sampled
        // this means x_1 and x_2 might be highly correlated
        // the mean of the normal distribution is randomly sampled (most critical are probably zero-mean distributions)
        // y is calculated as x_1*x_2
        var conditionNumber = 10000;
        for (int iter = 0; iter < 100; iter++) {
          double m0 = 0.0;
          double m1 = 0.0;
          alglib.hqrndstate randState;
          alglib.hqrndseed(1234, 31415, out randState);
          // uncomment if non-zero mean distributions should be tested
          //alglib.hqrndnormal2(randState, out m0, out m1);

          double[,] cov_ab = new double[2, 2];
          double[,] cov_xy = new double[2, 2];
          // after the Cholesky calls the cov_* arrays hold the factors that are applied to the raw normal samples below
          alglib.matgen.spdmatrixrndcond(2, conditionNumber, ref cov_ab);
          alglib.spdmatrixcholesky(ref cov_ab, 2, true);
          alglib.matgen.spdmatrixrndcond(2, conditionNumber, ref cov_xy);
          alglib.spdmatrixcholesky(ref cov_xy, 2, true);

          // generate a, b by sampling from a 2dim multivariate normal distribution
          // generate x, y by sampling from another 2dim multivariate normal distribution
          // a,b and x,y might be correlated but x,y are not correlated to a,b
          int N = 1000; // large sample size to make sure the test thresholds hold
          double[] a = new double[N];
          double[] b = new double[N];
          double[] x = new double[N];
          double[] y = new double[N];
          double[] z = new double[N];

          for (int i = 0; i < N; i++) {
            double r1, r2, r3, r4;
            alglib.hqrndnormal2(randState, out r1, out r2);
            alglib.hqrndnormal2(randState, out r3, out r4);

            var r_ab = new double[] { r1, r2 };
            var r_xy = new double[] { r3, r4 };
            double[] s_ab = new double[2];
            double[] s_xy = new double[2];
            alglib.ablas.rmatrixmv(2, 2, cov_ab, 0, 0, 0, r_ab, 0, ref s_ab, 0);
            alglib.ablas.rmatrixmv(2, 2, cov_xy, 0, 0, 0, r_xy, 0, ref s_xy, 0);

            a[i] = s_ab[0] + m0;
            b[i] = s_ab[1] + m1;
            x[i] = s_xy[0] + m0; // use same mean (just for the sake of it)
            y[i] = s_xy[1] + m1;

            z[i] = a[i] * b[i];
          }

          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, z) > 0.05);
          Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z) < 0.05);

          /* we might see correlations when only using one of the two relevant factors.
           * this depends on the distribution / location of a and b
          // for zero-mean distributions the following should all be quasi-zero
          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05);
          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05);
          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05);
          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05);
          */
          Console.WriteLine("a,b: {0:N3}\tx,y: {1:N3}\ta,x: {2:N3}\tb,x: {3:N3}\ta,y: {4:N3}\tb,y: {5:N3}\tcov(a,b): {6:N3}",
            Heuristics.CorrelationForInteraction(a, b, z),
            Heuristics.CorrelationForInteraction(x, y, z),
            Heuristics.CorrelationForInteraction(a, x, z),
            Heuristics.CorrelationForInteraction(b, x, z),
            Heuristics.CorrelationForInteraction(a, y, z),
            Heuristics.CorrelationForInteraction(b, y, z),
            alglib.cov2(a, b)
            );
        }
      }
    }

    /// <summary>
    /// Three-variable counterpart of <see cref="TestGeneral2dInteractions"/>: the target is t = a * b * c
    /// and x, y, z are unrelated variables.
    /// </summary>
    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void TestGeneral3dInteractions() {
      {
        // same as TestGeneral2dInteractions but for terms with three variables
        var conditionNumber = 100;
        for (int iter = 0; iter < 100; iter++) {
          double m0 = 0.0;
          double m1 = 0.0;
          double m2 = 0.0;
          alglib.hqrndstate randState;
          alglib.hqrndseed(1234, 31415, out randState);
          // uncomment if non-zero mean distributions should be tested
          //alglib.hqrndnormal2(randState, out m0, out m1);
          //alglib.hqrndnormal2(randState, out m1, out m2);

          double[,] cov_abc = new double[3, 3];
          double[,] cov_xyz = new double[3, 3];
          alglib.matgen.spdmatrixrndcond(3, conditionNumber, ref cov_abc);
          alglib.spdmatrixcholesky(ref cov_abc, 3, true);
          alglib.matgen.spdmatrixrndcond(3, conditionNumber, ref cov_xyz);
          alglib.spdmatrixcholesky(ref cov_xyz, 3, true);

          int N = 1000; // large sample size to make sure the test thresholds hold
          double[] a = new double[N];
          double[] b = new double[N];
          double[] c = new double[N];
          double[] x = new double[N];
          double[] y = new double[N];
          double[] z = new double[N];
          double[] t = new double[N];

          for (int i = 0; i < N; i++) {
            double r1, r2, r3, r4, r5, r6;
            alglib.hqrndnormal2(randState, out r1, out r2);
            alglib.hqrndnormal2(randState, out r3, out r4);
            alglib.hqrndnormal2(randState, out r5, out r6);

            var r_abc = new double[] { r1, r2, r3 };
            var r_xyz = new double[] { r4, r5, r6 };
            double[] s_abc = new double[3];
            double[] s_xyz = new double[3];
            alglib.ablas.rmatrixmv(3, 3, cov_abc, 0, 0, 0, r_abc, 0, ref s_abc, 0);
            alglib.ablas.rmatrixmv(3, 3, cov_xyz, 0, 0, 0, r_xyz, 0, ref s_xyz, 0);

            a[i] = s_abc[0] + m0;
            b[i] = s_abc[1] + m1;
            c[i] = s_abc[2] + m2;
            x[i] = s_xyz[0] + m0; // use same mean (just for the sake of it)
            y[i] = s_xyz[1] + m1;
            z[i] = s_xyz[2] + m2;

            t[i] = a[i] * b[i] * c[i];
          }

          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, c, t) > 0.05);
          Assert.IsTrue(Heuristics.CorrelationForInteraction(x, y, z, t) < 0.05);

          /* we might see correlations when only using one of the two relevant factors.
           * this depends on the distribution / location of a and b
          // for zero-mean distributions the following should all be quasi-zero
          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, x, z) < 0.05);
          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, x, z) < 0.05);
          Assert.IsTrue(Heuristics.CorrelationForInteraction(a, y, z) < 0.05);
          Assert.IsTrue(Heuristics.CorrelationForInteraction(b, y, z) < 0.05);
          */
          Console.WriteLine("a,b,c: {0:N3}\tx,y,z: {1:N3}\ta,b,x: {2:N3}\tb,c,x: {3:N3}",
            Heuristics.CorrelationForInteraction(a, b, c, t),
            Heuristics.CorrelationForInteraction(x, y, z, t),
            Heuristics.CorrelationForInteraction(a, b, x, t),
            Heuristics.CorrelationForInteraction(b, c, x, t)
            );
        }
      }
    }

    /// <summary>
    /// Prints the interaction scores of all 2- and 3-variable combinations for the Poly-10 target and
    /// asserts that every term of the target scores above 0.01, independent of argument order.
    /// </summary>
    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void TestPoly10Interactions() {
      {
        alglib.hqrndstate randState;
        alglib.hqrndseed(1234, 31415, out randState);

        int N = 25000; // large sample size to make sure the test thresholds hold
        double[] a = new double[N];
        double[] b = new double[N];
        double[] c = new double[N];
        double[] d = new double[N];
        double[] e = new double[N];
        double[] f = new double[N];
        double[] g = new double[N];
        double[] h = new double[N];
        double[] i = new double[N];
        double[] j = new double[N];
        double[] y = new double[N];

        // NOTE(review): everything from the loop condition up to the first resultList allocation was lost
        // in this copy of the file (the text between '<' and '>' had been stripped) and has been reconstructed.
        // The target formula follows the Y = ... comment and the assertions below; the input distribution
        // (uniform on [-1, 1]) is an assumption -- TODO confirm against version control.
        for (int k = 0; k < N; k++) {
          a[k] = alglib.hqrnduniformr(randState) * 2 - 1;
          b[k] = alglib.hqrnduniformr(randState) * 2 - 1;
          c[k] = alglib.hqrnduniformr(randState) * 2 - 1;
          d[k] = alglib.hqrnduniformr(randState) * 2 - 1;
          e[k] = alglib.hqrnduniformr(randState) * 2 - 1;
          f[k] = alglib.hqrnduniformr(randState) * 2 - 1;
          g[k] = alglib.hqrnduniformr(randState) * 2 - 1;
          h[k] = alglib.hqrnduniformr(randState) * 2 - 1;
          i[k] = alglib.hqrnduniformr(randState) * 2 - 1;
          j[k] = alglib.hqrnduniformr(randState) * 2 - 1;
          y[k] = a[k] * b[k] + c[k] * d[k] + e[k] * f[k] + a[k] * g[k] * i[k] + c[k] * f[k] * j[k];
        }

        // x[0] .. x[9] correspond to X1 .. X10
        var x = new[] { a, b, c, d, e, f, g, h, i, j };

        var all2Combinations = HeuristicLab.Common.EnumerableExtensions.Combinations(new[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, 2);
        var resultList = new List<Tuple<string, double>>();
        foreach (var entry in all2Combinations) {
          var aIdx = entry.First();
          var bIdx = entry.Skip(1).First();
          resultList.Add(Tuple.Create(aIdx + " " + bIdx, Heuristics.CorrelationForInteraction(x[aIdx - 1], x[bIdx - 1], y)));
        }
        foreach (var entry in resultList.OrderByDescending(t => t.Item2)) {
          Console.WriteLine("{0} {1:N3}", entry.Item1, entry.Item2);
        }

        var all3Combinations = HeuristicLab.Common.EnumerableExtensions.Combinations(new[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }, 3);
        resultList = new List<Tuple<string, double>>();
        foreach (var entry in all3Combinations) {
          var aIdx = entry.First();
          var bIdx = entry.Skip(1).First();
          var cIdx = entry.Skip(2).First();
          resultList.Add(Tuple.Create(aIdx + " " + bIdx + " " + cIdx, Heuristics.CorrelationForInteraction(x[aIdx - 1], x[bIdx - 1], x[cIdx - 1], y)));
        }
        // Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
        foreach (var entry in resultList.OrderByDescending(t => t.Item2)) {
          Console.WriteLine("{0} {1:N3}", entry.Item1, entry.Item2);
        }

        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, b, y) > 0.01);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(b, a, y) > 0.01);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(c, d, y) > 0.01);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(d, c, y) > 0.01);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(e, f, y) > 0.01);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(f, e, y) > 0.01);

        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, g, i, y) > 0.01);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(a, i, g, y) > 0.01);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(g, a, i, y) > 0.01);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(g, i, a, y) > 0.01);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(i, g, a, y) > 0.01);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(i, a, g, y) > 0.01);

        Assert.IsTrue(Heuristics.CorrelationForInteraction(c, f, j, y) > 0.01);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(c, j, f, y) > 0.01);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(f, c, j, y) > 0.01);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(f, j, c, y) > 0.01);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(j, c, f, y) > 0.01);
        Assert.IsTrue(Heuristics.CorrelationForInteraction(j, f, c, y) > 0.01);
      }
    }
    #endregion

    #region expression hashing
    /// <summary>
    /// Builds pairs of programs with CodeGenerator and checks whether ExprHashSymbolic.GetHash
    /// assigns them equal (or, for the last three cases, different) hashes.
    /// Every program is terminated with OpCodes.Exit before GetCode is called.
    /// </summary>
    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void ExprHashSymbolicTest() {
      int nParams;
      byte[] code;

      {
        // addition of variables
        var codeGen = new CodeGenerator();
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit2(OpCodes.LoadVar, 2);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h1 = ExprHashSymbolic.GetHash(code, nParams);

        codeGen = new CodeGenerator();
        codeGen.Emit2(OpCodes.LoadVar, 2);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h2 = ExprHashSymbolic.GetHash(code, nParams);

        Assert.AreEqual(h1, h2);
      }

      {
        // multiplication of variables
        var codeGen = new CodeGenerator();
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit2(OpCodes.LoadVar, 2);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h1 = ExprHashSymbolic.GetHash(code, nParams);

        codeGen = new CodeGenerator();
        codeGen.Emit2(OpCodes.LoadVar, 2);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h2 = ExprHashSymbolic.GetHash(code, nParams);

        Assert.AreEqual(h1, h2);
      }

      {
        // distributivity
        var codeGen = new CodeGenerator();
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit2(OpCodes.LoadVar, 2);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit2(OpCodes.LoadVar, 3);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h1 = ExprHashSymbolic.GetHash(code, nParams);

        codeGen = new CodeGenerator();
        codeGen.Emit2(OpCodes.LoadVar, 2);
        codeGen.Emit2(OpCodes.LoadVar, 3);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit2(OpCodes.LoadVar, 3);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h2 = ExprHashSymbolic.GetHash(code, nParams);

        Assert.AreEqual(h1, h2);
      }

      {
        // 1/(x1x2) = 1/x1 * 1/x2
        var codeGen = new CodeGenerator();
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit2(OpCodes.LoadVar, 2);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Inv);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h1 = ExprHashSymbolic.GetHash(code, nParams);

        codeGen = new CodeGenerator();
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Inv);
        codeGen.Emit2(OpCodes.LoadVar, 2);
        codeGen.Emit1(OpCodes.Inv);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h2 = ExprHashSymbolic.GetHash(code, nParams);

        Assert.AreEqual(h1, h2);
      }

      {
        // exp(x1 + x2) = exp(x1) * exp(x2)
        var codeGen = new CodeGenerator();
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit2(OpCodes.LoadVar, 2);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exp);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h1 = ExprHashSymbolic.GetHash(code, nParams);

        codeGen = new CodeGenerator();
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Exp);
        codeGen.Emit2(OpCodes.LoadVar, 2);
        codeGen.Emit1(OpCodes.Exp);
        codeGen.Emit1(OpCodes.Mul);
        // Exit must be emitted before GetCode; the two calls were previously swapped,
        // so the hashed program was missing its terminating opcode
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h2 = ExprHashSymbolic.GetHash(code, nParams);

        Assert.AreEqual(h1, h2);
      }

      {
        // log(x1 * x2) = log(x1) + log(x2)
        var codeGen = new CodeGenerator();
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit2(OpCodes.LoadVar, 2);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Log);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h1 = ExprHashSymbolic.GetHash(code, nParams);

        codeGen = new CodeGenerator();
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Log);
        codeGen.Emit2(OpCodes.LoadVar, 2);
        codeGen.Emit1(OpCodes.Log);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h2 = ExprHashSymbolic.GetHash(code, nParams);

        Assert.AreEqual(h1, h2);
      }

      {
        // x1 + x1 is equivalent to x1
        var codeGen = new CodeGenerator();
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h1 = ExprHashSymbolic.GetHash(code, nParams);

        codeGen = new CodeGenerator();
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h2 = ExprHashSymbolic.GetHash(code, nParams);

        Assert.AreEqual(h1, h2);
      }

      {
        // c1*x1 + c2*x1 is equivalent to c3*x1
        var codeGen = new CodeGenerator();
        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);

        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);

        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h1 = ExprHashSymbolic.GetHash(code, nParams);

        codeGen = new CodeGenerator();
        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h2 = ExprHashSymbolic.GetHash(code, nParams);

        Assert.AreEqual(h1, h2);
      }

      {
        // c1 x1 + c2 x1 = c3 x1 (extended version)
        var codeGen = new CodeGenerator();
        codeGen.Emit1(OpCodes.LoadConst0);

        codeGen.Emit1(OpCodes.LoadConst1);
        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);

        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);

        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h1 = ExprHashSymbolic.GetHash(code, nParams);

        codeGen = new CodeGenerator();
        codeGen.Emit1(OpCodes.LoadConst0);
        codeGen.Emit1(OpCodes.LoadConst1);
        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h2 = ExprHashSymbolic.GetHash(code, nParams);

        Assert.AreEqual(h1, h2);
      }

      {
        // exp(x1) * exp(x1) = exp(x1)
        var codeGen = new CodeGenerator();
        codeGen.Emit1(OpCodes.LoadConst0);

        codeGen.Emit1(OpCodes.LoadConst1);
        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Exp);

        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Exp);

        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h1 = ExprHashSymbolic.GetHash(code, nParams);

        codeGen = new CodeGenerator();
        codeGen.Emit1(OpCodes.LoadConst0);
        codeGen.Emit1(OpCodes.LoadConst1);
        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Exp);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h2 = ExprHashSymbolic.GetHash(code, nParams);

        Assert.AreEqual(h1, h2);
      }

      {
        // inv(x1) + inv(x1) != inv(x1)
        var codeGen = new CodeGenerator();
        codeGen.Emit1(OpCodes.LoadConst0);

        codeGen.Emit1(OpCodes.LoadConst1);
        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Inv);

        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Inv);

        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h1 = ExprHashSymbolic.GetHash(code, nParams);

        codeGen = new CodeGenerator();
        codeGen.Emit1(OpCodes.LoadConst0);
        codeGen.Emit1(OpCodes.LoadConst1);
        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Inv);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h2 = ExprHashSymbolic.GetHash(code, nParams);

        Assert.AreNotEqual(h1, h2);
      }

      {
        // exp(x1) + exp(x1) != exp(x1)
        var codeGen = new CodeGenerator();
        codeGen.Emit1(OpCodes.LoadConst0);

        codeGen.Emit1(OpCodes.LoadConst1);
        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Exp);
        codeGen.Emit1(OpCodes.Mul);

        codeGen.Emit1(OpCodes.LoadConst1);
        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Exp);
        codeGen.Emit1(OpCodes.Mul);

        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h1 = ExprHashSymbolic.GetHash(code, nParams);

        codeGen = new CodeGenerator();
        codeGen.Emit1(OpCodes.LoadConst0);
        codeGen.Emit1(OpCodes.LoadConst1);
        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Exp);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h2 = ExprHashSymbolic.GetHash(code, nParams);

        Assert.AreNotEqual(h1, h2);
      }

      {
        // log(x1) + log(x1) != log(x1)
        var codeGen = new CodeGenerator();
        codeGen.Emit1(OpCodes.LoadConst0);

        codeGen.Emit1(OpCodes.LoadConst1);
        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Log);

        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Log);

        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h1 = ExprHashSymbolic.GetHash(code, nParams);

        codeGen = new CodeGenerator();
        codeGen.Emit1(OpCodes.LoadConst0);
        codeGen.Emit1(OpCodes.LoadConst1);
        codeGen.Emit1(OpCodes.LoadParamN);
        codeGen.Emit2(OpCodes.LoadVar, 1);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Log);
        codeGen.Emit1(OpCodes.Mul);
        codeGen.Emit1(OpCodes.Add);
        codeGen.Emit1(OpCodes.Exit);
        codeGen.GetCode(out code, out nParams);
        var h2 = ExprHashSymbolic.GetHash(code, nParams);

        Assert.AreNotEqual(h1, h2);
      }
    }
    #endregion

    #region number of solutions
    // the algorithm should visit each solution only once

    /// <summary>
    /// Runs TestMctsNumberOfSolutions on the Nguyen "F1 " instance (one input variable) for several
    /// limits on the number of variable references and several sets of allowed functions.
    /// </summary>
    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegNumberOfSolutionsOneVariable() {
      // this problem has only one variable
      var provider = new NguyenInstanceProvider();
      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F1 ")));
      {
        // possible solutions with max one variable reference:
        // x
        // log(x)
        // exp(x)
        // 1/x
        TestMctsNumberOfSolutions(regProblem, 1, 4);
      }
      {
        // possible solutions with max 4 variable references:
        // without exp, log and inv
        // x
        // x*x
        // x*x*x
        // x+x*x
        // x+x*x*x
        // x*x*x*x
        TestMctsNumberOfSolutions(regProblem, 4, 6, allowLog: false, allowInv: false, allowExp: false);
      }
      {
        // possible solutions with max 5 variable references:
        // without exp, log and inv
        // x
        // xx
        // xxx
        // x+xx
        // xxxx
        // x+xxx
        // xxxxx
        // x+xxxx
        // xx+xxx
        TestMctsNumberOfSolutions(regProblem, 5, 9, allowLog: false, allowInv: false, allowExp: false);
      }
      {
        // possible solutions with max two variable references:
        // x
        // log(x+c)
        // exp(x)
        // 1/(x+c)
        //   -- 4
        // x * x
        // x * log(x+c)
        // x * exp(x)
        // x * 1/(x + c)
        // x + log(x+c)
        // x + exp(x)
        // x + 1/(x+c)
        //   -- 7
        // log(x + c) * log(x + c)
        // log(x + c) * exp(x)
        // log(x + c) * 1/(x + c)
        // log(x + c) + log(x + c)
        // log(x + c) + exp(x)
        // log(x + c) + 1/(x+c)
        //   -- 6
        // exp(cx) * 1/(x+c)
        // exp(cx) + exp(cx)
        // exp(cx) + 1/(x+c)
        //   -- 3
        // 1/(x+c) * 1/(x+c)
        // 1/(x+c) + 1/(x+c)
        //   -- 2
        // log(x*x)
        // exp(x*x)
        // inv(x*x+c)
        //   -- 3
        TestMctsNumberOfSolutions(regProblem, 2, 25);
      }
      {
        // possible solutions with max three variable references:
        // without log and inv
        // x
        // exp(x)
        //   -- 2
        // x * x
        // x * exp(x)
        // x + exp(x)
        // exp(x) + exp(x)
        // exp(x*x)
        //   -- 5
        // x * x * x
        // x + x * x
        // x * x * exp(x)
        // x + x * exp(x)
        // exp(x) + x*x
        // exp(x) + x*exp(x)
        // x + exp(x) + exp(x)
        // x * exp(x*x)
        // x + exp(x*x)
        //   -- 9
        // exp(x) + exp(x) + exp(x)
        //   -- 1
        // exp(x) * exp(x*x)
        // exp(x) + exp(x*x)
        //   -- 2
        // exp(x*x*x)
        //   -- 1
        TestMctsNumberOfSolutions(regProblem, 3, 2 + 5 + 9 + 1 + 2 + 1, allowLog: false, allowInv: false);
      }
    }

    /// <summary>
    /// Runs TestMctsNumberOfSolutions on the Nguyen "F9 " instance (two input variables) for several
    /// limits on the number of variable references and several sets of allowed functions.
    /// </summary>
    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbRegNumberOfSolutionsTwoVariables() {
      // this problem has only two input variables
      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider();
      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F9 ")));
      {
        // possible solutions with max one variable reference:
        // x
        // log(x)
        // exp(x)
        // 1/x
        // y
        // log(y)
        // exp(y)
        // 1/y
        TestMctsNumberOfSolutions(regProblem, 1, 8);
      }
      {
        // possible solutions with max one variable reference:
        // without log and inv
        // x
        // exp(x)
        // y
        // exp(y)
        TestMctsNumberOfSolutions(regProblem, 1, 4, allowLog: false, allowInv: false);
      }
      {
        // possible solutions with max two variable references:
        // without log and inv
        // x
        // y
        // exp(x)
        // exp(y)
        //   -- 4
        // x (*) x
        // x (*|+) exp(x)
        // x (*|+) y
        // x (*|+) exp(y)
        //   -- 7
        // exp(x) (+) exp(x)
        // exp(x) (*|+) exp(y)
        //   -- 3
        // y (*) y
        // y (*|+) exp(x)
        // y (*|+) exp(y)
        //   -- 5
        // exp(y) (+) exp(y)
        //   -- 1
        //
        // exp(x*x)
        // exp(x*y)
        // exp(y*y)
        //   -- 3
        TestMctsNumberOfSolutions(regProblem, 2, 4 + 7 + 3 + 5 + 1 + 3, allowLog: false, allowInv: false);
      }
      {
        // possible solutions with max two variable references:
        // without exp and sum
        // x
        // y
        // log(x)
        // log(y)
        // inv(x)
        // inv(y)
        //   -- 6
        // x * x
        // x * y
        // x * log(x)
        // x * log(y)
        // x * inv(x)
        // x * inv(y)
        //   -- 6
        // log(x) * log(x)
        // log(x) * log(y)
        // log(x) * inv(x)
        // log(x) * inv(y)
        //   -- 4
        // inv(x) * inv(x)
        // inv(x) * inv(y)
        //   -- 2
        // y * y
        // y * log(x)
        // y * log(y)
        // y * inv(x)
        // y * inv(y)
        //   -- 5
        // log(y) * log(y)
        // log(y) * inv(x)
        // log(y) * inv(y)
        //   -- 3
        // inv(y) * inv(y)
        //   -- 1
        // log(x*x)
        // log(x*y)
        // log(y*y)
        // inv(x*x)
        // inv(x*y)
        // inv(y*y)
        //   -- 6
        // log(x+y)
        // inv(x+y)
        //   -- 2
        TestMctsNumberOfSolutions(regProblem, 2, 6 + 6 + 4 + 2 + 5 + 3 + 1 + 6 + 2, allowExp: false, allowSum: false);
      }
    }
    #endregion

    #region test structure search (no constants)
    /// <summary>Structure search on Nguyen "F1 " with exp, log and inv disabled.</summary>
    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_Nguyen1() {
      // x³ + x² + x
      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F1 ")));
      TestMctsWithoutConstants(regProblem, nVarRefs: 10, allowExp: false, allowLog: false, allowInv: false);
    }

    /// <summary>Structure search on Nguyen "F2 " with exp, log and inv disabled.</summary>
    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_Nguyen2() {
      // x^4 + x³ + x² + x
      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F2 ")));
      TestMctsWithoutConstants(regProblem, allowExp: false, allowLog: false, allowInv: false);
    }

    /// <summary>Structure search on Nguyen "F3 " with exp, log and inv disabled.</summary>
    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_Nguyen3() {
      // x^5 + x^4 + x³ + x² + x
      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F3 ")));
      TestMctsWithoutConstants(regProblem, nVarRefs: 15, iterations: 1000000, allowExp: false, allowLog: false, allowInv: false);
    }

    /// <summary>Structure search on Nguyen "F4 " with exp, log and inv disabled.</summary>
    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_Nguyen4() {
      // x^6 + x^5 + x^4 + x³ + x² + x
      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F4 ")));
      TestMctsWithoutConstants(regProblem, nVarRefs: 25, iterations: 1000000, allowExp: false, allowLog: false, allowInv: false);
    }

    /// <summary>Structure search on Nguyen "F7 " with log enabled and exp and inv disabled.</summary>
    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_Nguyen7() {
      // log(x + 1) + log(x² + 1)
      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234);
      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F7 ")));
      TestMctsWithoutConstants(regProblem, nVarRefs: 10, iterations: 100000, allowExp: false, allowLog: true, allowInv: false);
    }

    /// <summary>
    /// Structure search on a simplified Poly-10 problem whose target only contains the three
    /// two-variable terms; the two three-variable terms are subtracted from Y beforehand.
    /// </summary>
    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_Poly10_Part1() {
      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

      // Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
      // Y' = X1*X2 + X3*X4 + X5*X6
      // simplify problem by changing target
      var ds = ((Dataset)regProblem.Dataset).ToModifiable();
      var ys = ds.GetDoubleValues("Y").ToArray();
      // only the variables of the two removed terms are needed
      var x1 = ds.GetDoubleValues("X1").ToArray();
      var x3 = ds.GetDoubleValues("X3").ToArray();
      var x6 = ds.GetDoubleValues("X6").ToArray();
      var x7 = ds.GetDoubleValues("X7").ToArray();
      var x9 = ds.GetDoubleValues("X9").ToArray();
      var x10 = ds.GetDoubleValues("X10").ToArray();
      for (int i = 0; i < ys.Length; i++) {
        ys[i] -= x1[i] * x7[i] * x9[i];
        ys[i] -= x3[i] * x6[i] * x10[i];
      }
      ds.ReplaceVariable("Y", ys.ToList());

      var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

      TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false);
    }

    [TestMethod]
    [TestCategory("Algorithms.DataAnalysis")]
    [TestProperty("Time", "short")]
    public void MctsSymbReg_NoConstants_Poly10_Part2() {
      var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234);
      var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10")));

      // Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
      // Y' = X1*X7*X9 + X3*X6*X10
      // simplify problem by changing target
      var ds = ((Dataset)regProblem.Dataset).ToModifiable();
      var ys = ds.GetDoubleValues("Y").ToArray();
      var x1 = ds.GetDoubleValues("X1").ToArray();
      var x2 = ds.GetDoubleValues("X2").ToArray();
      var x3 = ds.GetDoubleValues("X3").ToArray();
      var x4 =
ds.GetDoubleValues("X4").ToArray(); var x5 = ds.GetDoubleValues("X5").ToArray(); var x6 = ds.GetDoubleValues("X6").ToArray(); var x7 = ds.GetDoubleValues("X7").ToArray(); var x8 = ds.GetDoubleValues("X8").ToArray(); var x9 = ds.GetDoubleValues("X9").ToArray(); var x10 = ds.GetDoubleValues("X10").ToArray(); for (int i = 0; i < ys.Length; i++) { ys[i] -= x1[i] * x2[i]; ys[i] -= x3[i] * x4[i]; ys[i] -= x5[i] * x6[i]; } ds.ReplaceVariable("Y", ys.ToList()); var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable); TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbReg_NoConstants_Poly10_Part3() { var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10"))); // Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10 // Y' = X1*X2 + X1*X7*X9 // simplify problem by changing target var ds = ((Dataset)regProblem.Dataset).ToModifiable(); var ys = ds.GetDoubleValues("Y").ToArray(); var x1 = ds.GetDoubleValues("X1").ToArray(); var x2 = ds.GetDoubleValues("X2").ToArray(); var x3 = ds.GetDoubleValues("X3").ToArray(); var x4 = ds.GetDoubleValues("X4").ToArray(); var x5 = ds.GetDoubleValues("X5").ToArray(); var x6 = ds.GetDoubleValues("X6").ToArray(); var x7 = ds.GetDoubleValues("X7").ToArray(); var x8 = ds.GetDoubleValues("X8").ToArray(); var x9 = ds.GetDoubleValues("X9").ToArray(); var x10 = ds.GetDoubleValues("X10").ToArray(); for (int i = 0; i < ys.Length; i++) { ys[i] -= x3[i] * x4[i]; ys[i] -= x5[i] * x6[i]; ys[i] -= x3[i] * x6[i] * x10[i]; } ds.ReplaceVariable("Y", ys.ToList()); var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, 
regProblem.TargetVariable); TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbReg_NoConstants_Poly10_Part4() { var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10"))); // Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10 // Y' = X3*X4 + X5*X6 + X3*X6*X10 // simplify problem by changing target var ds = ((Dataset)regProblem.Dataset).ToModifiable(); var ys = ds.GetDoubleValues("Y").ToArray(); var x1 = ds.GetDoubleValues("X1").ToArray(); var x2 = ds.GetDoubleValues("X2").ToArray(); var x3 = ds.GetDoubleValues("X3").ToArray(); var x4 = ds.GetDoubleValues("X4").ToArray(); var x5 = ds.GetDoubleValues("X5").ToArray(); var x6 = ds.GetDoubleValues("X6").ToArray(); var x7 = ds.GetDoubleValues("X7").ToArray(); var x8 = ds.GetDoubleValues("X8").ToArray(); var x9 = ds.GetDoubleValues("X9").ToArray(); var x10 = ds.GetDoubleValues("X10").ToArray(); for (int i = 0; i < ys.Length; i++) { ys[i] -= x1[i] * x2[i]; ys[i] -= x1[i] * x7[i] * x9[i]; } ds.ReplaceVariable("Y", ys.ToList()); var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable); TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbReg_NoConstants_Poly10_Part5() { var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10"))); // Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10 // Y' = X1*X2 + X3*X4 + 
X5*X6 + X1*X7*X9 // simplify problem by changing target var ds = ((Dataset)regProblem.Dataset).ToModifiable(); var ys = ds.GetDoubleValues("Y").ToArray(); var x1 = ds.GetDoubleValues("X1").ToArray(); var x2 = ds.GetDoubleValues("X2").ToArray(); var x3 = ds.GetDoubleValues("X3").ToArray(); var x4 = ds.GetDoubleValues("X4").ToArray(); var x5 = ds.GetDoubleValues("X5").ToArray(); var x6 = ds.GetDoubleValues("X6").ToArray(); var x7 = ds.GetDoubleValues("X7").ToArray(); var x8 = ds.GetDoubleValues("X8").ToArray(); var x9 = ds.GetDoubleValues("X9").ToArray(); var x10 = ds.GetDoubleValues("X10").ToArray(); for (int i = 0; i < ys.Length; i++) { ys[i] -= x3[i] * x6[i] * x10[i]; } ds.ReplaceVariable("Y", ys.ToList()); var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable); TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbReg_NoConstants_Poly10_Part6() { var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10"))); // Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10 // Y' = X1*X2 + X3*X4 + X5*X6 + X3*X6*X10 // simplify problem by changing target var ds = ((Dataset)regProblem.Dataset).ToModifiable(); var ys = ds.GetDoubleValues("Y").ToArray(); var x1 = ds.GetDoubleValues("X1").ToArray(); var x2 = ds.GetDoubleValues("X2").ToArray(); var x3 = ds.GetDoubleValues("X3").ToArray(); var x4 = ds.GetDoubleValues("X4").ToArray(); var x5 = ds.GetDoubleValues("X5").ToArray(); var x6 = ds.GetDoubleValues("X6").ToArray(); var x7 = ds.GetDoubleValues("X7").ToArray(); var x8 = ds.GetDoubleValues("X8").ToArray(); var x9 = ds.GetDoubleValues("X9").ToArray(); var x10 = 
ds.GetDoubleValues("X10").ToArray(); for (int i = 0; i < ys.Length; i++) { ys[i] -= x1[i] * x7[i] * x9[i]; } ds.ReplaceVariable("Y", ys.ToList()); var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable); TestMctsWithoutConstants(modifiedProblemData, nVarRefs: 9, iterations: 100000, allowExp: false, allowLog: false, allowInv: false); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "long")] public void MctsSymbReg_NoConstants_Poly10_250rows() { var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10"))); regProblem.TrainingPartition.Start = 0; regProblem.TrainingPartition.End = regProblem.Dataset.Rows; regProblem.TestPartition.Start = 0; regProblem.TestPartition.End = 2; TestMctsWithoutConstants(regProblem, nVarRefs: 15, iterations: 200000, allowExp: false, allowLog: false, allowInv: false); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "long")] public void MctsSymbReg_NoConstants_Poly10_10000rows() { // as poly-10 but more rows var rand = new FastRandom(1234); var x1 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x2 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x3 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x4 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x5 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x6 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x7 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x8 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x9 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x10 = Enumerable.Range(0, 10000).Select(_ => 
rand.NextDouble()).ToList(); var ys = new List(); for (int i = 0; i < x1.Count; i++) { ys.Add(x1[i] * x2[i] + x3[i] * x4[i] + x5[i] * x6[i] + x1[i] * x7[i] * x9[i] + x3[i] * x6[i] * x10[i]); } var ds = new Dataset(new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "y" }, new[] { x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, ys }); var problemData = new RegressionProblemData(ds, new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j" }, "y"); problemData.TrainingPartition.Start = 0; problemData.TrainingPartition.End = problemData.Dataset.Rows; problemData.TestPartition.Start = 0; problemData.TestPartition.End = 2; // must not be empty TestMctsWithoutConstants(problemData, nVarRefs: 15, iterations: 100000, allowExp: false, allowLog: false, allowInv: false); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbReg_NoConstants_TwoVars() { // y = x1 + x2 + x1*x2 + x1*x2*x2 + x1*x1*x2 var rand = new FastRandom(1234); var x1 = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList(); var x2 = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList(); var ys = x1.Zip(x2, (x1i, x2i) => x1i + x2i + x1i * x2i + x1i * x2i * x2i + x1i * x1i * x2i).ToList(); var ds = new Dataset(new string[] { "a", "b", "y" }, new[] { x1, x2, ys }); var problemData = new RegressionProblemData(ds, new string[] { "a", "b" }, "y"); TestMctsWithoutConstants(problemData, nVarRefs: 10, iterations: 10000, allowExp: false, allowLog: false, allowInv: false); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbReg_NoConstants_Misleading() { // y = a + baaaaa (the effect of the second term should be very small) // the alg will quickly find that a has big effect and will search below a // since we prevent a + a... the algorithm must find the correct expression via a + b... 
// however b has a small effect so the branch might not be identified as relevant var rand = new FastRandom(1234); var @as = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList(); var bs = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList(); var cs = Enumerable.Range(0, 100).Select(_ => rand.NextDouble() * 1.0e-3).ToList(); var ds = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList(); var es = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList(); var ys = new double[@as.Count]; for (int i = 0; i < ys.Length; i++) ys[i] = @as[i] + bs[i] + @as[i] * bs[i] * cs[i]; var dataset = new Dataset(new string[] { "a", "b", "c", "d", "e", "y" }, new[] { @as, bs, cs, ds, es, ys.ToList() }); var problemData = new RegressionProblemData(dataset, new string[] { "a", "b", "c", "d", "e" }, "y"); TestMctsWithoutConstants(problemData, nVarRefs: 10, iterations: 10000, allowExp: false, allowLog: false, allowInv: false); } #endregion #region restricted structure but including numeric constants [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegKeijzer7() { // ln(x) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 7 f("))); // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance) if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000; TestMcts(regProblem, allowExp: false, allowLog: true, allowInv: false); } /* // [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkNguyen5() { // sin(x²)cos(x) - 1 var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(); var regProblem = 
provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F5 "))); TestMcts(regProblem); } // [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkNguyen6() { // sin(x) + sin(x + x²) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F6 "))); TestMcts(regProblem); } */ [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkNguyen7() { // log(x + 1) + log(x² + 1) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F7 "))); TestMcts(regProblem, maxVariableReferences: 5, allowExp: false, allowLog: true, allowInv: false); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkNguyen8() { // Sqrt(x) // = x ^ 0.5 // = exp(0.5 * log(x)) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F8 "))); TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false); } /* // [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkNguyen9() { // sin(x) + sin(y²) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F9 "))); TestMcts(regProblem); } // [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkNguyen10() { // 2sin(x)cos(y) var provider = new 
HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F10 "))); TestMcts(regProblem); } */ [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkNguyen11() { // x ^ y , x > 0, y > 0 // = exp(y * log(x)) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F11 "))); TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkNguyen12() { // x^4 - x³ + y²/2 - y var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F12 "))); TestMcts(regProblem, maxVariableReferences: 20, allowExp: false, allowLog: false, allowInv: false); } #endregion #region keijzer [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "long")] public void MctsSymbRegBenchmarkKeijzer5() { // (30 * x * z) / ((x - 10) * y²) // = 30 x z / (xy² - y²) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 5 f("))); // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance) if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000; TestMcts(regProblem, maxVariableReferences: 10, allowExp: false, allowLog: false, allowInv: true); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] 
[TestProperty("Time", "short")] public void MctsSymbRegBenchmarkKeijzer6() { // Keijzer 6 f(x) = Sum(1 / i) From 1 to X , x \in [0..120] // we can only approximate this var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 6 f("))); // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance) if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000; TestMcts(regProblem, maxVariableReferences: 20, allowExp: false, allowLog: false, allowInv: true); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkKeijzer8() { // sqrt(x) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 8 f("))); // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance) if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000; TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkKeijzer9() { // arcsinh(x) i.e. 
ln(x + sqrt(x² + 1)) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 9 f("))); // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance) if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000; TestMcts(regProblem, maxVariableReferences: 5, allowExp: true, allowLog: true, allowInv: false); } /* [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkKeijzer11() { // xy + sin( (x-1) (y-1) ) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 11 f("))); // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance) if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000; TestMcts(regProblem, successThreshold: 0.99); // cannot solve this yet } */ [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkKeijzer12() { // x^4 - x³ + y² / 2 - y, same as Nguyen 12 var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 12 f("))); // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance) if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000; TestMcts(regProblem, maxVariableReferences: 
15, allowExp: false, allowLog: false, allowInv: false); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkKeijzer14() { // 8 / (2 + x² + y²) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 14 f("))); // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance) if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000; TestMcts(regProblem, maxVariableReferences: 10, allowExp: false, allowLog: false, allowInv: true); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkKeijzer15() { // x³ / 5 + y³ / 2 - y - x var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(seed: 1234); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 15 f("))); // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance) if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000; TestMcts(regProblem, maxVariableReferences: 10, allowExp: false, allowLog: false, allowInv: false); } #endregion private void TestMcts(IRegressionProblemData problemData, int iterations = 20000, double successThreshold = 0.99999, int maxVariableReferences = 5, bool allowExp = true, bool allowLog = true, bool allowInv = true, bool allowSum = true ) { var mctsSymbReg = new MctsSymbolicRegressionAlgorithm(); var regProblem = new RegressionProblem(); regProblem.ProblemDataParameter.Value = problemData; #region Algorithm Configuration mctsSymbReg.Problem = regProblem; 
mctsSymbReg.Iterations = iterations; mctsSymbReg.MaxVariableReferences = maxVariableReferences; mctsSymbReg.SetSeedRandomly = false; mctsSymbReg.Seed = 1234; mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("exp")), allowExp); mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log")), allowLog); mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 /")), allowInv); mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("t1(x) + t2(x) + ... ")), allowSum); mctsSymbReg.ScaleVariables = true; mctsSymbReg.ConstantOptimizationIterations = 0; #endregion RunAlgorithm(mctsSymbReg); Console.WriteLine(mctsSymbReg.ExecutionTime); var eps = 1.0 - successThreshold; Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (train)"].Value).Value, eps); Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (test)"].Value).Value, eps); } private void TestMctsWithoutConstants(IRegressionProblemData problemData, int nVarRefs = 10, int iterations = 200000, double successThreshold = 0.99999, bool allowExp = true, bool allowLog = true, bool allowInv = true, bool allowSum = true ) { var mctsSymbReg = new MctsSymbolicRegressionAlgorithm(); var regProblem = new RegressionProblem(); regProblem.ProblemDataParameter.Value = problemData; #region Algorithm Configuration mctsSymbReg.Problem = regProblem; mctsSymbReg.Iterations = iterations; mctsSymbReg.MaxVariableReferences = nVarRefs; mctsSymbReg.SetSeedRandomly = false; mctsSymbReg.Seed = 1234; mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("exp")), allowExp); mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log")), allowLog); 
mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 /")), allowInv); mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("t1(x) + t2(x) + ... ")), allowSum); // no constants mctsSymbReg.ScaleVariables = false; mctsSymbReg.ConstantOptimizationIterations = -1; #endregion RunAlgorithm(mctsSymbReg); Console.WriteLine(mctsSymbReg.ExecutionTime); var eps = 1.0 - successThreshold; Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (train)"].Value).Value, eps); Assert.AreEqual(1.0, ((DoubleValue)mctsSymbReg.Results["Best solution quality (test)"].Value).Value, eps); } private void TestMctsNumberOfSolutions(IRegressionProblemData problemData, int maxNumberOfVariables, int expectedNumberOfSolutions, bool allowProd = true, bool allowExp = true, bool allowLog = true, bool allowInv = true, bool allowSum = true ) { var mctsSymbReg = new MctsSymbolicRegressionAlgorithm(); var regProblem = new RegressionProblem(); regProblem.ProblemDataParameter.Value = problemData; #region Algorithm Configuration mctsSymbReg.SetSeedRandomly = false; mctsSymbReg.Seed = 1234; mctsSymbReg.Problem = regProblem; mctsSymbReg.Iterations = int.MaxValue; // stopping when all solutions have been enumerated mctsSymbReg.MaxVariableReferences = maxNumberOfVariables; mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.StartsWith("x * y * ...")), allowProd); mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("exp(c * x * y ...)")), allowExp); mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("log(c + c1 x + c2 x + ...)")), allowLog); mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("1 / (1 + c1 x + c2 x + ...)")), allowInv); 
mctsSymbReg.AllowedFactors.SetItemCheckedState(mctsSymbReg.AllowedFactors.Single(s => s.Value.Contains("t1(x) + t2(x) + ... ")), allowSum); #endregion RunAlgorithm(mctsSymbReg); Console.WriteLine(mctsSymbReg.ExecutionTime); Assert.AreEqual(expectedNumberOfSolutions, ((IntValue)mctsSymbReg.Results["Effective rollouts"].Value).Value); } // same as in SamplesUtil private void RunAlgorithm(IAlgorithm a) { var trigger = new EventWaitHandle(false, EventResetMode.ManualReset); Exception ex = null; a.Stopped += (src, e) => { trigger.Set(); }; a.ExceptionOccurred += (src, e) => { ex = e.Value; trigger.Set(); }; a.Prepare(); a.Start(); trigger.WaitOne(); Assert.AreEqual(ex, null); } } }