using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration;
using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.Instances.DataAnalysis;
using HeuristicLab.Random;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression {
  /// <summary>
  /// MSTest fixture that runs <see cref="GrammarEnumerationAlgorithm"/> on regression
  /// benchmark data and checks the best training solution the algorithm reports.
  /// </summary>
  [TestClass]
  public class MctsSymbolicRegressionTest {
    // Seed used for the random number generator and for the seeded problem instances.
    private const int Seed = 1234;
    private IRandom rand;

    // The test R² of the best solution must lie within (1 - SuccessThreshold) of 1.0.
    private const double SuccessThreshold = 0.9999999;

    private GrammarEnumerationAlgorithm alg;
    private RegressionProblem problem;

    /// <summary>
    /// Creates a fresh random number generator, algorithm and problem before every test.
    /// Only analyzers of type <see cref="RSquaredEvaluator"/> are left checked, and the
    /// search data structure is set to <see cref="StorageType.PriorityQueue"/>.
    /// </summary>
    [TestInitialize]
    public void InitTest() {
      rand = new FastRandom(Seed);

      alg = new GrammarEnumerationAlgorithm();
      problem = new RegressionProblem();
      alg.Problem = problem;
      // NOTE(review): presumably keeps GUI refreshes from firing during a test run - confirm.
      alg.GuiUpdateInterval = int.MaxValue;
      foreach (IGrammarEnumerationAnalyzer grammarEnumerationAnalyzer in alg.Analyzers) {
        alg.Analyzers.SetItemCheckedState(grammarEnumerationAnalyzer, grammarEnumerationAnalyzer is RSquaredEvaluator);
      }
      alg.SearchDataStructure = StorageType.PriorityQueue;
    }

    /// <summary>
    /// Writes the infix form of the best training sentence to the console, if there is one.
    /// </summary>
    [TestCleanup]
    public void Cleanup() {
      // alg is still null if InitTest threw before assigning it; the guard keeps the
      // cleanup from raising a NullReferenceException of its own.
      if (alg != null && alg.BestTrainingSentence != null) {
        Console.WriteLine("Training: " + alg.Grammar.ToInfixString(alg.BestTrainingSentence));
      }
    }

    /// <summary>
    /// Asserts that the run published a "Best solution (Training)" result and that the
    /// test R² of that solution is within (1 - SuccessThreshold) of 1.0.
    /// </summary>
    private void EvaluateGrammarEnumeration() {
      const string bestSolutionKey = "Best solution (Training)";

      // Evaluate results
      var eps = 1.0 - SuccessThreshold;

      // Check if algorithm terminated correctly
      Assert.IsTrue(alg.Results.ContainsKey(bestSolutionKey), "No training solution returned!");

      // Check results
      var bestSolution = (IRegressionSolution)alg.Results[bestSolutionKey].Value;
      Assert.AreEqual(1.0, bestSolution.TestRSquared, eps, "Test quality too low!");
    }

    /// <summary>
    /// Runs the algorithm on NguyenFunctionOne data with constant optimization switched off
    /// and checks that the expected sentence was generated and reported as the best one.
    /// </summary>
    [TestMethod]
    [TestProperty("Goal", "structure search")]
    public void NoConstants_Nguyen1() {
      // x³ + x² + x
      alg.OptimizeConstants = false;
      alg.MaxComplexity = 6;
      alg.Problem.ProblemData = new NguyenFunctionOne(Seed).GenerateRegressionData();

      alg.Start();

      TerminalSymbol constSymbol = alg.Grammar.Const;
      TerminalSymbol varSymbol = alg.Grammar.VarTerminals.First();
      TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
      TerminalSymbol addSymbol = alg.Grammar.Addition;

      // Expected sentence, one term per row (operands precede their operators).
      SymbolString targetSolution = new SymbolString(new[] {
        constSymbol, varSymbol, varSymbol, varSymbol, mulSymbol, mulSymbol, mulSymbol,
        constSymbol, varSymbol, varSymbol, mulSymbol, mulSymbol, addSymbol,
        constSymbol, varSymbol, mulSymbol, addSymbol,
        constSymbol, addSymbol
      });

      int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);

      // The target must be among the recorded distinct sentences and must be the best one.
      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");

      // Evaluate
      EvaluateGrammarEnumeration();
    }

    // Too "large" target model for now...
    //[TestMethod]
    // The [TestMethod] attribute above is commented out, so this test is not marked for execution.
    // NOTE(review): despite the NoConstants_ prefix this test never sets
    // alg.OptimizeConstants = false (NoConstants_Nguyen1 does) - confirm whether that is intended.
    [TestProperty("Goal", "structure search")]
    public void NoConstants_Nguyen2() {
      // x^4 + x³ + x² + x
      alg.MaxComplexity = 11;
      alg.Problem.ProblemData = new NguyenFunctionTwo(Seed).GenerateRegressionData();

      alg.Start();
      EvaluateGrammarEnumeration();
    }

    // Too "large" target model for now...
//[TestMethod]
    // The [TestMethod] attribute above is commented out, so this test is not marked for execution.
    // NOTE(review): unlike NoConstants_Nguyen6 and NoConstants_Nguyen9 below, this test never
    // sets alg.OptimizeConstants = false - confirm whether that is intended.
    [TestProperty("Goal", "structure search")]
    public void NoConstants_Nguyen3() {
      // x^5 + x^4 + x^3 + x^2 + x
      alg.MaxComplexity = 32;
      alg.Problem.ProblemData = new NguyenFunctionThree(Seed).GenerateRegressionData();

      alg.Start();

      EvaluateGrammarEnumeration();
    }

    /// <summary>
    /// Runs the algorithm on NguyenFunctionSix data with constant optimization switched off
    /// and checks that the expected sentence was generated and reported as the best one.
    /// </summary>
    [TestMethod]
    [TestProperty("Goal", "structure search")]
    public void NoConstants_Nguyen6() {
      // sin(x) + sin(x + x²)
      alg.OptimizeConstants = false;
      alg.MaxComplexity = 4;
      alg.Problem.ProblemData = new NguyenFunctionSix(Seed).GenerateRegressionData();

      alg.Start();

      TerminalSymbol constSymbol = alg.Grammar.Const;
      TerminalSymbol varSymbol = alg.Grammar.VarTerminals.First();
      TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
      TerminalSymbol addSymbol = alg.Grammar.Addition;
      TerminalSymbol sinSymbol = alg.Grammar.Sin;

      // c * sin(c x + c) + c * sin(c * x * x + c * x) + c
      SymbolString targetSolution = new SymbolString(new[] {
        varSymbol, constSymbol, mulSymbol, constSymbol, addSymbol, sinSymbol, constSymbol, mulSymbol,
        varSymbol, varSymbol, mulSymbol, constSymbol, mulSymbol,
        varSymbol, constSymbol, mulSymbol, addSymbol,
        constSymbol, addSymbol, sinSymbol, constSymbol, mulSymbol, addSymbol,
        constSymbol, addSymbol
      });

      int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);

      // The target must be among the recorded distinct sentences and must be the best one.
      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");

      EvaluateGrammarEnumeration();
    }

    /// <summary>
    /// Runs the algorithm on NguyenFunctionNine data with constant optimization switched off
    /// and checks that the expected two-variable sentence was generated and reported as the
    /// best one. The variable terminals are looked up by their string representation.
    /// </summary>
    [TestMethod]
    [TestProperty("Goal", "structure search")]
    public void NoConstants_Nguyen9() {
      // sin(x) + sin(y²)
      alg.OptimizeConstants = false;
      alg.MaxComplexity = 3;
      alg.Problem.ProblemData = new NguyenFunctionNine(Seed).GenerateRegressionData();

      alg.Start();

      TerminalSymbol xSymbol = alg.Grammar.VarTerminals.First(v => v.StringRepresentation == "X");
      TerminalSymbol ySymbol = alg.Grammar.VarTerminals.First(v => v.StringRepresentation == "Y");
      TerminalSymbol constSymbol = alg.Grammar.Const;
      TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
      TerminalSymbol addSymbol = alg.Grammar.Addition;
      TerminalSymbol sinSymbol = alg.Grammar.Sin;

      // c*sin(c*x + c) + c*sin(c*y*y + c) + c
      SymbolString targetSolution = new SymbolString(new[] {
        xSymbol, constSymbol, mulSymbol, constSymbol, addSymbol, sinSymbol, constSymbol, mulSymbol,
        ySymbol, ySymbol, mulSymbol, constSymbol, mulSymbol,
        constSymbol, addSymbol, sinSymbol, constSymbol, mulSymbol, addSymbol,
        constSymbol, addSymbol
      });

      int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);

      // The target must be among the recorded distinct sentences and must be the best one.
      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");

      EvaluateGrammarEnumeration();
    }

    // Too many variables for now...
//[TestMethod]
    // The [TestMethod] attribute above is commented out, so this test is not marked for execution.
    // NOTE(review): unlike NoConstants_Inverse below, this test never sets
    // alg.OptimizeConstants = false - confirm whether that is intended.
    [TestProperty("Goal", "structure search")]
    public void MctsSymbReg_NoConstants_Poly10() {
      alg.MaxComplexity = 10;
      alg.Problem.ProblemData = new PolyTen(Seed).GenerateRegressionData();

      alg.Start();
      EvaluateGrammarEnumeration();
    }

    /// <summary>
    /// Builds a 100-row data set for y = x / (log(x)*x + x), with x drawn as
    /// rand.NextDouble() + 1.1, and runs the algorithm on it with constant optimization
    /// switched off.
    /// </summary>
    [TestMethod]
    [TestProperty("Goal", "structure search")]
    public void NoConstants_Inverse() {
      // x / (log(x)*x + x)
      alg.OptimizeConstants = false;
      alg.MaxComplexity = 4;

      var x = Enumerable.Range(0, 100).Select(_ => rand.NextDouble() + 1.1).ToList();
      var y = x.Select(xi => xi / (Math.Log(xi) * xi + xi)).ToList();

      // The generic arguments are required: a bare "new List()" does not compile.
      var variableNames = new List<string>() { "x", "y" };
      var variableValues = new List<IList>() { x, y };
      alg.Problem.ProblemData = new RegressionProblemData(new Dataset(variableNames, variableValues), "x".ToEnumerable(), "y");

      alg.Start();
      EvaluateGrammarEnumeration();
    }

    /// <summary>
    /// Runs the algorithm on NguyenFunctionSeven data and checks that the expected sentence
    /// was generated and reported as the best one.
    /// </summary>
    [TestMethod]
    [TestProperty("Goal", "structure search + const op")]
    public void Constants_Nguyen7() {
      // log(x+1) + log(x*x + 1)
      alg.MaxComplexity = 3;
      alg.Problem.ProblemData = new NguyenFunctionSeven().GenerateRegressionData();

      alg.Start();

      TerminalSymbol constSymbol = alg.Grammar.Const;
      TerminalSymbol varSymbol = alg.Grammar.VarTerminals.First();
      TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
      TerminalSymbol addSymbol = alg.Grammar.Addition;
      TerminalSymbol logSymbol = alg.Grammar.Log;

      SymbolString targetSolution = new SymbolString(new[] {
        varSymbol, constSymbol, mulSymbol, constSymbol, addSymbol, logSymbol, constSymbol, mulSymbol,
        varSymbol, varSymbol, mulSymbol, constSymbol, mulSymbol,
        constSymbol, addSymbol, logSymbol, constSymbol, mulSymbol, addSymbol,
        constSymbol, addSymbol
      });

      int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);

      // The target must be among the recorded distinct sentences and must be the best one.
      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");

      // Evaluate
      EvaluateGrammarEnumeration();
    }

    /// <summary>
    /// Runs the algorithm on NguyenFunctionTwelve data and only checks the quality of the
    /// best training solution; no target sentence is compared.
    /// </summary>
    [TestMethod]
    [TestProperty("Goal", "structure search + const op")]
    public void Constants_Nguyen12() {
      // x*x*x*x - x*x*x + y*y/2 -y
      alg.MaxComplexity = 10;
      alg.Problem.ProblemData = new NguyenFunctionTwelve().GenerateRegressionData();

      alg.Start();

      // Evaluate
      EvaluateGrammarEnumeration();
    }

    /// <summary>
    /// Runs the algorithm on KeijzerFunctionThree data and checks that the expected sentence
    /// was generated and reported as the best one.
    /// </summary>
    [TestMethod]
    [TestProperty("Goal", "sinnus const op")]
    public void Constants_Keijzer3() {
      // 0.3*x*sin(2*pi*x)
      alg.MaxComplexity = 2;
      alg.Problem.ProblemData = new KeijzerFunctionThree().GenerateRegressionData();

      alg.Start();

      TerminalSymbol constSymbol = alg.Grammar.Const;
      TerminalSymbol varSymbol = alg.Grammar.VarTerminals.First();
      TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
      TerminalSymbol addSymbol = alg.Grammar.Addition;

      SymbolString targetSolution = new SymbolString(new[] {
        constSymbol, varSymbol, mulSymbol, constSymbol, addSymbol, alg.Grammar.Sin,
        varSymbol, mulSymbol, constSymbol, mulSymbol,
        constSymbol, addSymbol
      });

      int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);

      // The target must be among the recorded distinct sentences and must be the best one.
      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");

      // Evaluate
      EvaluateGrammarEnumeration();
    }

    /// <summary>
    /// Runs the algorithm on KeijzerFunctionFive data and checks that the expected
    /// three-variable sentence was generated and reported as the best one. The variable
    /// terminals are looked up by their string representation.
    /// </summary>
    [TestMethod]
    [TestProperty("Goal", "structure search + const op")]
    public void Constants_Keijzer5() {
      // (30*x*z) / ((x - 10)*y*y)
      alg.MaxComplexity = 5;
      alg.Problem.ProblemData = new KeijzerFunctionFive().GenerateRegressionData();

      alg.Start();

      TerminalSymbol constSymbol = alg.Grammar.Const;
      TerminalSymbol xSymbol = alg.Grammar.VarTerminals.First(s => s.StringRepresentation == "X");
      TerminalSymbol ySymbol = alg.Grammar.VarTerminals.First(s => s.StringRepresentation == "Y");
      TerminalSymbol zSymbol = alg.Grammar.VarTerminals.First(s => s.StringRepresentation == "Z");
      TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
      TerminalSymbol addSymbol = alg.Grammar.Addition;
      TerminalSymbol invSymbol = alg.Grammar.Inv;

      // 30 * x * z * 1/(x*y*y - 10*y*y)
      // --> x z * c * x y * y * c * y y * c * + c + inv c +
      // NOTE(review): read as postfix, this sequence seems to lack a multiplication joining
      // "x z * c *" with the inv(...) term (two operands would remain at the end) - confirm
      // against the grammar before relying on this target.
      SymbolString targetSolution = new SymbolString(new[] {
        xSymbol, zSymbol, mulSymbol, constSymbol, mulSymbol,
        xSymbol, ySymbol, mulSymbol, ySymbol, mulSymbol, constSymbol, mulSymbol,
        ySymbol, ySymbol, mulSymbol, constSymbol, mulSymbol, addSymbol,
        constSymbol, addSymbol, invSymbol,
        constSymbol, addSymbol
      });

      int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);

      // The target must be among the recorded distinct sentences and must be the best one.
      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");

      // Evaluate
      EvaluateGrammarEnumeration();
    }

    /// <summary>
    /// Runs the algorithm on KeijzerFunctionTwelve data and checks that the expected sentence
    /// was generated and reported as the best one. x and y are taken as the first and last
    /// variable terminal of the grammar.
    /// </summary>
    [TestMethod]
    [TestProperty("Goal", "structure search + const op")]
    public void Constants_Keijzer12() {
      // x*x*x*x - x*x*x + y*y/2 - y
      alg.MaxComplexity = 10;
      alg.Problem.ProblemData = new KeijzerFunctionTwelve().GenerateRegressionData();

      alg.Start();

      TerminalSymbol constSymbol = alg.Grammar.Const;
      TerminalSymbol xSymbol = alg.Grammar.VarTerminals.First();
      TerminalSymbol ySymbol = alg.Grammar.VarTerminals.Last();
      TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
      TerminalSymbol addSymbol = alg.Grammar.Addition;

      SymbolString targetSolution = new SymbolString(new[] {
        xSymbol, xSymbol, mulSymbol, xSymbol, mulSymbol, xSymbol, mulSymbol, constSymbol, mulSymbol,
        xSymbol, xSymbol, mulSymbol, xSymbol, mulSymbol, constSymbol, mulSymbol, addSymbol,
        ySymbol, ySymbol, mulSymbol, constSymbol, mulSymbol, addSymbol,
        ySymbol, constSymbol, mulSymbol, addSymbol,
        constSymbol, addSymbol
      });

      int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);

      // The target must be among the recorded distinct sentences and must be the best one.
      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");

      // Evaluate
      EvaluateGrammarEnumeration();
    }

    /// <summary>
    /// Runs the algorithm on KeijzerFunctionFourteen data and checks that the expected
    /// sentence was generated and reported as the best one. x and y are taken as the first
    /// and last variable terminal of the grammar.
    /// </summary>
    [TestMethod]
    [TestProperty("Goal", "structure search + const op")]
    public void Constants_Keijzer14() {
      // 8 / (2 + x*x + y*y)
      alg.MaxComplexity = 4;
      alg.Problem.ProblemData = new KeijzerFunctionFourteen().GenerateRegressionData();

      alg.Start();

      TerminalSymbol constSymbol = alg.Grammar.Const;
      TerminalSymbol xSymbol = alg.Grammar.VarTerminals.First();
      TerminalSymbol ySymbol = alg.Grammar.VarTerminals.Last();
      TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
      TerminalSymbol addSymbol = alg.Grammar.Addition;
      TerminalSymbol invSymbol = alg.Grammar.Inv;

      // x x mul c mul y y mul c mul add const add inv const mul const add
      SymbolString targetSolution = new SymbolString(new[] {
        xSymbol, xSymbol, mulSymbol, constSymbol, mulSymbol,
        ySymbol, ySymbol, mulSymbol, constSymbol, mulSymbol, addSymbol,
        constSymbol, addSymbol, invSymbol, constSymbol, mulSymbol,
        constSymbol, addSymbol
      });

      int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);

      // The target must be among the recorded distinct sentences and must be the best one.
      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");

      // Evaluate
      EvaluateGrammarEnumeration();
    }

    /// <summary>
    /// Runs the algorithm on KeijzerFunctionFifteen data and checks that the expected
    /// sentence was generated and reported as the best one. x and y are taken as the first
    /// and last variable terminal of the grammar.
    /// </summary>
    [TestMethod]
    [TestProperty("Goal", "structure search + const op")]
    public void Constants_Keijzer15() {
      // x*x*x / 5 + y*y*y / 2 - y - x
      alg.MaxComplexity = 8;
      alg.Problem.ProblemData = new KeijzerFunctionFifteen().GenerateRegressionData();

      alg.Start();

      TerminalSymbol constSymbol = alg.Grammar.Const;
      TerminalSymbol xSymbol = alg.Grammar.VarTerminals.First();
      TerminalSymbol ySymbol = alg.Grammar.VarTerminals.Last();
      TerminalSymbol mulSymbol = alg.Grammar.Multiplication;
      TerminalSymbol addSymbol = alg.Grammar.Addition;

      // x x * x * const * y y * y * const * + y const * + x const * const +
      SymbolString targetSolution = new SymbolString(new[] {
        xSymbol, xSymbol, mulSymbol, xSymbol, mulSymbol, constSymbol, mulSymbol,
        ySymbol, ySymbol, mulSymbol, ySymbol, mulSymbol, constSymbol, mulSymbol, addSymbol,
        ySymbol, constSymbol, mulSymbol, addSymbol,
        xSymbol, constSymbol, mulSymbol, addSymbol,
        constSymbol, addSymbol
      });

      int targetSolutionHash = alg.Grammar.Hasher.CalcHashCode(targetSolution);
      int actualSolutionHash = alg.Grammar.Hasher.CalcHashCode(alg.BestTrainingSentence);

      // The target must be among the recorded distinct sentences and must be the best one.
      Assert.IsTrue(alg.DistinctSentencesComplexity.ContainsKey(targetSolutionHash), "Actual solution was not generated!");
      Assert.AreEqual(targetSolutionHash, actualSolutionHash, "Actual solution was not recognized as best one.");

      // Evaluate
      EvaluateGrammarEnumeration();
    }

    /// <summary>
    /// Runs the algorithm on PolyTen(123) data with constant optimization switched off,
    /// after copying the data set into a modifiable one and writing the target column back.
    /// </summary>
    [TestMethod]
    [TestProperty("Goal", "Poly-10 derivatives")]
    public void MctsSymbReg_NoConstants_Poly10_Part1() {
      alg.MaxComplexity = 12;
      alg.OptimizeConstants = false;
      var regProblem = new PolyTen(123).GenerateRegressionData();

      // Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10
      // Y' = X1*X2 + X3*X4 + X5*X6

      // simplify problem by changing target
      var ds = ((Dataset)regProblem.Dataset).ToModifiable();
      var ys = ds.GetDoubleValues("Y").ToArray();
      var x1 = ds.GetDoubleValues("X1").ToArray();
      var x2 = ds.GetDoubleValues("X2").ToArray();
      var x3 = ds.GetDoubleValues("X3").ToArray();
      var x4 = ds.GetDoubleValues("X4").ToArray();
      var x5 = ds.GetDoubleValues("X5").ToArray();
      var x6 = ds.GetDoubleValues("X6").ToArray();
      var x7 = ds.GetDoubleValues("X7").ToArray();
      var x8 = ds.GetDoubleValues("X8").ToArray();
      var x9 = ds.GetDoubleValues("X9").ToArray();
      var x10 = ds.GetDoubleValues("X10").ToArray();
      // NOTE(review): both subtractions are commented out, so ys is written back unchanged
      // and the target stays the full Y rather than the Y' described above - confirm intent.
      for (int i = 0; i < ys.Length; i++) {
        //ys[i] -= x1[i] * x7[i] * x9[i];
        //ys[i] -= x3[i] * x6[i] * x10[i];
      }
      ds.ReplaceVariable("Y", ys.ToList());

      alg.Problem.ProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable);

      alg.Start();

      EvaluateGrammarEnumeration();
    }

    // The text after "#if false" is excluded from compilation and is kept exactly as it was.
#if false
[TestMethod] [TestProperty("Goal", "structure search")] public void MctsSymbReg_NoConstants_15() { alg.MaxTreeSize = 5; var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(Seed); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("15"))); alg.Problem.ProblemData = regProblem; alg.Start(); EvaluateGrammarEnumeration(); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbReg_NoConstants_Nguyen7() { // log(x + 1) + log(x² + 1) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F7 "))); TestGrammarEnumeration(regProblem); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbReg_NoConstants_Poly10_Part1() { var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10"))); // Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10 // Y' = X1*X2 + X3*X4 + X5*X6 // simplify problem by changing target var ds = ((Dataset)regProblem.Dataset).ToModifiable(); var ys = ds.GetDoubleValues("Y").ToArray(); var x1 = ds.GetDoubleValues("X1").ToArray(); var x2 = ds.GetDoubleValues("X2").ToArray(); var x3 = ds.GetDoubleValues("X3").ToArray(); var x4 = ds.GetDoubleValues("X4").ToArray(); var x5 = ds.GetDoubleValues("X5").ToArray(); var x6 = ds.GetDoubleValues("X6").ToArray(); var x7 = ds.GetDoubleValues("X7").ToArray(); var x8 = ds.GetDoubleValues("X8").ToArray(); var x9 = ds.GetDoubleValues("X9").ToArray(); var x10 = ds.GetDoubleValues("X10").ToArray(); for (int i = 0; i < ys.Length; i++) { 
ys[i] -= x1[i] * x7[i] * x9[i]; ys[i] -= x3[i] * x6[i] * x10[i]; } ds.ReplaceVariable("Y", ys.ToList()); var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable); TestGrammarEnumeration(modifiedProblemData); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbReg_NoConstants_Poly10_Part2() { var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10"))); // Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10 // Y' = X1*X7*X9 + X3*X6*X10 // simplify problem by changing target var ds = ((Dataset)regProblem.Dataset).ToModifiable(); var ys = ds.GetDoubleValues("Y").ToArray(); var x1 = ds.GetDoubleValues("X1").ToArray(); var x2 = ds.GetDoubleValues("X2").ToArray(); var x3 = ds.GetDoubleValues("X3").ToArray(); var x4 = ds.GetDoubleValues("X4").ToArray(); var x5 = ds.GetDoubleValues("X5").ToArray(); var x6 = ds.GetDoubleValues("X6").ToArray(); var x7 = ds.GetDoubleValues("X7").ToArray(); var x8 = ds.GetDoubleValues("X8").ToArray(); var x9 = ds.GetDoubleValues("X9").ToArray(); var x10 = ds.GetDoubleValues("X10").ToArray(); for (int i = 0; i < ys.Length; i++) { ys[i] -= x1[i] * x2[i]; ys[i] -= x3[i] * x4[i]; ys[i] -= x5[i] * x6[i]; } ds.ReplaceVariable("Y", ys.ToList()); var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable); TestGrammarEnumeration(modifiedProblemData); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbReg_NoConstants_Poly10_Part3() { var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10"))); // Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10 // Y' = 
X1*X2 + X1*X7*X9 // simplify problem by changing target var ds = ((Dataset)regProblem.Dataset).ToModifiable(); var ys = ds.GetDoubleValues("Y").ToArray(); var x1 = ds.GetDoubleValues("X1").ToArray(); var x2 = ds.GetDoubleValues("X2").ToArray(); var x3 = ds.GetDoubleValues("X3").ToArray(); var x4 = ds.GetDoubleValues("X4").ToArray(); var x5 = ds.GetDoubleValues("X5").ToArray(); var x6 = ds.GetDoubleValues("X6").ToArray(); var x7 = ds.GetDoubleValues("X7").ToArray(); var x8 = ds.GetDoubleValues("X8").ToArray(); var x9 = ds.GetDoubleValues("X9").ToArray(); var x10 = ds.GetDoubleValues("X10").ToArray(); for (int i = 0; i < ys.Length; i++) { ys[i] -= x3[i] * x4[i]; ys[i] -= x5[i] * x6[i]; ys[i] -= x3[i] * x6[i] * x10[i]; } ds.ReplaceVariable("Y", ys.ToList()); var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable); TestGrammarEnumeration(modifiedProblemData); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbReg_NoConstants_Poly10_Part4() { var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10"))); // Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10 // Y' = X3*X4 + X5*X6 + X3*X6*X10 // simplify problem by changing target var ds = ((Dataset)regProblem.Dataset).ToModifiable(); var ys = ds.GetDoubleValues("Y").ToArray(); var x1 = ds.GetDoubleValues("X1").ToArray(); var x2 = ds.GetDoubleValues("X2").ToArray(); var x3 = ds.GetDoubleValues("X3").ToArray(); var x4 = ds.GetDoubleValues("X4").ToArray(); var x5 = ds.GetDoubleValues("X5").ToArray(); var x6 = ds.GetDoubleValues("X6").ToArray(); var x7 = ds.GetDoubleValues("X7").ToArray(); var x8 = ds.GetDoubleValues("X8").ToArray(); var x9 = ds.GetDoubleValues("X9").ToArray(); var x10 = ds.GetDoubleValues("X10").ToArray(); for (int i = 0; i < ys.Length; i++) { ys[i] -= 
x1[i] * x2[i]; ys[i] -= x1[i] * x7[i] * x9[i]; } ds.ReplaceVariable("Y", ys.ToList()); var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable); TestGrammarEnumeration(modifiedProblemData); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbReg_NoConstants_Poly10_Part5() { var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10"))); // Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10 // Y' = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 // simplify problem by changing target var ds = ((Dataset)regProblem.Dataset).ToModifiable(); var ys = ds.GetDoubleValues("Y").ToArray(); var x1 = ds.GetDoubleValues("X1").ToArray(); var x2 = ds.GetDoubleValues("X2").ToArray(); var x3 = ds.GetDoubleValues("X3").ToArray(); var x4 = ds.GetDoubleValues("X4").ToArray(); var x5 = ds.GetDoubleValues("X5").ToArray(); var x6 = ds.GetDoubleValues("X6").ToArray(); var x7 = ds.GetDoubleValues("X7").ToArray(); var x8 = ds.GetDoubleValues("X8").ToArray(); var x9 = ds.GetDoubleValues("X9").ToArray(); var x10 = ds.GetDoubleValues("X10").ToArray(); for (int i = 0; i < ys.Length; i++) { ys[i] -= x3[i] * x6[i] * x10[i]; } ds.ReplaceVariable("Y", ys.ToList()); var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable); TestGrammarEnumeration(modifiedProblemData); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbReg_NoConstants_Poly10_Part6() { var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10"))); // Y = X1*X2 + X3*X4 + X5*X6 + X1*X7*X9 + X3*X6*X10 // Y' = X1*X2 + X3*X4 + X5*X6 + X3*X6*X10 // 
simplify problem by changing target var ds = ((Dataset)regProblem.Dataset).ToModifiable(); var ys = ds.GetDoubleValues("Y").ToArray(); var x1 = ds.GetDoubleValues("X1").ToArray(); var x2 = ds.GetDoubleValues("X2").ToArray(); var x3 = ds.GetDoubleValues("X3").ToArray(); var x4 = ds.GetDoubleValues("X4").ToArray(); var x5 = ds.GetDoubleValues("X5").ToArray(); var x6 = ds.GetDoubleValues("X6").ToArray(); var x7 = ds.GetDoubleValues("X7").ToArray(); var x8 = ds.GetDoubleValues("X8").ToArray(); var x9 = ds.GetDoubleValues("X9").ToArray(); var x10 = ds.GetDoubleValues("X10").ToArray(); for (int i = 0; i < ys.Length; i++) { ys[i] -= x1[i] * x7[i] * x9[i]; } ds.ReplaceVariable("Y", ys.ToList()); var modifiedProblemData = new RegressionProblemData(ds, regProblem.AllowedInputVariables, regProblem.TargetVariable); TestGrammarEnumeration(modifiedProblemData); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "long")] public void MctsSymbReg_NoConstants_Poly10_250rows() { var provider = new HeuristicLab.Problems.Instances.DataAnalysis.VariousInstanceProvider(Seed); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Poly-10"))); regProblem.TrainingPartition.Start = 0; regProblem.TrainingPartition.End = regProblem.Dataset.Rows; regProblem.TestPartition.Start = 0; regProblem.TestPartition.End = 2; TestGrammarEnumeration(regProblem); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "long")] public void MctsSymbReg_NoConstants_Poly10_10000rows() { // as poly-10 but more rows var x1 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x2 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x3 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x4 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x5 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x6 = Enumerable.Range(0, 
10000).Select(_ => rand.NextDouble()).ToList(); var x7 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x8 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x9 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var x10 = Enumerable.Range(0, 10000).Select(_ => rand.NextDouble()).ToList(); var ys = new List(); for (int i = 0; i < x1.Count; i++) { ys.Add(x1[i] * x2[i] + x3[i] * x4[i] + x5[i] * x6[i] + x1[i] * x7[i] * x9[i] + x3[i] * x6[i] * x10[i]); } var ds = new Dataset(new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "y" }, new[] { x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, ys }); var problemData = new RegressionProblemData(ds, new string[] { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j" }, "y"); problemData.TrainingPartition.Start = 0; problemData.TrainingPartition.End = problemData.Dataset.Rows; problemData.TestPartition.Start = 0; problemData.TestPartition.End = 2; // must not be empty TestGrammarEnumeration(problemData); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbReg_NoConstants_TwoVars() { // y = x1 + x2 + x1*x2 + x1*x2*x2 + x1*x1*x2 var x1 = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList(); var x2 = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList(); var ys = x1.Zip(x2, (x1i, x2i) => x1i + x2i + x1i * x2i + x1i * x2i * x2i + x1i * x1i * x2i).ToList(); var ds = new Dataset(new string[] { "a", "b", "y" }, new[] { x1, x2, ys }); var problemData = new RegressionProblemData(ds, new string[] { "a", "b" }, "y"); TestGrammarEnumeration(problemData); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbReg_NoConstants_Misleading() { // y = a + baaaaa (the effect of the second term should be very small) // the alg will quickly find that a has big effect and will search below a // since we prevent a + a... 
the algorithm must find the correct expression via a + b... // however b has a small effect so the branch might not be identified as relevant var @as = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList(); var bs = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList(); var cs = Enumerable.Range(0, 100).Select(_ => rand.NextDouble() * 1.0e-3).ToList(); var ds = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList(); var es = Enumerable.Range(0, 100).Select(_ => rand.NextDouble()).ToList(); var ys = new double[@as.Count]; for (int i = 0; i < ys.Length; i++) ys[i] = @as[i] + bs[i] + @as[i] * bs[i] * cs[i]; var dataset = new Dataset(new string[] { "a", "b", "c", "d", "e", "y" }, new[] { @as, bs, cs, ds, es, ys.ToList() }); var problemData = new RegressionProblemData(dataset, new string[] { "a", "b", "c", "d", "e" }, "y"); TestGrammarEnumeration(problemData); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegKeijzer7() { // ln(x) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.KeijzerInstanceProvider(Seed); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("Keijzer 7 f("))); // some Keijzer problem instances have very large test partitions (here we are not concerened about test performance) if (regProblem.TestPartition.End - regProblem.TestPartition.Start > 1000) regProblem.TestPartition.End = regProblem.TestPartition.Start + 1000; TestGrammarEnumeration(regProblem); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkNguyen5() { // sin(x²)cos(x) - 1 var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F5 "))); TestGrammarEnumeration(regProblem); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] 
[TestProperty("Time", "short")] public void MctsSymbRegBenchmarkNguyen6() { // sin(x) + sin(x + x²) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F6 "))); TestGrammarEnumeration(regProblem); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkNguyen7() { // log(x + 1) + log(x² + 1) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F7 "))); TestGrammarEnumeration(regProblem); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkNguyen8() { // Sqrt(x) // = x ^ 0.5 // = exp(0.5 * log(x)) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(Seed); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F8 "))); TestGrammarEnumeration(regProblem); } // [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkNguyen9() { // sin(x) + sin(y²) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F9 "))); TestGrammarEnumeration(regProblem); } // [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] public void MctsSymbRegBenchmarkNguyen10() { // 2sin(x)cos(y) var provider = new HeuristicLab.Problems.Instances.DataAnalysis.NguyenInstanceProvider(); var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F10 "))); TestGrammarEnumeration(regProblem); } [TestMethod] [TestCategory("Algorithms.DataAnalysis")] [TestProperty("Time", "short")] 
public void MctsSymbRegBenchmarkNguyen11() {
  // Nguyen F11 benchmark. Target expression:
  // x ^ y , x > 0, y > 0
  // = exp(y * log(x))
  var provider = new NguyenInstanceProvider(Seed);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F11 ")));
  TestGrammarEnumeration(regProblem);
}

[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkNguyen12() {
  // Nguyen F12 benchmark. Target expression: x^4 - x³ + y²/2 - y
  var provider = new NguyenInstanceProvider(Seed);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains("F12 ")));
  TestGrammarEnumeration(regProblem);
}

[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "long")]
public void MctsSymbRegBenchmarkKeijzer5() {
  // Target expression:
  // (30 * x * z) / ((x - 10) * y²)
  // = 30 x z / (xy² - 10y²)
  RunKeijzerBenchmark("Keijzer 5 f(");
}

[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer6() {
  // Keijzer 6 f(x) = Sum(1 / i) From 1 to X , x \in [0..120]
  // we can only approximate this
  RunKeijzerBenchmark("Keijzer 6 f(");
}

[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer8() {
  // Target expression: sqrt(x)
  RunKeijzerBenchmark("Keijzer 8 f(");
}

[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer9() {
  // Target expression: arcsinh(x) i.e. ln(x + sqrt(x² + 1))
  RunKeijzerBenchmark("Keijzer 9 f(");
}

[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer11() {
  // Target expression: xy + sin( (x-1) (y-1) )
  // Goes through the shared helper, so the provider receives Seed just like
  // every other Keijzer benchmark in this class.
  RunKeijzerBenchmark("Keijzer 11 f(");
}

[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer12() {
  // Target expression: x^4 - x³ + y² / 2 - y, same as Nguyen 12
  RunKeijzerBenchmark("Keijzer 12 f(");
}

[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer14() {
  // Target expression: 8 / (2 + x² + y²)
  RunKeijzerBenchmark("Keijzer 14 f(");
}

[TestMethod]
[TestCategory("Algorithms.DataAnalysis")]
[TestProperty("Time", "short")]
public void MctsSymbRegBenchmarkKeijzer15() {
  // Target expression: x³ / 5 + y³ / 2 - y - x
  RunKeijzerBenchmark("Keijzer 15 f(");
}

/// <summary>
/// Loads the single Keijzer instance whose descriptor name contains
/// <paramref name="descriptorNamePart"/>, limits its test partition to at most
/// 1000 rows and passes the problem data to TestGrammarEnumeration.
/// </summary>
/// <param name="descriptorNamePart">
/// Substring that must match exactly one descriptor name, e.g. "Keijzer 5 f(".
/// Single() throws if no descriptor or more than one descriptor matches.
/// </param>
private void RunKeijzerBenchmark(string descriptorNamePart) {
  const int maxTestRows = 1000;

  // The provider is always created with the shared Seed so that all Keijzer
  // benchmarks construct it identically.
  var provider = new KeijzerInstanceProvider(Seed);
  var regProblem = provider.LoadData(provider.GetDataDescriptors().Single(x => x.Name.Contains(descriptorNamePart)));

  // some Keijzer problem instances have very large test partitions
  // (here we are not concerned about test performance)
  if (regProblem.TestPartition.End - regProblem.TestPartition.Start > maxTestRows) {
    regProblem.TestPartition.End = regProblem.TestPartition.Start + maxTestRows;
  }

  TestGrammarEnumeration(regProblem);
}
#endif
}
}