using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.Globalization;
using System.Linq;
using System.Text;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Views;
using HeuristicLab.Random;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Tests {
  /// <summary>
  /// Unit tests for <see cref="BottomUpSimilarityCalculator"/>: checks the size of the
  /// bottom-up mapping for a set of hand-written expression pairs and measures the
  /// throughput of pairwise similarity calculation on randomly created trees.
  /// </summary>
  [TestClass]
  public class BottomUpSimilarityCalculatorTest {
    private readonly BottomUpSimilarityCalculator busCalculator;
    private readonly SymbolicExpressionImporter importer;

    // Number of random trees and dataset dimensions used by the performance test.
    private const int N = 100;
    private const int Rows = 1;
    private const int Columns = 10;

    /// <summary>
    /// Creates the calculator (matching both constant values and variable weights)
    /// and the importer used to parse the prefix-notation expressions.
    /// </summary>
    public BottomUpSimilarityCalculatorTest() {
      busCalculator = new BottomUpSimilarityCalculator { MatchConstantValues = true, MatchVariableWeights = true };
      importer = new SymbolicExpressionImporter();
    }

    /// <summary>
    /// Verifies the number of mapped node pairs for several pairs of expressions.
    /// </summary>
    [TestMethod]
    [TestCategory("Problems.DataAnalysis.Symbolic")]
    [TestProperty("Time", "short")]
    public void TestBottomUpMatching() {
      TestMatchedNodes("(+ 1 2)", "(+ 2 1)", 5);
      TestMatchedNodes("(- 2 1)", "(- 1 2)", 2);
      TestMatchedNodes("(* (variable 1 X1) (variable 1 X2))", "(* (+ (variable 1 X1) 1) (+ (variable 1 X2) 1))", 2);
      TestMatchedNodes("(* (variable 1 X1) (variable 1 X2))", "(* (+ (variable 1 X1) 1) (variable 1 X2))", 2);

      TestMatchedNodes("(+ (variable 1 a) (variable 1 b))", "(+ (variable 1 a) (variable 1 a))", 1);
      TestMatchedNodes("(+ (+ (variable 1 a) (variable 1 b)) (variable 1 b))", "(+ (* (+ (variable 1 a) (variable 1 b)) (variable 1 b)) (+ (+ (variable 1 a) (variable 1 b)) (variable 1 b)))", 5);

      TestMatchedNodes(
        "(* (+ 2.84 (exp (+ (log (/ (variable 2.0539 X5) (variable -9.2452e-1 X6))) (/ (variable 2.0539 X5) (variable -9.2452e-1 X6))))) 2.9081)",
        "(* (- (variable 9.581e-1 X6) (+ (- (variable 5.1491e-1 X5) 1.614e+1) (+ (/ (variable 2.0539 X5) (variable -9.2452e-1 X6)) (log (/ (variable 2.0539 X5) (variable -9.2452e-1 X6)))))) 2.9081)",
        9);

      TestMatchedNodes("(+ (exp 2.1033) (/ -4.3072 (variable 2.4691 X7)))", "(/ 1 (+ (/ -4.3072 (variable 2.4691 X7)) (exp 2.1033)))", 6);
      TestMatchedNodes("(+ (exp 2.1033) (/ -4.3072 (variable 2.4691 X7)))", "(/ 1 (+ (/ (variable 2.4691 X7) -4.3072) (exp 2.1033)))", 4);

      const string expr1 = "(* (- 1.2175e+1 (+ (/ (exp -1.4134e+1) (exp 9.2013)) (exp (log (exp (/ (exp (- (* -4.2461 (variable 2.2634 X5)) (- -9.6267e-1 3.3243))) (- (/ (/ (variable 1.0883 X1) (variable 6.9620e-1 X2)) (log 1.3011e+1)) (variable -4.3098e-1 X7)))))))) (log 1.3011e+1))";
      const string expr2 = "(* (- 1.2175e+1 (+ (/ (/ (+ (variable 3.0140 X9) (variable 1.3430 X8)) -1.0864e+1) (exp 9.2013)) (exp (log (exp (/ (exp (- (* -4.2461 (variable 2.2634 X5)) (- -9.6267e-1 3.3243))) (- (/ (/ (variable 1.0883 X1) (variable 6.9620e-1 X2)) (log 1.3011e+1)) (variable -4.3098e-1 X7)))))))) (exp (variable 4.0899e-1 X7)))";
      TestMatchedNodes(expr1, expr2, 23);
    }

    /// <summary>
    /// Imports both expressions, computes the bottom-up mapping between their roots and
    /// asserts that the mapping contains <paramref name="expected"/> entries.
    /// </summary>
    /// <param name="expr1">First expression in the importer's prefix notation.</param>
    /// <param name="expr2">Second expression in the importer's prefix notation.</param>
    /// <param name="expected">Expected number of entries in the mapping.</param>
    // This is a parameterised helper, not a test: it must not carry [TestMethod],
    // because MSTest test methods have to be public and parameterless.
    private void TestMatchedNodes(string expr1, string expr2, int expected) {
      var t1 = importer.Import(expr1);
      var t2 = importer.Import(expr2);

      var mapping = busCalculator.ComputeBottomUpMapping(t1.Root, t2.Root);
      var c = mapping.Count;

      // Report the mismatch through the test framework instead of a bare Exception.
      Assert.AreEqual(expected, c, "Match count " + c + " is different than expected value " + expected);
    }

    /// <summary>
    /// Calculates the similarity of every unordered pair of N random trees and writes
    /// the elapsed time, the average similarity and the throughput to the console.
    /// </summary>
    [TestMethod]
    [TestCategory("Problems.DataAnalysis.Symbolic")]
    [TestProperty("Time", "long")]
    public void TestBottomUpSimilarityCalculatorPerformance() {
      var grammar = new TypeCoherentExpressionGrammar();
      grammar.ConfigureAsDefaultRegressionGrammar();
      var twister = new MersenneTwister(31415);
      var ds = Util.CreateRandomDataset(twister, Rows, Columns);
      var trees = Util.CreateRandomTrees(twister, ds, grammar, N, 1, 100, 0, 0);

      double s = 0;
      var sw = new Stopwatch();

      sw.Start();
      for (int i = 0; i < trees.Length - 1; ++i) {
        for (int j = i + 1; j < trees.Length; ++j) {
          s += busCalculator.CalculateSimilarity(trees[i], trees[j]);
        }
      }
      sw.Stop();

      // The nested loop visits each unordered pair (i < j) exactly once.
      int pairCount = trees.Length * (trees.Length - 1) / 2;
      double elapsedSeconds = sw.ElapsedMilliseconds / 1000.0;

      Console.WriteLine("Elapsed time: " + elapsedSeconds + ", Avg. similarity: " + s / pairCount);
      // Throughput uses the number of comparisons actually performed (previously N * (N + 1) / 2).
      Console.WriteLine(pairCount / elapsedSeconds + " similarity calculations per second.");
    }

    /// <summary>
    /// Builds a standalone LaTeX/TikZ document that draws both trees (the second one
    /// shifted horizontally via the \offs macro) and a dashed gray arrow for every
    /// entry of <paramref name="map"/>.
    /// </summary>
    /// <param name="t1">Tree whose nodes are the keys of <paramref name="map"/>.</param>
    /// <param name="t2">Tree whose nodes are the values of <paramref name="map"/>.</param>
    /// <param name="map">Node mapping to visualise.</param>
    /// <returns>The LaTeX source as a string.</returns>
    // NOTE(review): the generic type arguments below were missing in the reviewed text and
    // have been restored from how the values are used; confirm that they agree with the
    // return type of BottomUpSimilarityCalculator.ComputeBottomUpMapping.
    private static string FormatMapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> map) {
      // Symbol names that are replaced by ready-made LaTeX labels.
      var symbolNameMap = new Dictionary<string, string> {
        {"ProgramRootSymbol", "Prog"},
        {"StartSymbol","RPB"},
        {"Multiplication", "$\\times$"},
        {"Division", "$\\div$"},
        {"Addition", "$+$"},
        {"Subtraction", "$-$"},
        {"Exponential", "$\\exp$"},
        {"Logarithm", "$\\log$"}
      };

      var sb = new StringBuilder();
      var nodeIds = new Dictionary<ISymbolicExpressionTreeNode, string>();
      var layoutEngine = new ReingoldTilfordLayoutEngine<ISymbolicExpressionTreeNode>(x => x.Subtrees);
      var nl = Environment.NewLine;

      int offset = 0;
      sb.Append("\\documentclass[class=minimal,border=0pt]{standalone}" + nl +
                "\\usepackage{tikz}" + nl +
                "\\begin{document}" + nl +
                "\\begin{tikzpicture}" + nl +
                "\\def\\ws{1}" + nl +
                "\\def\\hs{0.7}" + nl +
                "\\def\\offs{" + offset + "}" + nl);
      AppendTree(sb, t1, layoutEngine, symbolNameMap, nodeIds);

      // Redefine the horizontal offset so the second tree is drawn next to the first.
      offset = 20;
      sb.Append("\\def\\offs{" + offset + "}" + nl);
      AppendTree(sb, t2, layoutEngine, symbolNameMap, nodeIds);

      // One dashed arrow per mapped pair, from the key node to the value node.
      foreach (var p in map) {
        var id1 = nodeIds[p.Key];
        var id2 = nodeIds[p.Value];
        sb.Append(string.Format(CultureInfo.InvariantCulture, "\\path[draw,->,color=gray] ({0}) edge[bend left,dashed] ({1});" + nl, id1, id2));
      }

      sb.Append("\\end{tikzpicture}" + nl +
                "\\end{document}" + nl);
      return sb.ToString();
    }

    /// <summary>
    /// Appends the TikZ node and edge commands for one tree and records the generated
    /// node identifiers in <paramref name="nodeIds"/>.
    /// </summary>
    private static void AppendTree(StringBuilder sb, ISymbolicExpressionTree tree,
      ReingoldTilfordLayoutEngine<ISymbolicExpressionTreeNode> layoutEngine,
      Dictionary<string, string> symbolNameMap,
      Dictionary<ISymbolicExpressionTreeNode, string> nodeIds) {
      // Scale factors applied to the layout coordinates before they are written out.
      const double ws = 0.5;
      const double hs = 0.5;

      var nodeCoordinates = layoutEngine.CalculateLayout(tree.Root).ToDictionary(n => n.Content, n => new PointF(n.X, n.Y));

      foreach (var node in tree.IterateNodesBreadth()) {
        var id = Guid.NewGuid().ToString();
        nodeIds[node] = id;
        var coord = nodeCoordinates[node];

        // Mapped labels are already valid LaTeX (e.g. "$\times$") and must not be escaped;
        // only the raw node text is. Previously the mapped labels were escaped as well,
        // which turned "$\times$" into "$\\times$".
        string label;
        if (!symbolNameMap.TryGetValue(node.Symbol.Name, out label)) {
          label = EscapeLatexString(node.ToString());
        }

        sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\node ({0}) at (\\ws*{1} + \\offs,\\hs*{2}) {{{3}}};", id, ws * coord.X, -hs * coord.Y, label));
      }

      // Edges are emitted after all nodes of the tree, parent to child.
      foreach (ISymbolicExpressionTreeNode parent in tree.IterateNodesBreadth()) {
        foreach (var child in parent.Subtrees) {
          sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\draw ({0}) -- ({1});", nodeIds[parent], nodeIds[child]));
        }
      }
    }

    /// <summary>
    /// Prefixes backslashes, braces and underscores in <paramref name="s"/> with a backslash.
    /// </summary>
    // NOTE(review): a doubled backslash is a line break in LaTeX rather than a literal
    // backslash; left unchanged here, confirm whether node text can contain backslashes.
    private static string EscapeLatexString(string s) {
      return s.Replace("\\", "\\\\").Replace("{", "\\{").Replace("}", "\\}").Replace("_", "\\_");
    }
  }
}