[11219] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Diagnostics;
|
---|
| 4 | using System.Drawing;
|
---|
| 5 | using System.Globalization;
|
---|
| 6 | using System.Linq;
|
---|
| 7 | using System.Text;
|
---|
| 8 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
| 9 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Views;
|
---|
| 10 | using HeuristicLab.Random;
|
---|
| 11 | using Microsoft.VisualStudio.TestTools.UnitTesting;
|
---|
| 12 |
|
---|
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Tests {
  /// <summary>
  /// Tests for <see cref="BottomUpSimilarityCalculator"/>: checks the size of the bottom-up
  /// node mapping computed for hand-written expression pairs and times pairwise similarity
  /// calculations on randomly created trees.
  /// </summary>
  [TestClass]
  public class BottomUpSimilarityCalculatorTest {
    // Calculator under test; configured in the constructor to match constant values and variable weights.
    private readonly BottomUpSimilarityCalculator busCalculator;
    // Turns the textual expressions used by the tests into symbolic expression trees.
    private readonly SymbolicExpressionImporter importer;

    // Passed to Util.CreateRandomTrees in the performance test (presumably the number of trees - confirm against Util).
    private const int N = 100;
    // Passed to Util.CreateRandomDataset together with Columns.
    private const int Rows = 1;
    private const int Columns = 10;

    // Scale factors applied to the layout coordinates when FormatMapping emits TikZ nodes.
    private const double NodeWidthScale = 0.5;
    private const double NodeHeightScale = 0.5;

    public BottomUpSimilarityCalculatorTest() {
      busCalculator = new BottomUpSimilarityCalculator { MatchConstantValues = true, MatchVariableWeights = true };
      importer = new SymbolicExpressionImporter();
    }

    /// <summary>
    /// Verifies the number of entries in the bottom-up mapping for a set of fixed expression pairs.
    /// </summary>
    [TestMethod]
    [TestCategory("Problems.DataAnalysis.Symbolic")]
    [TestProperty("Time", "short")]
    public void TestBottomUpMatching() {
      TestMatchedNodes("(+ 1 2)", "(+ 2 1)", 5);
      TestMatchedNodes("(- 2 1)", "(- 1 2)", 2);
      TestMatchedNodes("(* (variable 1 X1) (variable 1 X2))", "(* (+ (variable 1 X1) 1) (+ (variable 1 X2) 1))", 2);

      TestMatchedNodes("(* (variable 1 X1) (variable 1 X2))", "(* (+ (variable 1 X1) 1) (variable 1 X2))", 2);

      TestMatchedNodes("(+ (variable 1 a) (variable 1 b))", "(+ (variable 1 a) (variable 1 a))", 1);
      TestMatchedNodes("(+ (+ (variable 1 a) (variable 1 b)) (variable 1 b))", "(+ (* (+ (variable 1 a) (variable 1 b)) (variable 1 b)) (+ (+ (variable 1 a) (variable 1 b)) (variable 1 b)))", 5);

      TestMatchedNodes(
        "(* (+ 2.84 (exp (+ (log (/ (variable 2.0539 X5) (variable -9.2452e-1 X6))) (/ (variable 2.0539 X5) (variable -9.2452e-1 X6))))) 2.9081)",
        "(* (- (variable 9.581e-1 X6) (+ (- (variable 5.1491e-1 X5) 1.614e+1) (+ (/ (variable 2.0539 X5) (variable -9.2452e-1 X6)) (log (/ (variable 2.0539 X5) (variable -9.2452e-1 X6)))))) 2.9081)",
        9);

      TestMatchedNodes("(+ (exp 2.1033) (/ -4.3072 (variable 2.4691 X7)))", "(/ 1 (+ (/ -4.3072 (variable 2.4691 X7)) (exp 2.1033)))", 6);
      TestMatchedNodes("(+ (exp 2.1033) (/ -4.3072 (variable 2.4691 X7)))", "(/ 1 (+ (/ (variable 2.4691 X7) -4.3072) (exp 2.1033)))", 4);

      const string expr1 = "(* (- 1.2175e+1 (+ (/ (exp -1.4134e+1) (exp 9.2013)) (exp (log (exp (/ (exp (- (* -4.2461 (variable 2.2634 X5)) (- -9.6267e-1 3.3243))) (- (/ (/ (variable 1.0883 X1) (variable 6.9620e-1 X2)) (log 1.3011e+1)) (variable -4.3098e-1 X7)))))))) (log 1.3011e+1))";
      const string expr2 = "(* (- 1.2175e+1 (+ (/ (/ (+ (variable 3.0140 X9) (variable 1.3430 X8)) -1.0864e+1) (exp 9.2013)) (exp (log (exp (/ (exp (- (* -4.2461 (variable 2.2634 X5)) (- -9.6267e-1 3.3243))) (- (/ (/ (variable 1.0883 X1) (variable 6.9620e-1 X2)) (log 1.3011e+1)) (variable -4.3098e-1 X7)))))))) (exp (variable 4.0899e-1 X7)))";

      TestMatchedNodes(expr1, expr2, 23);
    }

    /// <summary>
    /// Imports both expressions, computes the bottom-up mapping between their roots and fails
    /// the running test when the number of mapped entries differs from <paramref name="expected"/>.
    /// </summary>
    /// <remarks>
    /// This is a helper, not a test: it is private and takes parameters, so it must not carry
    /// [TestMethod] (MSTest only accepts public, parameterless test methods).
    /// </remarks>
    /// <param name="expr1">First expression in the importer's textual format.</param>
    /// <param name="expr2">Second expression in the importer's textual format.</param>
    /// <param name="expected">Expected value of the mapping's Count.</param>
    private void TestMatchedNodes(string expr1, string expr2, int expected) {
      var t1 = importer.Import(expr1);
      var t2 = importer.Import(expr2);

      var mapping = busCalculator.ComputeBottomUpMapping(t1.Root, t2.Root);
      var c = mapping.Count;

      if (c != expected) {
        // Assert.Fail reports a regular test failure instead of an unhandled System.Exception.
        Assert.Fail("Match count " + c + " is different than expected value " + expected);
      }
    }

    /// <summary>
    /// Times the similarity calculation over every unordered pair of randomly created trees and
    /// prints the elapsed time, the average similarity and the throughput to the console.
    /// </summary>
    [TestMethod]
    [TestCategory("Problems.DataAnalysis.Symbolic")]
    [TestProperty("Time", "long")]
    public void TestBottomUpSimilarityCalculatorPerformance() {
      var grammar = new TypeCoherentExpressionGrammar();
      grammar.ConfigureAsDefaultRegressionGrammar();
      var twister = new MersenneTwister(31415); // fixed seed
      var ds = Util.CreateRandomDataset(twister, Rows, Columns);
      var trees = Util.CreateRandomTrees(twister, ds, grammar, N, 1, 100, 0, 0);

      double s = 0;
      var sw = new Stopwatch();

      sw.Start();
      for (int i = 0; i < trees.Length - 1; ++i) {
        for (int j = i + 1; j < trees.Length; ++j) {
          s += busCalculator.CalculateSimilarity(trees[i], trees[j]);
        }
      }
      sw.Stop();

      // The nested loop above visits each unordered pair exactly once: n * (n - 1) / 2 calls.
      // Both statistics use this count so the average and the throughput stay consistent.
      int pairCount = trees.Length * (trees.Length - 1) / 2;
      double elapsedSeconds = sw.ElapsedMilliseconds / 1000.0;

      Console.WriteLine("Elapsed time: " + elapsedSeconds + ", Avg. similarity: " + s / pairCount);
      Console.WriteLine(pairCount / elapsedSeconds + " similarity calculations per second.");
    }

    /// <summary>
    /// Builds a standalone LaTeX/TikZ document that draws both trees next to each other and
    /// connects every mapped pair of nodes with a dashed gray arrow.
    /// </summary>
    /// <remarks>Not called by the tests in this class; kept as a debugging aid.</remarks>
    /// <param name="t1">Tree drawn at horizontal offset 0.</param>
    /// <param name="t2">Tree drawn at horizontal offset 20.</param>
    /// <param name="map">Node pairs to connect; every key and value must be a node of <paramref name="t1"/> or <paramref name="t2"/>.</param>
    /// <returns>The LaTeX source of the document.</returns>
    private static string FormatMapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> map) {
      // Hand-written LaTeX labels for known symbols; these are emitted verbatim (never escaped).
      var symbolNameMap = new Dictionary<string, string>
      {
        {"ProgramRootSymbol", "Prog"},
        {"StartSymbol","RPB"},
        {"Multiplication", "$\\times$"},
        {"Division", "$\\div$"},
        {"Addition", "$+$"},
        {"Subtraction", "$-$"},
        {"Exponential", "$\\exp$"},
        {"Logarithm", "$\\log$"}
      };

      var sb = new StringBuilder();
      var nodeIds = new Dictionary<ISymbolicExpressionTreeNode, string>();
      var layoutEngine = new ReingoldTilfordLayoutEngine<ISymbolicExpressionTreeNode>(x => x.Subtrees);

      sb.AppendLine("\\documentclass[class=minimal,border=0pt]{standalone}");
      sb.AppendLine("\\usepackage{tikz}");
      sb.AppendLine("\\begin{document}");
      sb.AppendLine("\\begin{tikzpicture}");
      sb.AppendLine("\\def\\ws{1}");
      sb.AppendLine("\\def\\hs{0.7}");

      AppendTreeTikz(sb, t1, layoutEngine, symbolNameMap, nodeIds, 0);
      AppendTreeTikz(sb, t2, layoutEngine, symbolNameMap, nodeIds, 20);

      foreach (var p in map) {
        var id1 = nodeIds[p.Key];
        var id2 = nodeIds[p.Value];

        sb.AppendFormat(CultureInfo.InvariantCulture, "\\path[draw,->,color=gray] ({0}) edge[bend left,dashed] ({1});", id1, id2).AppendLine();
      }

      sb.AppendLine("\\end{tikzpicture}");
      sb.AppendLine("\\end{document}");
      return sb.ToString();
    }

    /// <summary>
    /// Appends the TikZ commands for one tree: the <c>\offs</c> definition, one <c>\node</c> per
    /// tree node and one <c>\draw</c> per parent/child edge. Registers a fresh id for every node
    /// in <paramref name="nodeIds"/>.
    /// </summary>
    private static void AppendTreeTikz(StringBuilder sb, ISymbolicExpressionTree tree, ReingoldTilfordLayoutEngine<ISymbolicExpressionTreeNode> layoutEngine,
      Dictionary<string, string> symbolNameMap, Dictionary<ISymbolicExpressionTreeNode, string> nodeIds, int offset) {
      var nodeCoordinates = layoutEngine.CalculateLayout(tree.Root).ToDictionary(n => n.Content, n => new PointF(n.X, n.Y));
      var nodes = tree.IterateNodesBreadth().ToList();

      sb.AppendLine("\\def\\offs{" + offset + "}");

      foreach (var node in nodes) {
        var id = Guid.NewGuid().ToString();
        nodeIds[node] = id;
        var coord = nodeCoordinates[node];

        // Mapped names already are LaTeX markup (e.g. $\times$) and would be destroyed by
        // escaping; only the plain-text fallback from node.ToString() needs to be escaped.
        string latexName;
        var nodeText = symbolNameMap.TryGetValue(node.Symbol.Name, out latexName) ? latexName : EscapeLatexString(node.ToString());

        // Y is negated because TikZ's y axis points up while the tree should grow downwards.
        sb.AppendFormat(CultureInfo.InvariantCulture, "\\node ({0}) at (\\ws*{1} + \\offs,\\hs*{2}) {{{3}}};", id, NodeWidthScale * coord.X, -NodeHeightScale * coord.Y, nodeText).AppendLine();
      }

      foreach (var parent in nodes) {
        foreach (var child in parent.Subtrees) {
          sb.AppendFormat(CultureInfo.InvariantCulture, "\\draw ({0}) -- ({1});", nodeIds[parent], nodeIds[child]).AppendLine();
        }
      }
    }

    /// <summary>
    /// Escapes the characters that have a special meaning in LaTeX text mode so that
    /// <paramref name="s"/> is typeset literally.
    /// </summary>
    /// <param name="s">Plain text; null or empty input is returned unchanged.</param>
    /// <returns>The escaped text.</returns>
    private static string EscapeLatexString(string s) {
      if (string.IsNullOrEmpty(s)) {
        return s;
      }

      // A single pass guarantees that the braces of an inserted macro such as
      // \textbackslash{} are not escaped again by a later replacement.
      var sb = new StringBuilder(s.Length + 8);
      foreach (char c in s) {
        switch (c) {
          case '\\':
            // "\\" would be a line break in LaTeX, not a literal backslash.
            sb.Append("\\textbackslash{}");
            break;
          case '^':
            sb.Append("\\textasciicircum{}");
            break;
          case '~':
            sb.Append("\\textasciitilde{}");
            break;
          case '{':
          case '}':
          case '_':
          case '#':
          case '$':
          case '%':
          case '&':
            sb.Append('\\').Append(c);
            break;
          default:
            sb.Append(c);
            break;
        }
      }
      return sb.ToString();
    }
  }
}
|
---|