1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Diagnostics;
|
---|
4 | using System.Drawing;
|
---|
5 | using System.Globalization;
|
---|
6 | using System.Linq;
|
---|
7 | using System.Text;
|
---|
8 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
9 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Views;
|
---|
10 | using HeuristicLab.Random;
|
---|
11 | using Microsoft.VisualStudio.TestTools.UnitTesting;
|
---|
12 |
|
---|
13 | namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Tests {
|
---|
/// <summary>
/// Unit tests for <see cref="BottomUpSimilarityCalculator"/>: fixed expression pairs with a
/// known number of mapped nodes, plus a throughput measurement on randomly created trees.
/// </summary>
[TestClass]
public class BottomUpSimilarityCalculatorTest {
  // Calculator under test; configured in the constructor with MatchConstantValues and MatchVariableWeights enabled.
  private readonly BottomUpSimilarityCalculator busCalculator;
  // Turns the textual expressions used by the tests into trees.
  private readonly SymbolicExpressionImporter importer;

  // Number of random trees requested for the performance test.
  private const int N = 100;
  // Row and column counts passed to Util.CreateRandomDataset.
  private const int Rows = 1;
  private const int Columns = 10;

  // LaTeX labels for well-known symbol names. The values are already valid LaTeX and
  // therefore must NOT be run through EscapeLatexString.
  private static readonly Dictionary<string, string> LatexSymbolNames = new Dictionary<string, string> {
    {"ProgramRootSymbol", "Prog"},
    {"StartSymbol", "RPB"},
    {"Multiplication", "$\\times$"},
    {"Division", "$\\div$"},
    {"Addition", "$+$"},
    {"Subtraction", "$-$"},
    {"Exponential", "$\\exp$"},
    {"Logarithm", "$\\log$"}
  };

  public BottomUpSimilarityCalculatorTest() {
    busCalculator = new BottomUpSimilarityCalculator { MatchConstantValues = true, MatchVariableWeights = true };
    importer = new SymbolicExpressionImporter();
  }

  /// <summary>
  /// Checks, for a series of expression pairs, that the bottom-up mapping computed between the
  /// two imported trees has the expected number of entries.
  /// </summary>
  [TestMethod]
  [TestCategory("Problems.DataAnalysis.Symbolic")]
  [TestProperty("Time", "short")]
  public void TestBottomUpMatching() {
    TestMatchedNodes("(+ 1 2)", "(+ 2 1)", 5);
    TestMatchedNodes("(- 2 1)", "(- 1 2)", 2);
    TestMatchedNodes("(* (variable 1 X1) (variable 1 X2))", "(* (+ (variable 1 X1) 1) (+ (variable 1 X2) 1))", 2);

    TestMatchedNodes("(* (variable 1 X1) (variable 1 X2))", "(* (+ (variable 1 X1) 1) (variable 1 X2))", 2);

    TestMatchedNodes("(+ (variable 1 a) (variable 1 b))", "(+ (variable 1 a) (variable 1 a))", 1);
    TestMatchedNodes("(+ (+ (variable 1 a) (variable 1 b)) (variable 1 b))", "(+ (* (+ (variable 1 a) (variable 1 b)) (variable 1 b)) (+ (+ (variable 1 a) (variable 1 b)) (variable 1 b)))", 5);

    TestMatchedNodes(
      "(* (+ 2.84 (exp (+ (log (/ (variable 2.0539 X5) (variable -9.2452e-1 X6))) (/ (variable 2.0539 X5) (variable -9.2452e-1 X6))))) 2.9081)",
      "(* (- (variable 9.581e-1 X6) (+ (- (variable 5.1491e-1 X5) 1.614e+1) (+ (/ (variable 2.0539 X5) (variable -9.2452e-1 X6)) (log (/ (variable 2.0539 X5) (variable -9.2452e-1 X6)))))) 2.9081)",
      9);

    TestMatchedNodes("(+ (exp 2.1033) (/ -4.3072 (variable 2.4691 X7)))", "(/ 1 (+ (/ -4.3072 (variable 2.4691 X7)) (exp 2.1033)))", 6);
    TestMatchedNodes("(+ (exp 2.1033) (/ -4.3072 (variable 2.4691 X7)))", "(/ 1 (+ (/ (variable 2.4691 X7) -4.3072) (exp 2.1033)))", 4);

    const string expr1 = "(* (- 1.2175e+1 (+ (/ (exp -1.4134e+1) (exp 9.2013)) (exp (log (exp (/ (exp (- (* -4.2461 (variable 2.2634 X5)) (- -9.6267e-1 3.3243))) (- (/ (/ (variable 1.0883 X1) (variable 6.9620e-1 X2)) (log 1.3011e+1)) (variable -4.3098e-1 X7)))))))) (log 1.3011e+1))";
    const string expr2 = "(* (- 1.2175e+1 (+ (/ (/ (+ (variable 3.0140 X9) (variable 1.3430 X8)) -1.0864e+1) (exp 9.2013)) (exp (log (exp (/ (exp (- (* -4.2461 (variable 2.2634 X5)) (- -9.6267e-1 3.3243))) (- (/ (/ (variable 1.0883 X1) (variable 6.9620e-1 X2)) (log 1.3011e+1)) (variable -4.3098e-1 X7)))))))) (exp (variable 4.0899e-1 X7)))";

    TestMatchedNodes(expr1, expr2, 23);
  }

  /// <summary>
  /// Imports both expressions, computes the bottom-up mapping between their roots and asserts
  /// that the mapping contains <paramref name="expected"/> entries.
  /// </summary>
  /// <remarks>
  /// This is a helper, not a test: it is private and takes parameters, so it deliberately
  /// carries no [TestMethod] attribute.
  /// </remarks>
  /// <param name="expr1">Textual form of the first expression.</param>
  /// <param name="expr2">Textual form of the second expression.</param>
  /// <param name="expected">Expected number of entries in the computed mapping.</param>
  private void TestMatchedNodes(string expr1, string expr2, int expected) {
    var t1 = importer.Import(expr1);
    var t2 = importer.Import(expr2);

    var mapping = busCalculator.ComputeBottomUpMapping(t1.Root, t2.Root);

    Assert.AreEqual(expected, mapping.Count,
      "Match count is different than expected value for \"" + expr1 + "\" and \"" + expr2 + "\".");
  }

  /// <summary>
  /// Times the similarity calculation over every unordered pair of randomly created trees and
  /// writes the elapsed time, the average similarity and the throughput to the console.
  /// </summary>
  [TestMethod]
  [TestCategory("Problems.DataAnalysis.Symbolic")]
  [TestProperty("Time", "long")]
  public void TestBottomUpSimilarityCalculatorPerformance() {
    var grammar = new TypeCoherentExpressionGrammar();
    grammar.ConfigureAsDefaultRegressionGrammar();
    var twister = new MersenneTwister(31415); // fixed seed
    var ds = Util.CreateRandomDataset(twister, Rows, Columns);
    var trees = Util.CreateRandomTrees(twister, ds, grammar, N, 1, 100, 0, 0);

    double s = 0;
    var sw = new Stopwatch();

    sw.Start();
    for (int i = 0; i < trees.Length - 1; ++i) {
      for (int j = i + 1; j < trees.Length; ++j) {
        s += busCalculator.CalculateSimilarity(trees[i], trees[j]);
      }
    }
    sw.Stop();

    // The nested loop above visits each unordered pair (i < j) exactly once: n * (n - 1) / 2 calls.
    // The same count is used for the average and for the throughput so both figures agree.
    var pairCount = trees.Length * (trees.Length - 1) / 2;
    double seconds = sw.ElapsedMilliseconds / 1000.0;

    Console.WriteLine("Elapsed time: " + seconds + ", Avg. similarity: " + s / pairCount);
    Console.WriteLine(pairCount / seconds + " similarity calculations per second.");
  }

  /// <summary>
  /// Builds a standalone LaTeX/TikZ document that draws both trees (the second one shifted by a
  /// horizontal offset of 20) and a dashed gray arrow for every key/value pair in
  /// <paramref name="map"/>.
  /// </summary>
  /// <param name="t1">Tree drawn first, at offset 0.</param>
  /// <param name="t2">Tree drawn second, at offset 20.</param>
  /// <param name="map">Node pairs to connect; every key and value must be a node of one of the two trees.</param>
  /// <returns>The complete LaTeX source.</returns>
  private static string FormatMapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> map) {
    var sb = new StringBuilder();
    var nodeIds = new Dictionary<ISymbolicExpressionTreeNode, string>();
    var layoutEngine = new ReingoldTilfordLayoutEngine<ISymbolicExpressionTreeNode>(x => x.Subtrees);
    var nl = Environment.NewLine;

    sb.Append("\\documentclass[class=minimal,border=0pt]{standalone}" + nl +
              "\\usepackage{tikz}" + nl +
              "\\begin{document}" + nl +
              "\\begin{tikzpicture}" + nl +
              "\\def\\ws{1}" + nl +
              "\\def\\hs{0.7}" + nl);

    AppendTree(sb, t1, layoutEngine, nodeIds, 0);
    AppendTree(sb, t2, layoutEngine, nodeIds, 20);

    foreach (var p in map) {
      sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\path[draw,->,color=gray] ({0}) edge[bend left,dashed] ({1});", nodeIds[p.Key], nodeIds[p.Value]));
    }

    sb.Append("\\end{tikzpicture}" + nl +
              "\\end{document}" + nl);
    return sb.ToString();
  }

  // Emits the \offs definition, one \node per tree node and one \draw per parent/child edge,
  // and records the generated id of every node in nodeIds.
  private static void AppendTree(StringBuilder sb, ISymbolicExpressionTree tree, ReingoldTilfordLayoutEngine<ISymbolicExpressionTreeNode> layoutEngine, Dictionary<ISymbolicExpressionTreeNode, string> nodeIds, int offset) {
    // Scale factors applied to the layout coordinates before they are written out.
    const double ws = 0.5;
    const double hs = 0.5;

    var nodeCoordinates = layoutEngine.CalculateLayout(tree.Root).ToDictionary(n => n.Content, n => new PointF(n.X, n.Y));

    sb.Append("\\def\\offs{" + offset.ToString(CultureInfo.InvariantCulture) + "}" + Environment.NewLine);

    foreach (var node in tree.IterateNodesBreadth()) {
      var id = Guid.NewGuid().ToString();
      nodeIds[node] = id;
      var coord = nodeCoordinates[node];
      // The y coordinate is negated before it is written.
      sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\node ({0}) at (\\ws*{1} + \\offs,\\hs*{2}) {{{3}}};", id, ws * coord.X, -hs * coord.Y, GetLatexLabel(node)));
    }

    foreach (var parent in tree.IterateNodesBreadth()) {
      foreach (var child in parent.Subtrees) {
        sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\draw ({0}) -- ({1});", nodeIds[parent], nodeIds[child]));
      }
    }
  }

  // Returns the predefined LaTeX label for known symbols (verbatim, it is already LaTeX);
  // any other node is labelled with its escaped ToString() text.
  private static string GetLatexLabel(ISymbolicExpressionTreeNode node) {
    string label;
    if (LatexSymbolNames.TryGetValue(node.Symbol.Name, out label)) {
      return label;
    }
    return EscapeLatexString(node.ToString());
  }

  /// <summary>
  /// Escapes backslashes, braces and underscores so that plain text can be placed in LaTeX.
  /// </summary>
  /// <param name="s">Plain text to escape; must not be null.</param>
  /// <returns>The escaped text.</returns>
  private static string EscapeLatexString(string s) {
    // Single pass, so the braces introduced by \textbackslash{} are not escaped again.
    var escaped = new StringBuilder(s.Length);
    foreach (var ch in s) {
      switch (ch) {
        case '\\':
          // "\\" would be a line break in LaTeX, not a literal backslash.
          escaped.Append("\\textbackslash{}");
          break;
        case '{':
          escaped.Append("\\{");
          break;
        case '}':
          escaped.Append("\\}");
          break;
        case '_':
          escaped.Append("\\_");
          break;
        default:
          escaped.Append(ch);
          break;
      }
    }
    return escaped.ToString();
  }
}
176 | }
|
---|