1 | using System;
|
---|
2 | using System.Diagnostics;
|
---|
3 | using HeuristicLab.Random;
|
---|
4 | using Microsoft.VisualStudio.TestTools.UnitTesting;
|
---|
5 |
|
---|
6 | namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Tests {
|
---|
7 | [TestClass]
|
---|
8 | public class BottomUpSimilarityCalculatorTest {
|
---|
9 | private readonly SymbolicExpressionImporter importer = new SymbolicExpressionImporter();
|
---|
10 | private readonly InfixExpressionParser parser = new InfixExpressionParser();
|
---|
11 |
|
---|
12 | private const int N = 200;
|
---|
13 | private const int Rows = 1;
|
---|
14 | private const int Columns = 10;
|
---|
15 |
|
---|
16 | public BottomUpSimilarityCalculatorTest() {
|
---|
17 | var parser = new InfixExpressionParser();
|
---|
18 | }
|
---|
19 |
|
---|
20 | [TestMethod]
|
---|
21 | [TestCategory("Problems.DataAnalysis.Symbolic")]
|
---|
22 | [TestProperty("Time", "short")]
|
---|
23 | public void BottomUpTreeSimilarityCalculatorTestMapping() {
|
---|
24 | TestMatchedNodes("1 + 1", "2 + 2", 0, strict: true);
|
---|
25 | TestMatchedNodes("1 + 1", "2 + 2", 3, strict: false);
|
---|
26 | TestMatchedNodes("1 + 1", "1 + 2", 1, strict: true);
|
---|
27 | TestMatchedNodes("1 + 2", "2 + 1", 3, strict: true);
|
---|
28 |
|
---|
29 | TestMatchedNodes("1 - 1", "2 - 2", 0, strict: true);
|
---|
30 | TestMatchedNodes("1 - 1", "2 - 2", 4, strict: false); // 4, because of the way strings are parsed into trees by the infix parser
|
---|
31 |
|
---|
32 | TestMatchedNodes("2 - 1", "1 - 2", 2, strict: true);
|
---|
33 | TestMatchedNodes("2 - 1", "1 - 2", 4, strict: false);
|
---|
34 |
|
---|
35 | TestMatchedNodes("(X1 * X2) + (X3 * X4)", "(X1 * X2) + (X3 * X4)", 7, strict: true);
|
---|
36 | TestMatchedNodes("(X1 * X2) + (X3 * X4)", "(X1 * X2) + (X3 * X4)", 7, strict: false);
|
---|
37 |
|
---|
38 | TestMatchedNodes("(X1 * X2) + (X3 * X4)", "(X1 * X2) + (X5 * X6)", 3, strict: true);
|
---|
39 | TestMatchedNodes("(X1 * X2) + (X3 * X4)", "(X1 * X2) + (X5 * X6)", 3, strict: false);
|
---|
40 |
|
---|
41 | TestMatchedNodes("(X1 * X2) + (X3 * X4)", "(X1 * X2) - (X5 * X6)", 3, strict: true);
|
---|
42 | TestMatchedNodes("(X1 * X2) + (X3 * X4)", "(X1 * X2) - (X5 * X6)", 3, strict: false);
|
---|
43 |
|
---|
44 | TestMatchedNodes("SIN(SIN(SIN(X1)))", "SIN(SIN(SIN(X1)))", 4, strict: true);
|
---|
45 | TestMatchedNodes("SIN(SIN(SIN(X1)))", "COS(SIN(SIN(X1)))", 3, strict: true);
|
---|
46 | TestMatchedNodes("SIN(SIN(SIN(X1)))", "COS(COS(SIN(X1)))", 2, strict: true);
|
---|
47 | TestMatchedNodes("SIN(SIN(SIN(X1)))", "COS(COS(COS(X1)))", 1, strict: true);
|
---|
48 |
|
---|
49 | const string lhs = "(0.006153 + (X9 * X7 * X2 * 0.229506) + (X6 * X10 * X3 * 0.924598) + (X2 * X1 * 0.951272) + (X4 * X3 * 0.992570) + (X6 * X5 * 1.027299))";
|
---|
50 | const string rhs = "(0.006153 + (X10 * X7 * X2 * 0.229506) + (X6 * X10 * X3 * 0.924598) + (X2 * X1 * 0.951272) + (X4 * X3 * 0.992570) + (X6 * X5 * 1.027299))";
|
---|
51 |
|
---|
52 | TestMatchedNodes(lhs, lhs, 24, strict: true);
|
---|
53 | TestMatchedNodes(lhs, lhs, 24, strict: false);
|
---|
54 |
|
---|
55 | TestMatchedNodes(lhs, rhs, 21, strict: true);
|
---|
56 | TestMatchedNodes(lhs, rhs, 21, strict: false);
|
---|
57 | }
|
---|
58 |
|
---|
59 | private void TestMatchedNodes(string expr1, string expr2, int expected, bool strict) {
|
---|
60 | var t1 = parser.Parse(expr1);
|
---|
61 | var t2 = parser.Parse(expr2);
|
---|
62 |
|
---|
63 | var map = SymbolicExpressionTreeBottomUpSimilarityCalculator.ComputeBottomUpMapping(t1, t2, strict);
|
---|
64 | Console.WriteLine($"Count: {map.Count}");
|
---|
65 |
|
---|
66 | if (map.Count != expected) {
|
---|
67 | throw new Exception($"Match count {map.Count} is different than expected value {expected} for expressions:\n{expr1} and {expr2} (strict = {strict})\n");
|
---|
68 | }
|
---|
69 | }
|
---|
70 |
|
---|
71 | [TestMethod]
|
---|
72 | [TestCategory("Problems.DataAnalysis.Symbolic")]
|
---|
73 | [TestProperty("Time", "long")]
|
---|
74 | public void BottomUpTreeSimilarityCalculatorTestPerformance() {
|
---|
75 | var grammar = new TypeCoherentExpressionGrammar();
|
---|
76 | grammar.ConfigureAsDefaultRegressionGrammar();
|
---|
77 | var twister = new MersenneTwister(31415);
|
---|
78 | var ds = Util.CreateRandomDataset(twister, Rows, Columns);
|
---|
79 | var trees = Util.CreateRandomTrees(twister, ds, grammar, N, 1, 100, 0, 0);
|
---|
80 |
|
---|
81 | double s = 0;
|
---|
82 | var sw = new Stopwatch();
|
---|
83 |
|
---|
84 | var similarityCalculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator { MatchVariableWeights = false, MatchParameterValues = false };
|
---|
85 |
|
---|
86 | sw.Start();
|
---|
87 | for (int i = 0; i < trees.Length - 1; ++i) {
|
---|
88 | for (int j = i + 1; j < trees.Length; ++j) {
|
---|
89 | s += similarityCalculator.CalculateSimilarity(trees[i], trees[j]);
|
---|
90 | }
|
---|
91 | }
|
---|
92 |
|
---|
93 | sw.Stop();
|
---|
94 | Console.WriteLine("Elapsed time: " + sw.ElapsedMilliseconds / 1000.0 + ", Avg. similarity: " + s / (N * (N - 1) / 2));
|
---|
95 | Console.WriteLine(N * (N + 1) / (2 * sw.ElapsedMilliseconds / 1000.0) + " similarity calculations per second.");
|
---|
96 | }
|
---|
97 |
|
---|
98 | [TestMethod]
|
---|
99 | [TestCategory("Problems.DataAnalysis.Symbolic")]
|
---|
100 | [TestProperty("Time", "long")]
|
---|
101 | public void BottomUpTreeSimilarityCalculatorStrictMatchingConsistency() {
|
---|
102 | TestMatchingConsistency(strict: true);
|
---|
103 | }
|
---|
104 |
|
---|
105 | [TestMethod]
|
---|
106 | [TestCategory("Problems.DataAnalysis.Symbolic")]
|
---|
107 | [TestProperty("Time", "long")]
|
---|
108 | public void BottomUpTreeSimilarityCalculatorRelaxedMatchingConsistency() {
|
---|
109 | TestMatchingConsistency(strict: false);
|
---|
110 | }
|
---|
111 |
|
---|
112 | private static void TestMatchingConsistency(bool strict = false) {
|
---|
113 | var grammar = new TypeCoherentExpressionGrammar();
|
---|
114 | grammar.ConfigureAsDefaultRegressionGrammar();
|
---|
115 | var twister = new MersenneTwister(31415);
|
---|
116 | var ds = Util.CreateRandomDataset(twister, Rows, Columns);
|
---|
117 | var trees = Util.CreateRandomTrees(twister, ds, grammar, N, 1, 100, 0, 0);
|
---|
118 |
|
---|
119 | var similarityCalculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator { MatchParameterValues = strict, MatchVariableWeights = strict };
|
---|
120 | var bottomUpSimilarity = 0d;
|
---|
121 | for (int i = 0; i < trees.Length - 1; ++i) {
|
---|
122 | for (int j = i + 1; j < trees.Length; ++j) {
|
---|
123 | bottomUpSimilarity += similarityCalculator.CalculateSimilarity(trees[i], trees[j]);
|
---|
124 | }
|
---|
125 | }
|
---|
126 | bottomUpSimilarity /= N * (N - 1) / 2;
|
---|
127 |
|
---|
128 | var hashBasedSimilarity = SymbolicExpressionTreeHash.ComputeAverageSimilarity(trees, false, strict);
|
---|
129 |
|
---|
130 | Assert.AreEqual(bottomUpSimilarity, hashBasedSimilarity, 1e-6);
|
---|
131 |
|
---|
132 | Console.WriteLine($"Bottom-up similarity: {bottomUpSimilarity}, hash-based similarity: {hashBasedSimilarity}");
|
---|
133 | }
|
---|
134 | }
|
---|
135 | }
|
---|