[11219] | 1 | using System;
|
---|
| 2 | using System.Diagnostics;
|
---|
| 3 | using HeuristicLab.Random;
|
---|
| 4 | using Microsoft.VisualStudio.TestTools.UnitTesting;
|
---|
| 5 |
|
---|
| 6 | namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Tests {
|
---|
| 7 | [TestClass]
|
---|
| 8 | public class BottomUpSimilarityCalculatorTest {
|
---|
[16723] | 9 | private readonly SymbolicExpressionImporter importer = new SymbolicExpressionImporter();
|
---|
[16946] | 10 | private readonly InfixExpressionParser parser = new InfixExpressionParser();
|
---|
[11219] | 11 |
|
---|
[16946] | 12 | private const int N = 200;
|
---|
[11219] | 13 | private const int Rows = 1;
|
---|
| 14 | private const int Columns = 10;
|
---|
| 15 |
|
---|
| 16 | public BottomUpSimilarityCalculatorTest() {
|
---|
[16723] | 17 | var parser = new InfixExpressionParser();
|
---|
[11219] | 18 | }
|
---|
| 19 |
|
---|
| 20 | [TestMethod]
|
---|
| 21 | [TestCategory("Problems.DataAnalysis.Symbolic")]
|
---|
| 22 | [TestProperty("Time", "short")]
|
---|
[11916] | 23 | public void BottomUpTreeSimilarityCalculatorTestMapping() {
|
---|
[16946] | 24 | TestMatchedNodes("1 + 1", "2 + 2", 0, strict: true);
|
---|
| 25 | TestMatchedNodes("1 + 1", "2 + 2", 3, strict: false);
|
---|
| 26 | TestMatchedNodes("1 + 1", "1 + 2", 1, strict: true);
|
---|
| 27 | TestMatchedNodes("1 + 2", "2 + 1", 3, strict: true);
|
---|
[11219] | 28 |
|
---|
[16946] | 29 | TestMatchedNodes("1 - 1", "2 - 2", 0, strict: true);
|
---|
| 30 | TestMatchedNodes("1 - 1", "2 - 2", 4, strict: false); // 4, because of the way strings are parsed into trees by the infix parser
|
---|
[11220] | 31 |
|
---|
[16946] | 32 | TestMatchedNodes("2 - 1", "1 - 2", 2, strict: true);
|
---|
| 33 | TestMatchedNodes("2 - 1", "1 - 2", 4, strict: false);
|
---|
| 34 |
|
---|
| 35 | TestMatchedNodes("(X1 * X2) + (X3 * X4)", "(X1 * X2) + (X3 * X4)", 7, strict: true);
|
---|
| 36 | TestMatchedNodes("(X1 * X2) + (X3 * X4)", "(X1 * X2) + (X3 * X4)", 7, strict: false);
|
---|
| 37 |
|
---|
| 38 | TestMatchedNodes("(X1 * X2) + (X3 * X4)", "(X1 * X2) + (X5 * X6)", 3, strict: true);
|
---|
| 39 | TestMatchedNodes("(X1 * X2) + (X3 * X4)", "(X1 * X2) + (X5 * X6)", 3, strict: false);
|
---|
| 40 |
|
---|
| 41 | TestMatchedNodes("(X1 * X2) + (X3 * X4)", "(X1 * X2) - (X5 * X6)", 3, strict: true);
|
---|
| 42 | TestMatchedNodes("(X1 * X2) + (X3 * X4)", "(X1 * X2) - (X5 * X6)", 3, strict: false);
|
---|
| 43 |
|
---|
| 44 | TestMatchedNodes("SIN(SIN(SIN(X1)))", "SIN(SIN(SIN(X1)))", 4, strict: true);
|
---|
| 45 | TestMatchedNodes("SIN(SIN(SIN(X1)))", "COS(SIN(SIN(X1)))", 3, strict: true);
|
---|
| 46 | TestMatchedNodes("SIN(SIN(SIN(X1)))", "COS(COS(SIN(X1)))", 2, strict: true);
|
---|
| 47 | TestMatchedNodes("SIN(SIN(SIN(X1)))", "COS(COS(COS(X1)))", 1, strict: true);
|
---|
| 48 |
|
---|
| 49 | const string lhs = "(0.006153 + (X9 * X7 * X2 * 0.229506) + (X6 * X10 * X3 * 0.924598) + (X2 * X1 * 0.951272) + (X4 * X3 * 0.992570) + (X6 * X5 * 1.027299))";
|
---|
| 50 | const string rhs = "(0.006153 + (X10 * X7 * X2 * 0.229506) + (X6 * X10 * X3 * 0.924598) + (X2 * X1 * 0.951272) + (X4 * X3 * 0.992570) + (X6 * X5 * 1.027299))";
|
---|
| 51 |
|
---|
| 52 | TestMatchedNodes(lhs, lhs, 24, strict: true);
|
---|
| 53 | TestMatchedNodes(lhs, lhs, 24, strict: false);
|
---|
| 54 |
|
---|
| 55 | TestMatchedNodes(lhs, rhs, 21, strict: true);
|
---|
| 56 | TestMatchedNodes(lhs, rhs, 21, strict: false);
|
---|
[11219] | 57 | }
|
---|
| 58 |
|
---|
[16723] | 59 | private void TestMatchedNodes(string expr1, string expr2, int expected, bool strict) {
|
---|
[16946] | 60 | var t1 = parser.Parse(expr1);
|
---|
| 61 | var t2 = parser.Parse(expr2);
|
---|
[11219] | 62 |
|
---|
[16723] | 63 | var map = SymbolicExpressionTreeBottomUpSimilarityCalculator.ComputeBottomUpMapping(t1, t2, strict);
|
---|
[16946] | 64 | Console.WriteLine($"Count: {map.Count}");
|
---|
[11219] | 65 |
|
---|
[16723] | 66 | if (map.Count != expected) {
|
---|
[16946] | 67 | throw new Exception($"Match count {map.Count} is different than expected value {expected} for expressions:\n{expr1} and {expr2} (strict = {strict})\n");
|
---|
[11219] | 68 | }
|
---|
| 69 | }
|
---|
| 70 |
|
---|
| 71 | [TestMethod]
|
---|
| 72 | [TestCategory("Problems.DataAnalysis.Symbolic")]
|
---|
| 73 | [TestProperty("Time", "long")]
|
---|
[11916] | 74 | public void BottomUpTreeSimilarityCalculatorTestPerformance() {
|
---|
[11219] | 75 | var grammar = new TypeCoherentExpressionGrammar();
|
---|
| 76 | grammar.ConfigureAsDefaultRegressionGrammar();
|
---|
| 77 | var twister = new MersenneTwister(31415);
|
---|
| 78 | var ds = Util.CreateRandomDataset(twister, Rows, Columns);
|
---|
| 79 | var trees = Util.CreateRandomTrees(twister, ds, grammar, N, 1, 100, 0, 0);
|
---|
| 80 |
|
---|
| 81 | double s = 0;
|
---|
| 82 | var sw = new Stopwatch();
|
---|
| 83 |
|
---|
[16946] | 84 | var similarityCalculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator { MatchVariableWeights = false, MatchConstantValues = false };
|
---|
| 85 |
|
---|
[11219] | 86 | sw.Start();
|
---|
| 87 | for (int i = 0; i < trees.Length - 1; ++i) {
|
---|
| 88 | for (int j = i + 1; j < trees.Length; ++j) {
|
---|
[16723] | 89 | s += similarityCalculator.CalculateSimilarity(trees[i], trees[j]);
|
---|
[11219] | 90 | }
|
---|
| 91 | }
|
---|
[11239] | 92 |
|
---|
[11219] | 93 | sw.Stop();
|
---|
[11239] | 94 | Console.WriteLine("Elapsed time: " + sw.ElapsedMilliseconds / 1000.0 + ", Avg. similarity: " + s / (N * (N - 1) / 2));
|
---|
[11219] | 95 | Console.WriteLine(N * (N + 1) / (2 * sw.ElapsedMilliseconds / 1000.0) + " similarity calculations per second.");
|
---|
| 96 | }
|
---|
[16946] | 97 |
|
---|
| 98 | [TestMethod]
|
---|
| 99 | [TestCategory("Problems.DataAnalysis.Symbolic")]
|
---|
| 100 | [TestProperty("Time", "long")]
|
---|
| 101 | public void BottomUpTreeSimilarityCalculatorStrictMatchingConsistency() {
|
---|
| 102 | TestMatchingConsistency(strict: true);
|
---|
| 103 | }
|
---|
| 104 |
|
---|
| 105 | [TestMethod]
|
---|
| 106 | [TestCategory("Problems.DataAnalysis.Symbolic")]
|
---|
| 107 | [TestProperty("Time", "long")]
|
---|
| 108 | public void BottomUpTreeSimilarityCalculatorRelaxedMatchingConsistency() {
|
---|
| 109 | TestMatchingConsistency(strict: false);
|
---|
| 110 | }
|
---|
| 111 |
|
---|
| 112 | private static void TestMatchingConsistency(bool strict = false) {
|
---|
| 113 | var grammar = new TypeCoherentExpressionGrammar();
|
---|
| 114 | grammar.ConfigureAsDefaultRegressionGrammar();
|
---|
| 115 | var twister = new MersenneTwister(31415);
|
---|
| 116 | var ds = Util.CreateRandomDataset(twister, Rows, Columns);
|
---|
| 117 | var trees = Util.CreateRandomTrees(twister, ds, grammar, N, 1, 100, 0, 0);
|
---|
| 118 |
|
---|
| 119 | var similarityCalculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator { MatchConstantValues = strict, MatchVariableWeights = strict };
|
---|
| 120 | var bottomUpSimilarity = 0d;
|
---|
| 121 | for (int i = 0; i < trees.Length - 1; ++i) {
|
---|
| 122 | for (int j = i + 1; j < trees.Length; ++j) {
|
---|
| 123 | bottomUpSimilarity += similarityCalculator.CalculateSimilarity(trees[i], trees[j]);
|
---|
| 124 | }
|
---|
| 125 | }
|
---|
| 126 | bottomUpSimilarity /= N * (N - 1) / 2;
|
---|
| 127 |
|
---|
| 128 | var hashBasedSimilarity = SymbolicExpressionTreeHash.ComputeAverageSimilarity(trees, false, strict);
|
---|
| 129 |
|
---|
| 130 | Assert.AreEqual(bottomUpSimilarity, hashBasedSimilarity, 1e-6);
|
---|
| 131 |
|
---|
| 132 | Console.WriteLine($"Bottom-up similarity: {bottomUpSimilarity}, hash-based similarity: {hashBasedSimilarity}");
|
---|
| 133 | }
|
---|
[11219] | 134 | }
|
---|
| 135 | }
|
---|