[16157] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Linq;
|
---|
[15714] | 4 | using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration;
|
---|
| 5 | using Microsoft.VisualStudio.TestTools.UnitTesting;
|
---|
| 6 |
|
---|
| 7 | namespace Test {
|
---|
| 8 | [TestClass]
|
---|
| 9 | public class TreeHashingTest {
|
---|
| 10 |
|
---|
| 11 | private Grammar grammar;
|
---|
| 12 | private TerminalSymbol varA;
|
---|
| 13 | private TerminalSymbol varB;
|
---|
| 14 | private TerminalSymbol varC;
|
---|
[15849] | 15 | private TerminalSymbol c;
|
---|
[15714] | 16 |
|
---|
| 17 | [TestInitialize]
|
---|
| 18 | public void InitTest() {
|
---|
| 19 | grammar = new Grammar(new[] { "a", "b", "c" });
|
---|
| 20 |
|
---|
[15834] | 21 | varA = grammar.VarTerminals.First(s => s.StringRepresentation == "a");
|
---|
| 22 | varB = grammar.VarTerminals.First(s => s.StringRepresentation == "b");
|
---|
| 23 | varC = grammar.VarTerminals.First(s => s.StringRepresentation == "c");
|
---|
[15849] | 24 | c = grammar.Const;
|
---|
[15714] | 25 | }
|
---|
| 26 |
|
---|
| 27 | [TestMethod]
|
---|
| 28 | [TestCategory("TreeHashing")]
|
---|
| 29 | public void SimpleEqualityAddition() {
|
---|
[16026] | 30 | SymbolList s1 = new SymbolList(new[] { varA, varB, grammar.Addition, varC, grammar.Addition });
|
---|
| 31 | SymbolList s2 = new SymbolList(new[] { varA, varB, grammar.Addition, varC, grammar.Addition });
|
---|
[15714] | 32 |
|
---|
[15832] | 33 | int hash1 = grammar.Hasher.CalcHashCode(s1);
|
---|
| 34 | int hash2 = grammar.Hasher.CalcHashCode(s2);
|
---|
[15714] | 35 |
|
---|
| 36 | Assert.AreEqual(hash1, hash2);
|
---|
| 37 | }
|
---|
| 38 |
|
---|
| 39 | [TestMethod]
|
---|
| 40 | [TestCategory("TreeHashing")]
|
---|
| 41 | public void SimpleInequalityAddition() {
|
---|
[16026] | 42 | SymbolList s1 = new SymbolList(new[] { varA, varB, grammar.Addition, varC, grammar.Addition });
|
---|
| 43 | SymbolList s2 = new SymbolList(new[] { varB, varB, grammar.Addition, varB, grammar.Addition });
|
---|
[15714] | 44 |
|
---|
[15832] | 45 | int hash1 = grammar.Hasher.CalcHashCode(s1);
|
---|
| 46 | int hash2 = grammar.Hasher.CalcHashCode(s2);
|
---|
[15714] | 47 |
|
---|
| 48 | Assert.AreNotEqual(hash1, hash2);
|
---|
| 49 | }
|
---|
| 50 |
|
---|
| 51 | [TestMethod]
|
---|
| 52 | [TestCategory("TreeHashing")]
|
---|
| 53 | public void CommutativityAddition() {
|
---|
[16026] | 54 | SymbolList s1 = new SymbolList(new[] { varA, varB, grammar.Addition });
|
---|
| 55 | SymbolList s2 = new SymbolList(new[] { varB, varA, grammar.Addition });
|
---|
[15714] | 56 |
|
---|
[15832] | 57 | int hash1 = grammar.Hasher.CalcHashCode(s1);
|
---|
| 58 | int hash2 = grammar.Hasher.CalcHashCode(s2);
|
---|
[15714] | 59 |
|
---|
| 60 | Assert.AreEqual(hash1, hash2);
|
---|
| 61 | }
|
---|
| 62 |
|
---|
| 63 | [TestMethod]
|
---|
| 64 | [TestCategory("TreeHashing")]
|
---|
| 65 | public void AssociativityAddition() {
|
---|
[16026] | 66 | SymbolList s1 = new SymbolList(new[] { varA, varB, grammar.Addition, varA, grammar.Addition });
|
---|
| 67 | SymbolList s2 = new SymbolList(new[] { varA, varB, varA, grammar.Addition, grammar.Addition });
|
---|
[15714] | 68 |
|
---|
[15832] | 69 | int hash1 = grammar.Hasher.CalcHashCode(s1);
|
---|
| 70 | int hash2 = grammar.Hasher.CalcHashCode(s2);
|
---|
[15714] | 71 |
|
---|
| 72 | Assert.AreEqual(hash1, hash2);
|
---|
| 73 | }
|
---|
| 74 |
|
---|
| 75 | [TestMethod]
|
---|
| 76 | [TestCategory("TreeHashing")]
|
---|
| 77 | public void RepeatedAddition() {
|
---|
[16026] | 78 | SymbolList s1 = new SymbolList(new[] { varA, varA, grammar.Addition, varA, grammar.Addition });
|
---|
| 79 | SymbolList s2 = new SymbolList(new[] { varA });
|
---|
[15714] | 80 |
|
---|
[15832] | 81 | int hash1 = grammar.Hasher.CalcHashCode(s1);
|
---|
| 82 | int hash2 = grammar.Hasher.CalcHashCode(s2);
|
---|
[15714] | 83 |
|
---|
| 84 | Assert.AreEqual(hash1, hash2);
|
---|
| 85 | }
|
---|
[15725] | 86 |
|
---|
| 87 | [TestMethod]
|
---|
| 88 | [TestCategory("TreeHashing")]
|
---|
| 89 | public void ComplexInequality() {
|
---|
[16026] | 90 | SymbolList s1 = new SymbolList(new[] { varA, varA, varA, grammar.Multiplication, grammar.Multiplication });
|
---|
| 91 | SymbolList s2 = new SymbolList(new[] { varA, varA, varA, grammar.Multiplication, grammar.Addition });
|
---|
[15725] | 92 |
|
---|
[15832] | 93 | int hash1 = grammar.Hasher.CalcHashCode(s1);
|
---|
| 94 | int hash2 = grammar.Hasher.CalcHashCode(s2);
|
---|
[15725] | 95 |
|
---|
| 96 | Assert.AreNotEqual(hash1, hash2);
|
---|
| 97 | }
|
---|
| 98 |
|
---|
[15746] | 99 | [TestMethod]
|
---|
| 100 | [TestCategory("TreeHashing")]
|
---|
| 101 | public void NonterminalHashing() {
|
---|
[16026] | 102 | SymbolList s1 = new SymbolList(new Symbol[] { varA, varA, grammar.Expr, grammar.Addition, grammar.Addition });
|
---|
| 103 | SymbolList s2 = new SymbolList(new Symbol[] { varA, grammar.Expr, grammar.Addition });
|
---|
[15725] | 104 |
|
---|
[15832] | 105 | int hash1 = grammar.Hasher.CalcHashCode(s1);
|
---|
| 106 | int hash2 = grammar.Hasher.CalcHashCode(s2);
|
---|
[15746] | 107 |
|
---|
| 108 | Assert.AreEqual(hash1, hash2);
|
---|
| 109 | }
|
---|
| 110 |
|
---|
[15784] | 111 | [TestMethod]
|
---|
| 112 | [TestCategory("TreeHashing")]
|
---|
| 113 | public void InverseFactorCancelationSimple() {
|
---|
[15791] | 114 | // 1/a * b * a * a
|
---|
[16026] | 115 | SymbolList s1 = new SymbolList(new Symbol[] { varA, grammar.Inv, varB, grammar.Multiplication, varA, grammar.Multiplication, varA, grammar.Multiplication });
|
---|
[15791] | 116 | // a * b
|
---|
[16026] | 117 | SymbolList s2 = new SymbolList(new Symbol[] { varA, varB, grammar.Multiplication });
|
---|
[15746] | 118 |
|
---|
[15832] | 119 | int hash1 = grammar.Hasher.CalcHashCode(s1);
|
---|
| 120 | int hash2 = grammar.Hasher.CalcHashCode(s2);
|
---|
[15784] | 121 |
|
---|
| 122 | Assert.AreEqual(hash1, hash2);
|
---|
| 123 | }
|
---|
| 124 |
|
---|
| 125 | [TestMethod]
|
---|
| 126 | [TestCategory("TreeHashing")]
|
---|
| 127 | public void InverseFactorCancelationComplex() {
|
---|
[16026] | 128 | SymbolList s1 = new SymbolList(new Symbol[] { varA, grammar.Sin, varA, varA, grammar.Multiplication, varA, grammar.Addition, grammar.Sin, grammar.Addition });
|
---|
| 129 | SymbolList s2 = new SymbolList(new Symbol[] { varA, varA, varA, grammar.Multiplication, grammar.Addition, grammar.Sin, varA, grammar.Inv, varA, grammar.Sin, varA, grammar.Multiplication, grammar.Multiplication, grammar.Addition });
|
---|
[15784] | 130 |
|
---|
[15832] | 131 | int hash1 = grammar.Hasher.CalcHashCode(s1);
|
---|
| 132 | int hash2 = grammar.Hasher.CalcHashCode(s2);
|
---|
[15784] | 133 |
|
---|
| 134 | Assert.AreEqual(hash1, hash2);
|
---|
| 135 | }
|
---|
| 136 |
|
---|
[15849] | 137 | // Constants
|
---|
| 138 | [TestMethod]
|
---|
| 139 | [TestCategory("TreeHashing")]
|
---|
| 140 | public void SimpleConst() {
|
---|
[16056] | 141 | SymbolList s1 = new SymbolList(new Symbol[] { c, varA, grammar.Multiplication, c, grammar.Addition });
|
---|
[16026] | 142 | SymbolList s2 = new SymbolList(new Symbol[] { c, varA, grammar.Multiplication, c, varA, grammar.Multiplication, grammar.Addition, c, grammar.Addition });
|
---|
[15849] | 143 |
|
---|
| 144 | int hash1 = grammar.Hasher.CalcHashCode(s1);
|
---|
| 145 | int hash2 = grammar.Hasher.CalcHashCode(s2);
|
---|
| 146 |
|
---|
| 147 | Assert.AreEqual(hash1, hash2);
|
---|
| 148 | }
|
---|
| 149 |
|
---|
[15791] | 150 | [TestMethod]
|
---|
| 151 | [TestCategory("TreeHashing")]
|
---|
[16157] | 152 | public void EnumerateGrammarTest() {
|
---|
| 153 | //const int nvars = 1;
|
---|
| 154 | //var variables = Enumerable.Range(1, nvars).Select(x => $"x{x}").ToArray();
|
---|
[16159] | 155 | var variables = new[] { "x", "y", "z", "w" };
|
---|
[16157] | 156 | var grammar = new Grammar(variables, Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>());
|
---|
[15791] | 157 |
|
---|
[16159] | 158 | Func<SymbolList, int> hash = s => grammar.Hasher.CalcHashCode(s);
|
---|
[15791] | 159 |
|
---|
[16157] | 160 | List<SymbolList> sentences = EnumerateGrammarBreadth(grammar, length: 20, hashPhrases: false).ToList();
|
---|
[16159] | 161 | Console.WriteLine("Breadth: {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count());
|
---|
[16157] | 162 |
|
---|
| 163 | sentences = EnumerateGrammarBreadth(grammar, length: 20, hashPhrases: true).ToList();
|
---|
[16159] | 164 | Console.WriteLine("Breadth (hashed): {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count());
|
---|
[16157] | 165 |
|
---|
| 166 | sentences = EnumerateGrammarDepth(grammar, length: 20, hashPhrases: false).ToList();
|
---|
[16159] | 167 | Console.WriteLine("Depth: {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count());
|
---|
[16157] | 168 |
|
---|
| 169 | sentences = EnumerateGrammarDepth(grammar, length: 20, hashPhrases: true).ToList();
|
---|
[16159] | 170 | Console.WriteLine("Depth (hashed): {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count());
|
---|
[15791] | 171 | }
|
---|
| 172 |
|
---|
[16157] | 173 | private static IEnumerable<SymbolList> EnumerateGrammarBreadth(Grammar grammar, int length, bool hashPhrases = true) {
|
---|
| 174 | var phrases = new Queue<SymbolList>();
|
---|
| 175 | phrases.Enqueue(new SymbolList(grammar.StartSymbol));
|
---|
| 176 | var sentences = new List<SymbolList>();
|
---|
| 177 | var archive = new HashSet<int>();
|
---|
[15791] | 178 |
|
---|
[16157] | 179 | while (phrases.Any()) {
|
---|
| 180 | var phrase = phrases.Dequeue();
|
---|
[15791] | 181 |
|
---|
[16157] | 182 | if (phrase.Count > length)
|
---|
| 183 | continue;
|
---|
| 184 |
|
---|
| 185 | if (phrase.IsSentence()) {
|
---|
| 186 | sentences.Add(phrase);
|
---|
| 187 | continue;
|
---|
| 188 | }
|
---|
| 189 |
|
---|
| 190 | if (hashPhrases && !archive.Add(grammar.Hasher.CalcHashCode(phrase))) {
|
---|
| 191 | continue;
|
---|
| 192 | }
|
---|
| 193 |
|
---|
| 194 | var idx = phrase.NextNonterminalIndex();
|
---|
| 195 | var productions = grammar.Productions[phrase[idx]];
|
---|
| 196 | var derived = productions.Select(p => phrase.DerivePhrase(idx, p)).Where(p => p.Count <= length);
|
---|
| 197 | foreach (var d in derived)
|
---|
| 198 | phrases.Enqueue(d);
|
---|
| 199 | }
|
---|
| 200 | return sentences;
|
---|
[15791] | 201 | }
|
---|
| 202 |
|
---|
[16157] | 203 | private static IEnumerable<SymbolList> EnumerateGrammarDepth(Grammar grammar, int length, bool hashPhrases = true) {
|
---|
| 204 | return Expand(new SymbolList(grammar.StartSymbol), grammar, length, hashPhrases ? new HashSet<int>() : null);
|
---|
| 205 | }
|
---|
[15791] | 206 |
|
---|
[16157] | 207 | private static IEnumerable<SymbolList> Expand(SymbolList phrase, Grammar grammar, int maxLength, HashSet<int> visited) {
|
---|
| 208 | if (phrase.Count > maxLength) {
|
---|
| 209 | yield break;
|
---|
| 210 | }
|
---|
[15791] | 211 |
|
---|
[16157] | 212 | if (phrase.IsSentence()) {
|
---|
| 213 | yield return phrase;
|
---|
| 214 | yield break;
|
---|
| 215 | }
|
---|
| 216 |
|
---|
| 217 | if (visited != null && !visited.Add(grammar.Hasher.CalcHashCode(phrase))) {
|
---|
| 218 | yield break;
|
---|
| 219 | }
|
---|
| 220 |
|
---|
| 221 | var i = phrase.NextNonterminalIndex();
|
---|
| 222 | var productions = grammar.Productions[phrase[i]];
|
---|
| 223 |
|
---|
| 224 | foreach (var s in productions.SelectMany(p => Expand(phrase.DerivePhrase(i, p), grammar, maxLength, visited)))
|
---|
| 225 | yield return s;
|
---|
| 226 | }
|
---|
[15714] | 227 | }
|
---|
| 228 | }
|
---|