Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/Test/TreeHashingTest.cs @ 16194

Last change on this file since 16194 was 16194, checked in by bburlacu, 6 years ago

#2886: Fix compilation errors in test

File size: 14.9 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Security.Cryptography;
6using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration;
7using Microsoft.VisualStudio.TestTools.UnitTesting;
8using HierarchicalFormatter = HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.SymbolicExpressionTreeHierarchicalFormatter;
9
10namespace Test {
11  [TestClass]
12  public class TreeHashingTest {
13
14    private Grammar grammar;
15    private TerminalSymbol varA;
16    private TerminalSymbol varB;
17    private TerminalSymbol varC;
18    private TerminalSymbol c;
19
20    Func<Grammar, SymbolList, int> ComputeHash = (grammar, sentence) => grammar.ComputeHash(null, sentence);
21
22    [TestInitialize]
23    public void InitTest() {
24      grammar = new Grammar(new[] { "a", "b", "c" });
25
26      varA = grammar.VarTerminals.First(s => s.StringRepresentation == "a");
27      varB = grammar.VarTerminals.First(s => s.StringRepresentation == "b");
28      varC = grammar.VarTerminals.First(s => s.StringRepresentation == "c");
29      c = grammar.Const;
30    }
31
32    [TestMethod]
33    [TestCategory("TreeHashing")]
34    public void SimpleEqualityAddition() {
35      SymbolList s1 = new SymbolList(new[] { varA, varB, grammar.Addition, varC, grammar.Addition });
36      SymbolList s2 = new SymbolList(new[] { varA, varB, grammar.Addition, varC, grammar.Addition });
37
38      int hash1 = ComputeHash(grammar, s1);
39      int hash2 = ComputeHash(grammar, s2);
40
41      Assert.AreEqual(hash1, hash2);
42    }
43
44    [TestMethod]
45    [TestCategory("TreeHashing")]
46    public void SimpleInequalityAddition() {
47      SymbolList s1 = new SymbolList(new[] { varA, varB, grammar.Addition, varC, grammar.Addition });
48      SymbolList s2 = new SymbolList(new[] { varB, varB, grammar.Addition, varB, grammar.Addition });
49
50      int hash1 = ComputeHash(grammar, s1);
51      int hash2 = ComputeHash(grammar, s2);
52
53      Assert.AreNotEqual(hash1, hash2);
54    }
55
56    [TestMethod]
57    [TestCategory("TreeHashing")]
58    public void CommutativityAddition() {
59      SymbolList s1 = new SymbolList(new[] { varA, varB, grammar.Addition });
60      SymbolList s2 = new SymbolList(new[] { varB, varA, grammar.Addition });
61
62      int hash1 = ComputeHash(grammar, s1);
63      int hash2 = ComputeHash(grammar, s2);
64
65      Assert.AreEqual(hash1, hash2);
66    }
67
68    [TestMethod]
69    [TestCategory("TreeHashing")]
70    public void AssociativityAddition() {
71      SymbolList s1 = new SymbolList(new[] { varA, varB, grammar.Addition, varA, grammar.Addition });
72      SymbolList s2 = new SymbolList(new[] { varA, varB, varA, grammar.Addition, grammar.Addition });
73
74      int hash1 = ComputeHash(grammar, s1);
75      int hash2 = ComputeHash(grammar, s2);
76
77      Assert.AreEqual(hash1, hash2);
78    }
79
80    [TestMethod]
81    [TestCategory("TreeHashing")]
82    public void RepeatedAddition() {
83      SymbolList s1 = new SymbolList(new[] { varA, varA, grammar.Addition, varA, grammar.Addition });
84      SymbolList s2 = new SymbolList(new[] { varA });
85
86      int hash1 = ComputeHash(grammar, s1);
87      int hash2 = ComputeHash(grammar, s2);
88
89      Assert.AreEqual(hash1, hash2);
90    }
91
92    [TestMethod]
93    [TestCategory("TreeHashing")]
94    public void ComplexInequality() {
95      SymbolList s1 = new SymbolList(new[] { varA, varA, varA, grammar.Multiplication, grammar.Multiplication });
96      SymbolList s2 = new SymbolList(new[] { varA, varA, varA, grammar.Multiplication, grammar.Addition });
97
98      int hash1 = ComputeHash(grammar, s1);
99      int hash2 = ComputeHash(grammar, s2);
100
101      Assert.AreNotEqual(hash1, hash2);
102    }
103
104    [TestMethod]
105    [TestCategory("TreeHashing")]
106    public void NonterminalHashing() {
107      SymbolList s1 = new SymbolList(new Symbol[] { varA, varA, grammar.Expr, grammar.Addition, grammar.Addition });
108      SymbolList s2 = new SymbolList(new Symbol[] { varA, grammar.Expr, grammar.Addition });
109
110      int hash1 = ComputeHash(grammar, s1);
111      int hash2 = ComputeHash(grammar, s2);
112
113      Assert.AreEqual(hash1, hash2);
114    }
115
116    [TestMethod]
117    [TestCategory("TreeHashing")]
118    public void InverseFactorCancelationSimple() {
119      // 1/a * b * a * a
120      SymbolList s1 = new SymbolList(new Symbol[] { varA, grammar.Inv, varB, grammar.Multiplication, varA, grammar.Multiplication, varA, grammar.Multiplication });
121      // a * b
122      SymbolList s2 = new SymbolList(new Symbol[] { varA, varB, grammar.Multiplication });
123
124      int hash1 = ComputeHash(grammar, s1);
125      int hash2 = ComputeHash(grammar, s2);
126
127      Assert.AreEqual(hash1, hash2);
128    }
129
130    [TestMethod]
131    [TestCategory("TreeHashing")]
132    public void InverseFactorCancelationComplex() {
133      SymbolList s1 = new SymbolList(new Symbol[] { varA, grammar.Sin, varA, varA, grammar.Multiplication, varA, grammar.Addition, grammar.Sin, grammar.Addition });
134      SymbolList s2 = new SymbolList(new Symbol[] { varA, varA, varA, grammar.Multiplication, grammar.Addition, grammar.Sin, varA, grammar.Inv, varA, grammar.Sin, varA, grammar.Multiplication, grammar.Multiplication, grammar.Addition });
135
136      int hash1 = ComputeHash(grammar, s1);
137      int hash2 = ComputeHash(grammar, s2);
138
139      Assert.AreEqual(hash1, hash2);
140    }
141
142    // Constants
143    [TestMethod]
144    [TestCategory("TreeHashing")]
145    public void SimpleConst() {
146      SymbolList s1 = new SymbolList(new Symbol[] { c, varA, grammar.Multiplication, c, grammar.Addition });
147      SymbolList s2 = new SymbolList(new Symbol[] { c, varA, grammar.Multiplication, c, varA, grammar.Multiplication, grammar.Addition, c, grammar.Addition });
148
149      int hash1 = ComputeHash(grammar, s1);
150      int hash2 = ComputeHash(grammar, s2);
151
152      Assert.AreEqual(hash1, hash2);
153    }
154
155    [TestMethod]
156    [TestCategory("TreeHashing")]
157    public void EnumerateGrammarTest() {
158      //const int nvars = 1;
159      //var variables = Enumerable.Range(1, nvars).Select(x => $"x{x}").ToArray();
160      var variables = new[] { "a", "b" };
161      var grammar = new Grammar(variables, Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>());
162
163      Func<SymbolList, int> hash = s => grammar.Hasher.CalcHashCode(s);
164      int length = 100;
165      int depth = 20;
166
167      //List<SymbolList> sentences = EnumerateGrammarBreadth(grammar, length: length, hashPhrases: false).ToList();
168      //Console.WriteLine("Breadth: {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count());
169
170      //var sentences = EnumerateGrammarBreadth(grammar, length: length, hashPhrases: true).ToList();
171      //Console.WriteLine("Breadth (hashed): {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count());
172
173      var sentences = EnumerateGrammarDepth(grammar, length: length, depth: depth, hashPhrases: false).ToList();
174      Console.WriteLine("Depth: {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count());
175
176      sentences = EnumerateGrammarDepth(grammar, length: length, depth: depth, hashPhrases: true).ToList();
177      Console.WriteLine("Depth (hashed): {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count());
178    }
179
180    [TestMethod]
181    [TestCategory("TreeHashing")]
182    public void HashExtensionsTest() {
183      var variables = new[] { "x", "y", "z" };
184      var rules = Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>();
185      var grammar = new Grammar(variables, rules);
186      var add = grammar.Addition;
187      var mul = grammar.Multiplication;
188      var exp = grammar.Exp;
189      var log = grammar.Log;
190      var inv = grammar.Inv;
191      var sin = grammar.Sin;
192
193      var c = grammar.Const;
194      var x = grammar.VarTerminals.Single(v => v.StringRepresentation == "x");
195      var y = grammar.VarTerminals.Single(v => v.StringRepresentation == "y");
196      var z = grammar.VarTerminals.Single(v => v.StringRepresentation == "z");
197
198
199      var ha = SHA512.Create();
200
201      var sentences = new[] {
202        new SymbolList(c, c, x, mul, c, add, inv, mul, c, add),
203        new SymbolList(c, c, x, mul, c, add, log, mul, c, add),
204        new SymbolList(x, x, add),
205        new SymbolList(x, x, add, x, add),
206        new SymbolList(x, x, add, y, add),
207        new SymbolList(x, x, add, x, add, x, add),
208        new SymbolList(x, x, add, y, add, x, add),
209        new SymbolList(x, y, mul, x, y, mul, add),
210        new SymbolList(x, y, mul, y, x, mul, add),
211        new SymbolList(x, y, mul, z, y, mul, add),
212        new SymbolList(x, x, add, y, mul),
213        new SymbolList(x, x, add, y, mul, y, mul),
214        new SymbolList(x, x, inv, mul),
215        new SymbolList(x, inv, x, inv, mul, x, mul, x, mul),
216        new SymbolList(c, x, x, x, x, x, x, mul, mul, mul, mul, mul, mul, c, x, mul, c, add, add),
217        new SymbolList(c, x, x, x, x, x, x, mul, mul, mul, mul, mul, mul, c, x, mul, c, add, add),
218        new SymbolList(c, x, mul, c, x, x, x, x, x, x, mul, mul, mul, mul, mul, mul, c, add, add),
219        new SymbolList(c, x, x, x, x, mul, mul, mul, mul, c, x, mul, c, x, mul, c, add, add, add),
220        new SymbolList(c, x, mul, c, x, x, x, x, mul, mul, mul, mul, c, x, mul, c, add, add, add)
221    };
222
223      foreach (var sentence in sentences) {
224        var simplified = grammar.Simplify(ha, sentence);
225        Console.WriteLine($"{sentence} -> {simplified} {grammar.Hasher.CalcHashCode(sentence)} {grammar.ComputeHash(ha, sentence)}");
226        Console.WriteLine();
227      }
228    }
229
230    [TestMethod]
231    [TestCategory("TreeHashing")]
232    public void HashCollisionsTest() {
233      var variables = new[] { "x", "y", "z", "w" };
234      //var rules = Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>().Except(new[] { GrammarRule.InverseTerm, GrammarRule.Exponentiation, GrammarRule.Logarithm, GrammarRule.Sine });
235      var rules = Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>();
236      var grammar = new Grammar(variables, rules);
237
238      int maxLength = 20, maxDepth = int.MaxValue;
239      var sentences = EnumerateGrammarDepth(grammar, length: maxLength, depth: maxDepth, hashPhrases: false).ToList();
240      var count = sentences.Count;
241
242      var sw = new Stopwatch();
243      sw.Start();
244      var hashes = sentences.Select(grammar.Hasher.CalcHashCode).ToList();
245      sw.Stop();
246      Console.WriteLine($"Old hash: {sentences.Count} ({hashes.Distinct().Count()}) hashed in {sw.ElapsedMilliseconds / 1000.0} seconds ({1000d * sentences.Count / sw.ElapsedMilliseconds} hashes/s)");
247
248      var ha = SHA512.Create();
249      sw.Restart();
250      var hashes_new = sentences.Select(x => grammar.ComputeHash(ha, x)).ToList();
251      sw.Stop();
252      Console.WriteLine($"New hash: {sentences.Count} ({hashes_new.Distinct().Count()}) hashed in {sw.ElapsedMilliseconds / 1000.0} seconds ({1000.0 * sentences.Count / sw.ElapsedMilliseconds} hashes/s)");
253
254      var distinct = Enumerable.Range(0, count).GroupBy(x => hashes_new[x]).Select(g => g.OrderBy(x => x).First()).ToList();
255      var collisions = distinct.ToLookup(x => hashes[x], x => Tuple.Create(hashes_new[x], sentences[x]));
256
257      foreach (var pair in collisions) {
258        if (pair.Count() > 1) {
259          Console.WriteLine(pair.Key);
260          foreach (var t in pair) {
261            Console.WriteLine($"\t{t}");
262            Console.WriteLine($"\t{grammar.ToInfixString(t.Item2)}");
263            var simplified = grammar.Simplify(ha, t.Item2);
264            Console.WriteLine($"\t{simplified}");
265            Console.Write($"\t");
266            Console.WriteLine();
267          }
268        }
269      }
270    }
271
272    [TestMethod]
273    [TestCategory("TreeHashing")]
274    public void HashPerformance() {
275      var nvars = 4;
276      var variables = Enumerable.Range(0, nvars).Select(x => $"X{x}").ToArray();
277      //var rules = Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>().Except(new[] { GrammarRule.InverseTerm, GrammarRule.Exponentiation, GrammarRule.Logarithm, GrammarRule.Sine });
278      var rules = Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>();
279      var grammar = new Grammar(variables, rules);
280
281      int maxLength = 20, maxDepth = int.MaxValue;
282      var sentences = EnumerateGrammarDepth(grammar, length: maxLength, depth: maxDepth, hashPhrases: false).ToList();
283      var count = sentences.Count;
284
285      var ha = SHA512.Create();
286
287      var sw = new Stopwatch();
288      sw.Start();
289      var hashes = sentences.Select(x => grammar.ComputeHash(ha, x)).ToList();
290      sw.Stop();
291
292      Console.WriteLine($"New: {sentences.Count} ({hashes.Distinct().Count()}) hashed in {sw.ElapsedMilliseconds / 1000.0} seconds ({1000d * sentences.Count / sw.ElapsedMilliseconds} hashes/s)");
293
294      sw.Restart();
295      var hashes_old = sentences.Select(x => grammar.Hasher.CalcHashCode(x)).ToList();
296      sw.Stop();
297
298      Console.WriteLine($"Old: {sentences.Count} ({hashes_old.Distinct().Count()}) hashed in {sw.ElapsedMilliseconds / 1000.0} seconds ({1000d * sentences.Count / sw.ElapsedMilliseconds} hashes/s)");
299    }
300
301    #region enumerate the grammar
302    private static IEnumerable<SymbolList> EnumerateGrammarBreadth(Grammar grammar, int length, bool hashPhrases = true) {
303      var phrases = new Queue<SymbolList>();
304      phrases.Enqueue(new SymbolList(grammar.StartSymbol));
305      var sentences = new List<SymbolList>();
306      var archive = new HashSet<int>();
307
308      while (phrases.Any()) {
309        var phrase = phrases.Dequeue();
310
311        if (phrase.Count > length)
312          continue;
313
314        if (phrase.IsSentence()) {
315          sentences.Add(phrase);
316          continue;
317        }
318
319        if (hashPhrases && !archive.Add(grammar.Hasher.CalcHashCode(phrase))) {
320          continue;
321        }
322
323        var idx = phrase.NextNonterminalIndex();
324        var productions = grammar.Productions[phrase[idx]];
325        var derived = productions.Select(p => phrase.DerivePhrase(idx, p)).Where(p => p.Count <= length);
326        foreach (var d in derived)
327          phrases.Enqueue(d);
328      }
329      return sentences;
330    }
331
332    private static IEnumerable<SymbolList> EnumerateGrammarDepth(Grammar grammar, int length, int depth, bool hashPhrases = true) {
333      return Expand(new SymbolList(grammar.StartSymbol), grammar, length, 0, depth, hashPhrases ? new HashSet<int>() : null);
334    }
335
336    private static IEnumerable<SymbolList> Expand(SymbolList phrase, Grammar grammar, int maxLength, int depth, int maxDepth, HashSet<int> visited) {
337      if (maxDepth < depth || maxLength < phrase.Count) {
338        yield break;
339      }
340
341      if (phrase.IsSentence()) {
342        yield return phrase;
343        yield break;
344      }
345
346      if (visited != null && !visited.Add(HashExtensions.ComputeHash(grammar, null, phrase))) {
347        yield break;
348      }
349
350      var i = phrase.NextNonterminalIndex();
351      var productions = grammar.Productions[phrase[i]];
352
353      foreach (var s in productions.SelectMany(p => Expand(phrase.DerivePhrase(i, p), grammar, maxLength, depth + 1, maxDepth, visited)))
354        yield return s;
355    }
356    #endregion
357  }
358}
Note: See TracBrowser for help on using the repository browser.