Changeset 16193
- Timestamp:
- 09/28/18 15:22:58 (6 years ago)
- Location:
- branches/2886_SymRegGrammarEnumeration
- Files:
-
- 2 added
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.csproj
r16053 r16193 128 128 <Compile Include="Hashing\Hasher.cs" /> 129 129 <Compile Include="GrammarEnumeration\SearchDataStructure.cs" /> 130 <Compile Include="Hashing\HashExtensions.cs" /> 131 <Compile Include="Hashing\HashUtil.cs" /> 130 132 <Compile Include="ProblemInstances\AircraftMaximumLift.cs" /> 131 133 <Compile Include="ProblemInstances\FluidDynamics.cs" /> -
branches/2886_SymRegGrammarEnumeration/Test/TreeHashingTest.cs
r16159 r16193 1 1 using System; 2 2 using System.Collections.Generic; 3 using System.Diagnostics; 3 4 using System.Linq; 5 using System.Security.Cryptography; 4 6 using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration; 5 7 using Microsoft.VisualStudio.TestTools.UnitTesting; 8 using HierarchicalFormatter = HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.SymbolicExpressionTreeHierarchicalFormatter; 6 9 7 10 namespace Test { … … 15 18 private TerminalSymbol c; 16 19 20 Func<Grammar, SymbolList, int> ComputeHash = (grammar, sentence) => grammar.ComputeHash(null, sentence); 21 17 22 [TestInitialize] 18 23 public void InitTest() { … … 31 36 SymbolList s2 = new SymbolList(new[] { varA, varB, grammar.Addition, varC, grammar.Addition }); 32 37 33 int hash1 = grammar.Hasher.CalcHashCode(s1);34 int hash2 = grammar.Hasher.CalcHashCode(s2);38 int hash1 = ComputeHash(grammar, s1); 39 int hash2 = ComputeHash(grammar, s2); 35 40 36 41 Assert.AreEqual(hash1, hash2); … … 43 48 SymbolList s2 = new SymbolList(new[] { varB, varB, grammar.Addition, varB, grammar.Addition }); 44 49 45 int hash1 = grammar.Hasher.CalcHashCode(s1);46 int hash2 = grammar.Hasher.CalcHashCode(s2);50 int hash1 = ComputeHash(grammar, s1); 51 int hash2 = ComputeHash(grammar, s2); 47 52 48 53 Assert.AreNotEqual(hash1, hash2); … … 55 60 SymbolList s2 = new SymbolList(new[] { varB, varA, grammar.Addition }); 56 61 57 int hash1 = grammar.Hasher.CalcHashCode(s1);58 int hash2 = grammar.Hasher.CalcHashCode(s2);62 int hash1 = ComputeHash(grammar, s1); 63 int hash2 = ComputeHash(grammar, s2); 59 64 60 65 Assert.AreEqual(hash1, hash2); … … 67 72 SymbolList s2 = new SymbolList(new[] { varA, varB, varA, grammar.Addition, grammar.Addition }); 68 73 69 int hash1 = grammar.Hasher.CalcHashCode(s1);70 int hash2 = grammar.Hasher.CalcHashCode(s2);74 int hash1 = ComputeHash(grammar, s1); 75 int hash2 = ComputeHash(grammar, s2); 71 76 72 77 Assert.AreEqual(hash1, hash2); … … 79 84 SymbolList s2 = new SymbolList(new[] { varA }); 80 85 81 int hash1 = grammar.Hasher.CalcHashCode(s1);82 int hash2 = grammar.Hasher.CalcHashCode(s2);86 int hash1 = ComputeHash(grammar, s1); 87 int hash2 = ComputeHash(grammar, s2); 83 88 84 89 Assert.AreEqual(hash1, hash2); … … 91 96 SymbolList s2 = new SymbolList(new[] { varA, varA, varA, grammar.Multiplication, grammar.Addition }); 92 97 93 int hash1 = grammar.Hasher.CalcHashCode(s1);94 int hash2 = grammar.Hasher.CalcHashCode(s2);98 int hash1 = ComputeHash(grammar, s1); 99 int hash2 = ComputeHash(grammar, s2); 95 100 96 101 Assert.AreNotEqual(hash1, hash2); … … 103 108 SymbolList s2 = new SymbolList(new Symbol[] { varA, grammar.Expr, grammar.Addition }); 104 109 105 int hash1 = grammar.Hasher.CalcHashCode(s1);106 int hash2 = grammar.Hasher.CalcHashCode(s2);110 int hash1 = ComputeHash(grammar, s1); 111 int hash2 = ComputeHash(grammar, s2); 107 112 108 113 Assert.AreEqual(hash1, hash2); … … 117 122 SymbolList s2 = new SymbolList(new Symbol[] { varA, varB, grammar.Multiplication }); 118 123 119 int hash1 = grammar.Hasher.CalcHashCode(s1);120 int hash2 = grammar.Hasher.CalcHashCode(s2);124 int hash1 = ComputeHash(grammar, s1); 125 int hash2 = ComputeHash(grammar, s2); 121 126 122 127 Assert.AreEqual(hash1, hash2); … … 129 134 SymbolList s2 = new SymbolList(new Symbol[] { varA, varA, varA, grammar.Multiplication, grammar.Addition, grammar.Sin, varA, grammar.Inv, varA, grammar.Sin, varA, grammar.Multiplication, grammar.Multiplication, grammar.Addition }); 130 135 131 int hash1 = grammar.Hasher.CalcHashCode(s1); 132 int hash2 = grammar.Hasher.CalcHashCode(s2); 136 int hash1 = ComputeHash(grammar, s1); 137 int hash2 = ComputeHash(grammar, s2); 138 139 Console.WriteLine(s1); 140 Console.WriteLine(PrintTree(s1)); 141 Console.WriteLine(grammar.Simplify(null, s1)); 142 Console.WriteLine(hash1); 143 Console.WriteLine(); 144 Console.WriteLine(s2); 145 Console.WriteLine(PrintTree(s2)); 146 Console.WriteLine(grammar.Simplify(null, s2)); 147 Console.WriteLine(hash2); 148 133 149 134 150 Assert.AreEqual(hash1, hash2); … … 142 158 SymbolList s2 = new SymbolList(new Symbol[] { c, varA, grammar.Multiplication, c, varA, grammar.Multiplication, grammar.Addition, c, grammar.Addition }); 143 159 144 int hash1 = grammar.Hasher.CalcHashCode(s1);145 int hash2 = grammar.Hasher.CalcHashCode(s2);160 int hash1 = ComputeHash(grammar, s1); 161 int hash2 = ComputeHash(grammar, s2); 146 162 147 163 Assert.AreEqual(hash1, hash2); … … 153 169 //const int nvars = 1; 154 170 //var variables = Enumerable.Range(1, nvars).Select(x => $"x{x}").ToArray(); 171 var variables = new[] { "a", "b" }; 172 var grammar = new Grammar(variables, Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>()); 173 174 Func<SymbolList, int> hash = s => grammar.Hasher.CalcHashCode(s); 175 int length = 100; 176 int depth = 20; 177 178 //List<SymbolList> sentences = EnumerateGrammarBreadth(grammar, length: length, hashPhrases: false).ToList(); 179 //Console.WriteLine("Breadth: {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count()); 180 181 //var sentences = EnumerateGrammarBreadth(grammar, length: length, hashPhrases: true).ToList(); 182 //Console.WriteLine("Breadth (hashed): {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count()); 183 184 var sentences = EnumerateGrammarDepth(grammar, length: length, depth: depth, hashPhrases: false).ToList(); 185 Console.WriteLine("Depth: {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count()); 186 187 sentences = EnumerateGrammarDepth(grammar, length: length, depth: depth, hashPhrases: true).ToList(); 188 Console.WriteLine("Depth (hashed): {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count()); 189 } 190 191 [TestMethod] 192 [TestCategory("TreeHashing")] 193 public void HashExtensionsTest() { 194 var variables = new[] { "x", "y", "z" }; 195 var rules = Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>(); 196 var grammar = new Grammar(variables, rules); 197 var add = grammar.Addition; 198 var mul = grammar.Multiplication; 199 var exp = grammar.Exp; 200 var log = grammar.Log; 201 var inv = grammar.Inv; 202 var sin = grammar.Sin; 203 204 var c = grammar.Const; 205 var x = grammar.VarTerminals.Single(v => v.StringRepresentation == "x"); 206 var y = grammar.VarTerminals.Single(v => v.StringRepresentation == "y"); 207 var z = grammar.VarTerminals.Single(v => v.StringRepresentation == "z"); 208 209 210 var ha = SHA512.Create(); 211 212 var sentences = new[] { 213 new SymbolList(c, c, x, mul, c, add, inv, mul, c, add), 214 new SymbolList(c, c, x, mul, c, add, log, mul, c, add), 215 new SymbolList(x, x, add), 216 new SymbolList(x, x, add, x, add), 217 new SymbolList(x, x, add, y, add), 218 new SymbolList(x, x, add, x, add, x, add), 219 new SymbolList(x, x, add, y, add, x, add), 220 new SymbolList(x, y, mul, x, y, mul, add), 221 new SymbolList(x, y, mul, y, x, mul, add), 222 new SymbolList(x, y, mul, z, y, mul, add), 223 new SymbolList(x, x, add, y, mul), 224 new SymbolList(x, x, add, y, mul, y, mul), 225 new SymbolList(x, x, inv, mul), 226 new SymbolList(x, inv, x, inv, mul, x, mul, x, mul), 227 new SymbolList(c, x, x, x, x, x, x, mul, mul, mul, mul, mul, mul, c, x, mul, c, add, add), 228 new SymbolList(c, x, x, x, x, x, x, mul, mul, mul, mul, mul, mul, c, x, mul, c, add, add), 229 new SymbolList(c, x, mul, c, x, x, x, x, x, x, mul, mul, mul, mul, mul, mul, c, add, add), 230 new SymbolList(c, x, x, x, x, mul, mul, mul, mul, c, x, mul, c, x, mul, c, add, add, add), 231 new SymbolList(c, x, mul, c, x, x, x, x, mul, mul, mul, mul, c, x, mul, c, add, add, add) 232 }; 233 234 foreach (var sentence in sentences) { 235 var simplified = grammar.Simplify(ha, sentence); 236 Console.WriteLine($"{sentence} -> {simplified} {grammar.Hasher.CalcHashCode(sentence)} {grammar.ComputeHash(ha, sentence)}"); 237 Console.WriteLine(); 238 } 239 } 240 241 string PrintTree(SymbolList s) { 242 var t = grammar.ParseSymbolicExpressionTree(s); 243 return HierarchicalFormatter.Format(t.Root.GetSubtree(0).GetSubtree(0)); 244 } 245 246 [TestMethod] 247 [TestCategory("TreeHashing")] 248 public void HashCollisionsTest() { 155 249 var variables = new[] { "x", "y", "z", "w" }; 156 var grammar = new Grammar(variables, Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>()); 157 158 Func<SymbolList, int> hash = s => grammar.Hasher.CalcHashCode(s); 159 160 List<SymbolList> sentences = EnumerateGrammarBreadth(grammar, length: 20, hashPhrases: false).ToList(); 161 Console.WriteLine("Breadth: {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count()); 162 163 sentences = EnumerateGrammarBreadth(grammar, length: 20, hashPhrases: true).ToList(); 164 Console.WriteLine("Breadth (hashed): {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count()); 165 166 sentences = EnumerateGrammarDepth(grammar, length: 20, hashPhrases: false).ToList(); 167 Console.WriteLine("Depth: {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count()); 168 169 sentences = EnumerateGrammarDepth(grammar, length: 20, hashPhrases: true).ToList(); 170 Console.WriteLine("Depth (hashed): {0} generated, {1} distinct sentences", sentences.Count, sentences.GroupBy(hash).Count()); 171 } 172 250 //var rules = Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>().Except(new[] { GrammarRule.InverseTerm, GrammarRule.Exponentiation, GrammarRule.Logarithm, GrammarRule.Sine }); 251 var rules = Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>(); 252 var grammar = new Grammar(variables, rules); 253 254 int maxLength = 20, maxDepth = int.MaxValue; 255 var sentences = EnumerateGrammarDepth(grammar, length: maxLength, depth: maxDepth, hashPhrases: false).ToList(); 256 var count = sentences.Count; 257 258 var sw = new Stopwatch(); 259 sw.Start(); 260 var hashes = sentences.Select(grammar.Hasher.CalcHashCode).ToList(); 261 sw.Stop(); 262 Console.WriteLine($"Old hash: {sentences.Count} ({hashes.Distinct().Count()}) hashed in {sw.ElapsedMilliseconds / 1000.0} seconds ({1000d * sentences.Count / sw.ElapsedMilliseconds} hashes/s)"); 263 264 var ha = SHA512.Create(); 265 sw.Restart(); 266 var hashes_new = sentences.Select(x => grammar.ComputeHash(ha, x)).ToList(); 267 sw.Stop(); 268 Console.WriteLine($"New hash: {sentences.Count} ({hashes_new.Distinct().Count()}) hashed in {sw.ElapsedMilliseconds / 1000.0} seconds ({1000.0 * sentences.Count / sw.ElapsedMilliseconds} hashes/s)"); 269 270 var distinct = Enumerable.Range(0, count).GroupBy(x => hashes_new[x]).Select(g => g.OrderBy(x => x).First()).ToList(); 271 var collisions = distinct.ToLookup(x => hashes[x], x => Tuple.Create(hashes_new[x], sentences[x])); 272 273 foreach (var pair in collisions) { 274 if (pair.Count() > 1) { 275 Console.WriteLine(pair.Key); 276 foreach (var t in pair) { 277 Console.WriteLine($"\t{t}"); 278 Console.WriteLine($"\t{grammar.ToInfixString(t.Item2)}"); 279 var simplified = grammar.Simplify(ha, t.Item2); 280 Console.WriteLine($"\t{simplified}"); 281 Console.Write($"\t"); 282 PrintTree(t.Item2); 283 Console.WriteLine(); 284 } 285 } 286 } 287 } 288 289 [TestMethod] 290 [TestCategory("TreeHashing")] 291 public void HashPerformance() { 292 var nvars = 4; 293 var variables = Enumerable.Range(0, nvars).Select(x => $"X{x}").ToArray(); 294 //var rules = Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>().Except(new[] { GrammarRule.InverseTerm, GrammarRule.Exponentiation, GrammarRule.Logarithm, GrammarRule.Sine }); 295 var rules = Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>(); 296 var grammar = new Grammar(variables, rules); 297 298 int maxLength = 20, maxDepth = int.MaxValue; 299 var sentences = EnumerateGrammarDepth(grammar, length: maxLength, depth: maxDepth, hashPhrases: false).ToList(); 300 var count = sentences.Count; 301 302 var ha = SHA512.Create(); 303 304 var sw = new Stopwatch(); 305 sw.Start(); 306 var hashes = sentences.Select(x => grammar.ComputeHash(ha, x)).ToList(); 307 sw.Stop(); 308 309 Console.WriteLine($"New: {sentences.Count} ({hashes.Distinct().Count()}) hashed in {sw.ElapsedMilliseconds / 1000.0} seconds ({1000d * sentences.Count / sw.ElapsedMilliseconds} hashes/s)"); 310 311 sw.Restart(); 312 var hashes_old = sentences.Select(x => grammar.Hasher.CalcHashCode(x)).ToList(); 313 sw.Stop(); 314 315 Console.WriteLine($"Old: {sentences.Count} ({hashes_old.Distinct().Count()}) hashed in {sw.ElapsedMilliseconds / 1000.0} seconds ({1000d * sentences.Count / sw.ElapsedMilliseconds} hashes/s)"); 316 } 317 318 #region enumerate the grammar 173 319 private static IEnumerable<SymbolList> EnumerateGrammarBreadth(Grammar grammar, int length, bool hashPhrases = true) { 174 320 var phrases = new Queue<SymbolList>(); … … 201 347 } 202 348 203 private static IEnumerable<SymbolList> EnumerateGrammarDepth(Grammar grammar, int length, bool hashPhrases = true) {204 return Expand(new SymbolList(grammar.StartSymbol), grammar, length, hashPhrases ? new HashSet<int>() : null);205 } 206 207 private static IEnumerable<SymbolList> Expand(SymbolList phrase, Grammar grammar, int maxLength, HashSet<int> visited) {208 if ( phrase.Count > maxLength) {349 private static IEnumerable<SymbolList> EnumerateGrammarDepth(Grammar grammar, int length, int depth, bool hashPhrases = true) { 350 return Expand(new SymbolList(grammar.StartSymbol), grammar, length, 0, depth, hashPhrases ? new HashSet<int>() : null); 351 } 352 353 private static IEnumerable<SymbolList> Expand(SymbolList phrase, Grammar grammar, int maxLength, int depth, int maxDepth, HashSet<int> visited) { 354 if (maxDepth < depth || maxLength < phrase.Count) { 209 355 yield break; 210 356 } … … 215 361 } 216 362 217 if (visited != null && !visited.Add( grammar.Hasher.CalcHashCode(phrase))) {363 if (visited != null && !visited.Add(HashExtensions.ComputeHash(grammar, null, phrase))) { 218 364 yield break; 219 365 } … … 222 368 var productions = grammar.Productions[phrase[i]]; 223 369 224 foreach (var s in productions.SelectMany(p => Expand(phrase.DerivePhrase(i, p), grammar, maxLength, visited)))370 foreach (var s in productions.SelectMany(p => Expand(phrase.DerivePhrase(i, p), grammar, maxLength, depth + 1, maxDepth, visited))) 225 371 yield return s; 226 372 } 373 #endregion 227 374 } 228 375 }
Note: See TracChangeset
for help on using the changeset viewer.