using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using HeuristicLab.Common; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration { [StorableClass] public abstract class Hasher : DeepCloneable { protected Hasher(Grammar grammar) { Grammar = grammar; } protected Hasher(Hasher original, Cloner cloner) : base(original, cloner) { Grammar = cloner.Clone(original.Grammar); } [Storable] protected Grammar Grammar { get; private set; } protected abstract THashType AggregateHashes(Symbol operatorSym, IList hashes); public int CalcHashCode(SymbolString sentence) { Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!"); Stack parseStack = new Stack(sentence); Symbol peek = parseStack.Peek(); return AggregateHashes(peek, GetSubtreeHashes(parseStack)).GetHashCode(); } [StorableConstructor] protected Hasher(bool deserializing) { } private IList GetSubtreeHashes(Stack parseStack) { Symbol currentSymbol = parseStack.Pop(); // ADDITION if (currentSymbol == Grammar.Addition) { return GetAdditionSubtreeHashes(parseStack); } // MULTIPLICATION if (currentSymbol == Grammar.Multiplication) { return GetMultiplicationSubtreeHashes(parseStack); } // LOG, EXP, SIN, INV if (currentSymbol == Grammar.Log || currentSymbol == Grammar.Exp || currentSymbol == Grammar.Sin || currentSymbol == Grammar.Inv) { // Directly aggregate the single subtree return new[] { AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)) }; } // var, const or nonterminal symbol return new[] { AggregateHashes(currentSymbol, new THashType[0]) }; } private void AddAdditionChildHashes(HashSet hashSet, Stack symbolStack) { var peek = symbolStack.Peek(); var hashes = GetSubtreeHashes(symbolStack); // will pop the stack if (peek == Grammar.Addition) hashSet.UnionWith(hashes); else hashSet.Add(AggregateHashes(peek, hashes)); } private IList GetAdditionSubtreeHashes(Stack parseStack) { var uniqueChildHashes = new HashSet(); AddAdditionChildHashes(uniqueChildHashes, parseStack); // first child AddAdditionChildHashes(uniqueChildHashes, parseStack); // second child var result = uniqueChildHashes.ToArray(); Array.Sort(result); return result; } private void AddMultiplicationChildHashes(List hashList, Stack symbolStack) { var peek = symbolStack.Peek(); var hashes = GetSubtreeHashes(symbolStack); if (peek == Grammar.Multiplication) hashList.AddRange(hashes); else hashList.Add(AggregateHashes(peek, hashes)); } public IList GetMultiplicationSubtreeHashes(Stack parseStack) { var childHashes = new List(); AddMultiplicationChildHashes(childHashes, parseStack); // first child AddMultiplicationChildHashes(childHashes, parseStack); // second child // Sort due to commutativity childHashes.Sort(); if (childHashes.Count <= 2) return childHashes; // Cancel out inverse factors. var isInvFactor = new bool[childHashes.Count]; bool foundInvFactor = false; for (int i = 0; i < isInvFactor.Length; i++) { if (isInvFactor[i]) continue; var currFactor = childHashes[i]; var invFactor = AggregateHashes(Grammar.Inv, new[] { currFactor }); int indexOfInv = childHashes.IndexOf(invFactor); if (indexOfInv >= 0 && !isInvFactor[indexOfInv]) { isInvFactor[i] = isInvFactor[indexOfInv] = true; foundInvFactor = true; } } if (!foundInvFactor) return childHashes; return childHashes .Where((c, i) => !isInvFactor[i]) .ToArray(); } } [StorableClass] public class IntHasher : Hasher { public IntHasher(Grammar grammar) : base(grammar) { } public IntHasher(IntHasher original, Cloner cloner) : base(original, cloner) { } public override IDeepCloneable Clone(Cloner cloner) { return new IntHasher(this, cloner); } [StorableConstructor] protected IntHasher(bool deserializing) : base(deserializing) { } protected override int AggregateHashes(Symbol operatorSym, IList hashes) { int start; if ((operatorSym == Grammar.Addition || operatorSym == Grammar.Multiplication) && hashes.Count <= 1) { start = 0; } else if (operatorSym is NonterminalSymbol || operatorSym is VariableTerminalSymbol) { return operatorSym.StringRepresentation.GetHashCode(); } else { start = operatorSym.StringRepresentation.GetHashCode(); } for (int i = 0; i < hashes.Count; i++) { start = ((start << 5) + start) ^ hashes[i]; } return start; } } [StorableClass] public class StringHasher : Hasher { public StringHasher(Grammar grammar) : base(grammar) { } public StringHasher(StringHasher original, Cloner cloner) : base(original, cloner) { } public override IDeepCloneable Clone(Cloner cloner) { return new StringHasher(this, cloner); } [StorableConstructor] protected StringHasher(bool deserializing) : base(deserializing) { } protected override string AggregateHashes(Symbol operatorSym, IList hashes) { var hashesArray = hashes.ToArray(); if ((operatorSym == Grammar.Addition || operatorSym == Grammar.Multiplication) && hashesArray.Length <= 1) { return hashesArray[0]; } if (operatorSym is NonterminalSymbol || operatorSym is VariableTerminalSymbol) { return operatorSym.StringRepresentation; } return $"[{operatorSym.StringRepresentation} ° {string.Join(" ° ", hashesArray)}]"; } } }