using System.Collections; using System.Collections.Generic; using System.ComponentModel; using System.Diagnostics; using System.Linq; using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration; using HeuristicLab.Common; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.Problems.DataAnalysis.Symbolic; namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration { public class Grammar { public Symbol StartSymbol; #region Symbols public VariableSymbol Var; public NonterminalSymbol Expr; public NonterminalSymbol Term; public NonterminalSymbol Factor; public TerminalSymbol Addition; public TerminalSymbol Multiplication; #endregion #region HL Symbols for Parsing ExpressionTrees private TypeCoherentExpressionGrammar symbolicExpressionGrammar; private ISymbol constSy; private ISymbol varSy; private ISymbol addSy; private ISymbol mulSy; private ISymbol logSy; private ISymbol expSy; private ISymbol divSy; private ISymbol rootSy; private ISymbol startSy; #endregion public Grammar(string[] variables) { #region Define Symbols Var = new VariableSymbol("var", variables); Expr = new NonterminalSymbol("Expr"); Term = new NonterminalSymbol("Term"); Factor = new NonterminalSymbol("Factor"); Addition = new TerminalSymbol("+"); Multiplication = new TerminalSymbol("*"); #endregion #region Production rules // order of production is important, since they are accessed via index // in memoization. StartSymbol = Expr; Expr.AddProduction(Term, Expr, Addition); Expr.AddProduction(Term); Term.AddProduction(Factor, Term, Multiplication); Term.AddProduction(Factor); Factor.AddProduction(Var); #endregion #region Parsing to SymbolicExpressionTree symbolicExpressionGrammar = new TypeCoherentExpressionGrammar(); symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar(); constSy = symbolicExpressionGrammar.Symbols.OfType().First(); varSy = symbolicExpressionGrammar.Symbols.OfType().First(); addSy = symbolicExpressionGrammar.AllowedSymbols.OfType().First(); mulSy = symbolicExpressionGrammar.AllowedSymbols.OfType().First(); logSy = symbolicExpressionGrammar.AllowedSymbols.OfType().First(); expSy = symbolicExpressionGrammar.AllowedSymbols.OfType().First(); divSy = symbolicExpressionGrammar.AllowedSymbols.OfType().First(); rootSy = symbolicExpressionGrammar.AllowedSymbols.OfType().First(); startSy = symbolicExpressionGrammar.AllowedSymbols.OfType().First(); #endregion } /* #region Memoize subtrees public void MemoizeSubtrees(SymbolString sentence) { Stack parseStack = new Stack(sentence.OfType()); // Parse root symbol "+" MemoizeSubtreeExpression(parseStack); } private SymbolString MemoizeSubtreeExpression(Stack parseStack) { SymbolString subtree = new SymbolString(); if (ReferenceEquals(parseStack.Peek(), Addition)) { subtree.Add(parseStack.Pop()); subtree.InsertRange(0, MemoizeSubtreeExpression(parseStack)); subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack)); Expr.Alternatives[0].GeneratedSentences.Add(subtree); } else { subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack)); Expr.Alternatives[1].GeneratedSentences.Add(subtree); } return subtree; } private SymbolString MemoizeSubtreeTerm(Stack parseStack) { SymbolString subtree = new SymbolString(); if (ReferenceEquals(parseStack.Peek(), Multiplication)) { subtree.Add(parseStack.Pop()); subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack)); subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack)); Term.Alternatives[0].GeneratedSentences.Add(subtree); } else { subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack)); Term.Alternatives[1].GeneratedSentences.Add(subtree); } return subtree; } private SymbolString MemoizeSubtreeFactor(Stack parseStack) { SymbolString subtree = new SymbolString(MemoizeSubtreeVar(parseStack)); Factor.Alternatives[0].GeneratedSentences.Add(subtree); return subtree; } private SymbolString MemoizeSubtreeVar(Stack parseStack) { SymbolString subtree = new SymbolString(parseStack.Pop().ToEnumerable()); // ... not really //Var.Alternatives[0].GeneratedSentences.Add(subtree); return subtree; } #endregion */ #region Hashing public int CalcHashCode(SymbolString sentence) { Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!"); Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!"); Stack parseStack = new Stack(sentence.OfType()); int[] subtreeHashes = GetSubtreeHashes(parseStack.Pop(), parseStack); return AggregateHashes(subtreeHashes); } private int[] GetSubtreeHashes(TerminalSymbol currentSymbol, Stack parseStack) { List childHashes = null; if (Var.VariableTerminalSymbols.Contains(currentSymbol)) { childHashes = currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToList(); } else if (ReferenceEquals(currentSymbol, Multiplication)) { // MULTIPLICATION childHashes = new List(); // First subtree TerminalSymbol firstSubtreeRoot = parseStack.Pop(); int[] firstSubtreeHashes = GetSubtreeHashes(firstSubtreeRoot, parseStack); if (ReferenceEquals(firstSubtreeRoot, Multiplication)) { childHashes.AddRange(firstSubtreeHashes); } else { childHashes.Add(AggregateHashes(firstSubtreeHashes)); } // Second subtree TerminalSymbol secondSubtreeRoot = parseStack.Pop(); int[] secondSubtreeHashes = GetSubtreeHashes(secondSubtreeRoot, parseStack); if (ReferenceEquals(secondSubtreeRoot, Multiplication)) { childHashes.AddRange(secondSubtreeHashes); } else { childHashes.Add(AggregateHashes(secondSubtreeHashes)); } // Sort due to commutativity childHashes.Sort(); } else if (ReferenceEquals(currentSymbol, Addition)) { // ADDITION HashSet uniqueChildHashes = new HashSet(); TerminalSymbol firstSubtreeRoot = parseStack.Pop(); int[] firstSubtreeHashes = GetSubtreeHashes(firstSubtreeRoot, parseStack); // First subtree if (ReferenceEquals(firstSubtreeRoot, Addition)) { uniqueChildHashes.UnionWith(firstSubtreeHashes); } else { uniqueChildHashes.Add(AggregateHashes(firstSubtreeHashes)); } // Second subtree TerminalSymbol secondSubtreeRoot = parseStack.Pop(); int[] secondSubtreeHashes = GetSubtreeHashes(secondSubtreeRoot, parseStack); if (ReferenceEquals(secondSubtreeRoot, Addition)) { uniqueChildHashes.UnionWith(secondSubtreeHashes); } else { uniqueChildHashes.Add(AggregateHashes(secondSubtreeHashes)); } childHashes = uniqueChildHashes.ToList(); childHashes.Sort(); } Debug.Assert(childHashes != null, "Sentence not hashable!"); return childHashes.ToArray(); } private int AggregateHashes(IEnumerable hashes) { return hashes.Aggregate(0, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode()); } #endregion #region Parse to SymbolicExpressionTree public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) { Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!"); Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!"); symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar(); var rootNode = rootSy.CreateTreeNode(); var startNode = startSy.CreateTreeNode(); rootNode.AddSubtree(startNode); Stack parseStack = new Stack(sentence.OfType()); startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack)); return new SymbolicExpressionTree(rootNode); } public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack parseStack) { TerminalSymbol currentSymbol = parseStack.Pop(); ISymbolicExpressionTreeNode parsedSubTree = null; if (ReferenceEquals(currentSymbol, Addition)) { parsedSubTree = addSy.CreateTreeNode(); parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part } else if (ReferenceEquals(currentSymbol, Multiplication)) { parsedSubTree = mulSy.CreateTreeNode(); parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part } else if (Var.VariableTerminalSymbols.Contains(currentSymbol)) { VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode(); varNode.Weight = 1.0; varNode.VariableName = currentSymbol.StringRepresentation; parsedSubTree = varNode; } Debug.Assert(parsedSubTree != null); return parsedSubTree; } #endregion } }