using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration; using HeuristicLab.Common; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.Problems.DataAnalysis.Symbolic; namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration { public class Grammar { public Symbol StartSymbol; #region Symbols public VariableSymbol Var; public NonterminalSymbol Expr; public NonterminalSymbol Term; public NonterminalSymbol Factor; public TerminalSymbol Addition; public TerminalSymbol Multiplication; #endregion #region HL Symbols for Parsing ExpressionTrees private TypeCoherentExpressionGrammar symbolicExpressionGrammar; private ISymbol constSy; private ISymbol varSy; private ISymbol addSy; private ISymbol mulSy; private ISymbol logSy; private ISymbol expSy; private ISymbol divSy; private ISymbol rootSy; private ISymbol startSy; #endregion public Grammar(string[] variables) { #region Define Symbols Var = new VariableSymbol("var", variables); Expr = new NonterminalSymbol("Expr"); Term = new NonterminalSymbol("Term"); Factor = new NonterminalSymbol("Factor"); Addition = new TerminalSymbol("+"); Multiplication = new TerminalSymbol("*"); #endregion #region Production rules // order of production is important, since they are accessed via index // in memoization. StartSymbol = Expr; Expr.AddProduction(Term, Expr, Addition); Expr.AddProduction(Term); Term.AddProduction(Factor, Term, Multiplication); Term.AddProduction(Factor); Factor.AddProduction(Var); #endregion #region Parsing to SymbolicExpressionTree symbolicExpressionGrammar = new TypeCoherentExpressionGrammar(); symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar(); constSy = symbolicExpressionGrammar.Symbols.OfType().First(); varSy = symbolicExpressionGrammar.Symbols.OfType().First(); addSy = symbolicExpressionGrammar.AllowedSymbols.OfType().First(); mulSy = symbolicExpressionGrammar.AllowedSymbols.OfType().First(); logSy = symbolicExpressionGrammar.AllowedSymbols.OfType().First(); expSy = symbolicExpressionGrammar.AllowedSymbols.OfType().First(); divSy = symbolicExpressionGrammar.AllowedSymbols.OfType().First(); rootSy = symbolicExpressionGrammar.AllowedSymbols.OfType().First(); startSy = symbolicExpressionGrammar.AllowedSymbols.OfType().First(); #endregion } /* #region Memoize subtrees public void MemoizeSubtrees(SymbolString sentence) { Stack parseStack = new Stack(sentence.OfType()); // Parse root symbol "+" MemoizeSubtreeExpression(parseStack); } private SymbolString MemoizeSubtreeExpression(Stack parseStack) { SymbolString subtree = new SymbolString(); if (ReferenceEquals(parseStack.Peek(), Addition)) { subtree.Add(parseStack.Pop()); subtree.InsertRange(0, MemoizeSubtreeExpression(parseStack)); subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack)); Expr.Alternatives[0].GeneratedSentences.Add(subtree); } else { subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack)); Expr.Alternatives[1].GeneratedSentences.Add(subtree); } return subtree; } private SymbolString MemoizeSubtreeTerm(Stack parseStack) { SymbolString subtree = new SymbolString(); if (ReferenceEquals(parseStack.Peek(), Multiplication)) { subtree.Add(parseStack.Pop()); subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack)); subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack)); Term.Alternatives[0].GeneratedSentences.Add(subtree); } else { subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack)); Term.Alternatives[1].GeneratedSentences.Add(subtree); } return subtree; } private SymbolString MemoizeSubtreeFactor(Stack parseStack) { SymbolString subtree = new SymbolString(MemoizeSubtreeVar(parseStack)); Factor.Alternatives[0].GeneratedSentences.Add(subtree); return subtree; } private SymbolString MemoizeSubtreeVar(Stack parseStack) { SymbolString subtree = new SymbolString(parseStack.Pop().ToEnumerable()); // ... not really //Var.Alternatives[0].GeneratedSentences.Add(subtree); return subtree; } #endregion */ #region Hashing public int CalcHashCode(SymbolString sentence) { Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!"); Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!"); Stack parseStack = new Stack(sentence.OfType()); TerminalSymbol peek = parseStack.Peek(); int[] subtreeHashes = GetSubtreeHashes(parseStack); return AggregateHashes(peek, subtreeHashes); } private int[] GetSubtreeHashes(Stack parseStack) { TerminalSymbol currentSymbol = parseStack.Pop(); // VARIABLE if (Var.VariableTerminalSymbols.Contains(currentSymbol)) { return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray(); } // MULTIPLICATION if (ReferenceEquals(currentSymbol, Multiplication)) { List childHashes = new List(); // First subtree if (ReferenceEquals(parseStack.Peek(), Multiplication)) { childHashes.AddRange(GetSubtreeHashes(parseStack)); } else { childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack))); } // Second subtree if (ReferenceEquals(parseStack.Peek(), Multiplication)) { childHashes.AddRange(GetSubtreeHashes(parseStack)); } else { childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack))); } // Sort due to commutativity childHashes.Sort(); return childHashes.ToArray(); } // ADDITION if (ReferenceEquals(currentSymbol, Addition)) { HashSet uniqueChildHashes = new HashSet(); // First subtree if (ReferenceEquals(parseStack.Peek(), Addition)) { uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack)); } else { var peek = parseStack.Peek(); uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack))); } // Second subtree if (ReferenceEquals(parseStack.Peek(), Addition)) { uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack)); } else { var peek = parseStack.Peek(); uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack))); } var result = uniqueChildHashes.ToList(); result.Sort(); return result.ToArray(); } throw new ArgumentException("Trying to hash malformed sentence!"); } private int AggregateHashes(TerminalSymbol rule, IEnumerable hashes) { // If multiple subtrees are "merged" (e.g. added, multiplied, etc.), consider the executed operation var hashesArray = hashes.ToArray(); int start = hashesArray.Length > 1 ? rule.StringRepresentation.GetHashCode() : 0; return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode()); } #endregion #region Parse to SymbolicExpressionTree public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) { Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!"); Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!"); symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar(); var rootNode = rootSy.CreateTreeNode(); var startNode = startSy.CreateTreeNode(); rootNode.AddSubtree(startNode); Stack parseStack = new Stack(sentence.OfType()); startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack)); return new SymbolicExpressionTree(rootNode); } public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack parseStack) { TerminalSymbol currentSymbol = parseStack.Pop(); ISymbolicExpressionTreeNode parsedSubTree = null; if (ReferenceEquals(currentSymbol, Addition)) { parsedSubTree = addSy.CreateTreeNode(); parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part } else if (ReferenceEquals(currentSymbol, Multiplication)) { parsedSubTree = mulSy.CreateTreeNode(); parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part } else if (Var.VariableTerminalSymbols.Contains(currentSymbol)) { VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode(); varNode.Weight = 1.0; varNode.VariableName = currentSymbol.StringRepresentation; parsedSubTree = varNode; } Debug.Assert(parsedSubTree != null); return parsedSubTree; } #endregion #region Parse to Infix string public SymbolString PostfixToInfixParser(SymbolString phrase) { Stack parseStack = new Stack(phrase); return PostfixToInfixSubtreeParser(parseStack); } private SymbolString PostfixToInfixSubtreeParser(Stack parseStack) { Symbol head = parseStack.Pop(); SymbolString result = new SymbolString(); if (ReferenceEquals(head, Addition) || ReferenceEquals(head, Multiplication)) { // right part SymbolString rightPart = PostfixToInfixSubtreeParser(parseStack); SymbolString leftPart = PostfixToInfixSubtreeParser(parseStack); result.AddRange(leftPart); result.Add(head); result.AddRange(rightPart); } else { result.Add(head); } return result; } #endregion } }