using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration; using HeuristicLab.Common; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.Problems.DataAnalysis.Symbolic; namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration { public class Grammar { public Symbol StartSymbol { get; } #region Symbols public VariableSymbol Var; public NonterminalSymbol Expr; public NonterminalSymbol Term; public NonterminalSymbol Factor; public NonterminalSymbol LogFactor; public NonterminalSymbol ExpFactor; public NonterminalSymbol SinFactor; public NonterminalSymbol SimpleExpr; public NonterminalSymbol SimpleTerm; public NonterminalSymbol InvExpr; public NonterminalSymbol InvTerm; public TerminalSymbol Addition; public TerminalSymbol Multiplication; public TerminalSymbol Log; public TerminalSymbol Exp; public TerminalSymbol Sin; public TerminalSymbol Inv; // For infix notation public TerminalSymbol OpeningBracket; public TerminalSymbol ClosingBracket; #endregion #region HL Symbols for Parsing ExpressionTrees private TypeCoherentExpressionGrammar symbolicExpressionGrammar; private ISymbol constSy; private ISymbol varSy; private ISymbol addSy; private ISymbol mulSy; private ISymbol logSy; private ISymbol expSy; private ISymbol divSy; private ISymbol sinSy; private ISymbol rootSy; private ISymbol startSy; #endregion public Grammar(string[] variables) { #region Define Symbols Var = new VariableSymbol("var", variables); Expr = new NonterminalSymbol("Expr"); Term = new NonterminalSymbol("Term"); Factor = new NonterminalSymbol("Factor"); LogFactor = new NonterminalSymbol("LogFactor"); ExpFactor = new NonterminalSymbol("ExpFactor"); SinFactor = new NonterminalSymbol("SinFactor"); SimpleExpr = new NonterminalSymbol("SimpleExpr"); SimpleTerm = new NonterminalSymbol("SimpleTerm"); InvExpr = new NonterminalSymbol("InvExpr"); InvTerm = new NonterminalSymbol("InvTerm"); Addition = new TerminalSymbol("+"); Multiplication = new TerminalSymbol("*"); Log = new TerminalSymbol("log"); Exp = new TerminalSymbol("exp"); Sin = new TerminalSymbol("sin"); Inv = new TerminalSymbol("inv"); OpeningBracket = new TerminalSymbol("("); ClosingBracket = new TerminalSymbol(")"); #endregion #region Production rules StartSymbol = Expr; Expr.AddProduction(Term, Expr, Addition); Expr.AddProduction(Term); Term.AddProduction(Factor, Term, Multiplication); Term.AddProduction(Factor); Term.AddProduction(InvExpr, Inv); Factor.AddProduction(Var); Factor.AddProduction(LogFactor); Factor.AddProduction(ExpFactor); Factor.AddProduction(SinFactor); LogFactor.AddProduction(SimpleExpr, Log); ExpFactor.AddProduction(SimpleTerm, Exp); SinFactor.AddProduction(SimpleExpr, Sin); SimpleExpr.AddProduction(SimpleTerm, SimpleExpr, Addition); SimpleExpr.AddProduction(SimpleTerm); SimpleTerm.AddProduction(Var, SimpleTerm, Multiplication); SimpleTerm.AddProduction(Var); InvExpr.AddProduction(InvTerm, InvExpr, Addition); InvExpr.AddProduction(InvTerm); InvTerm.AddProduction(Factor, InvTerm, Multiplication); InvTerm.AddProduction(Factor); #endregion #region Parsing to SymbolicExpressionTree symbolicExpressionGrammar = new TypeCoherentExpressionGrammar(); symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar(); constSy = symbolicExpressionGrammar.Symbols.OfType().First(); varSy = symbolicExpressionGrammar.Symbols.OfType().First(); addSy = symbolicExpressionGrammar.Symbols.OfType().First(); mulSy = symbolicExpressionGrammar.Symbols.OfType().First(); logSy = symbolicExpressionGrammar.Symbols.OfType().First(); expSy = symbolicExpressionGrammar.Symbols.OfType().First(); divSy = symbolicExpressionGrammar.Symbols.OfType().First(); sinSy = symbolicExpressionGrammar.Symbols.OfType().First(); rootSy = symbolicExpressionGrammar.Symbols.OfType().First(); startSy = symbolicExpressionGrammar.Symbols.OfType().First(); #endregion } #region Hashing public int CalcHashCode(SymbolString sentence) { return CalcHashCode(sentence, AggregateIntHashes); } private int CalcHashCode(SymbolString sentence, Func, THashType> aggregateFunction) { Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!"); Stack parseStack = new Stack(sentence); Symbol peek = parseStack.Peek(); return aggregateFunction(peek, GetSubtreeHashes(parseStack, aggregateFunction)).GetHashCode(); } private THashType[] GetSubtreeHashes(Stack parseStack, Func, THashType> aggregateHashes) { Symbol currentSymbol = parseStack.Pop(); // ADDITION if (ReferenceEquals(currentSymbol, Addition)) { var uniqueChildHashes = new HashSet(); // First subtree if (ReferenceEquals(parseStack.Peek(), Addition)) { uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack, aggregateHashes)); } else { var peek = parseStack.Peek(); uniqueChildHashes.Add(aggregateHashes(peek, GetSubtreeHashes(parseStack, aggregateHashes))); } // Second subtree if (ReferenceEquals(parseStack.Peek(), Addition)) { uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack, aggregateHashes)); } else { var peek = parseStack.Peek(); uniqueChildHashes.Add(aggregateHashes(peek, GetSubtreeHashes(parseStack, aggregateHashes))); } var result = uniqueChildHashes.ToArray(); Array.Sort(result); return result; } // MULTIPLICATION if (ReferenceEquals(currentSymbol, Multiplication)) { var childHashes = new List(); // First subtree if (ReferenceEquals(parseStack.Peek(), Multiplication)) { childHashes.AddRange(GetSubtreeHashes(parseStack, aggregateHashes)); } else { childHashes.Add(aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes))); } // Second subtree if (ReferenceEquals(parseStack.Peek(), Multiplication)) { childHashes.AddRange(GetSubtreeHashes(parseStack, aggregateHashes)); } else { childHashes.Add(aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes))); } // Sort due to commutativity childHashes.Sort(); // Cancel out inverse factors. bool[] isFactorRemaining = Enumerable.Repeat(true, childHashes.Count).ToArray(); for (int i = 0; i < isFactorRemaining.Length; i++) { if (!isFactorRemaining[i]) continue; if (isFactorRemaining.Count() <= 2) break; // Until we have constants, we can't cancel out all terms. var currFactor = childHashes[i]; var invFactor = aggregateHashes(Inv, currFactor.ToEnumerable()); int indexOfInv = childHashes.IndexOf(invFactor); if (indexOfInv >= 0 && isFactorRemaining[indexOfInv]) { isFactorRemaining[i] = isFactorRemaining[indexOfInv] = false; } } return Enumerable .Range(0, isFactorRemaining.Length) .Where(i => isFactorRemaining[i]) .Select(i => childHashes[i]) .ToArray(); } // LOG, EXP, SIN, INV if (ReferenceEquals(currentSymbol, Log) || ReferenceEquals(currentSymbol, Exp) || ReferenceEquals(currentSymbol, Sin) || ReferenceEquals(currentSymbol, Inv)) { return new[] { aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)) }; } // var or nonterminal symbol return new[] { aggregateHashes(currentSymbol, Enumerable.Empty()) }; } private string AggregateStringHashes(Symbol operatorSym, IEnumerable hashes) { var hashesArray = hashes.ToArray(); if ((ReferenceEquals(operatorSym, Addition) || ReferenceEquals(operatorSym, Multiplication)) && hashesArray.Count() <= 1) { return hashesArray[0]; } if (operatorSym is NonterminalSymbol || Var.VariableTerminalSymbols.Contains(operatorSym)) { return operatorSym.StringRepresentation; } return $"[{hashesArray.Aggregate(operatorSym.StringRepresentation, (result, ti) => string.Concat(result, " ° ", ti))}]"; // TODO: use string join instead of string.Concat } private int AggregateIntHashes(Symbol operatorSym, IEnumerable hashes) { var hashesArray = hashes.ToArray(); int start; if ((ReferenceEquals(operatorSym, Addition) || ReferenceEquals(operatorSym, Multiplication)) && hashesArray.Count() <= 1) { start = 0; } else if (operatorSym is NonterminalSymbol || Var.VariableTerminalSymbols.Contains(operatorSym)) { return operatorSym.StringRepresentation.GetHashCode(); } else { start = operatorSym.StringRepresentation.GetHashCode(); } return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode()); } #endregion #region Parse to SymbolicExpressionTree public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) { Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!"); Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!"); symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar(); // TODO: not necessary to call this for each sentence var rootNode = rootSy.CreateTreeNode(); var startNode = startSy.CreateTreeNode(); rootNode.AddSubtree(startNode); Stack parseStack = new Stack(sentence.OfType()); startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack)); return new SymbolicExpressionTree(rootNode); } public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack parseStack) { TerminalSymbol currentSymbol = parseStack.Pop(); ISymbolicExpressionTreeNode parsedSubTree = null; if (ReferenceEquals(currentSymbol, Addition)) { parsedSubTree = addSy.CreateTreeNode(); parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part } else if (ReferenceEquals(currentSymbol, Multiplication)) { parsedSubTree = mulSy.CreateTreeNode(); parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part } else if (ReferenceEquals(currentSymbol, Log)) { parsedSubTree = logSy.CreateTreeNode(); parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); } else if (ReferenceEquals(currentSymbol, Exp)) { parsedSubTree = expSy.CreateTreeNode(); parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); } else if (ReferenceEquals(currentSymbol, Sin)) { parsedSubTree = sinSy.CreateTreeNode(); parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); } else if (ReferenceEquals(currentSymbol, Inv)) { parsedSubTree = divSy.CreateTreeNode(); ConstantTreeNode dividend = (ConstantTreeNode)constSy.CreateTreeNode(); dividend.Value = 1.0; parsedSubTree.AddSubtree(dividend); parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); } else if (Var.VariableTerminalSymbols.Contains(currentSymbol)) { VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode(); varNode.Weight = 1.0; varNode.VariableName = currentSymbol.StringRepresentation; parsedSubTree = varNode; } Debug.Assert(parsedSubTree != null); return parsedSubTree; } #endregion #region Parse to Infix string public SymbolString PostfixToInfixParser(SymbolString phrase) { Stack parseStack = new Stack(phrase); return PostfixToInfixSubtreeParser(parseStack); } private SymbolString PostfixToInfixSubtreeParser(Stack parseStack) { Symbol head = parseStack.Pop(); SymbolString result = new SymbolString(); if (ReferenceEquals(head, Addition) || ReferenceEquals(head, Multiplication)) { // right part SymbolString rightPart = PostfixToInfixSubtreeParser(parseStack); SymbolString leftPart = PostfixToInfixSubtreeParser(parseStack); result.AddRange(leftPart); result.Add(head); result.AddRange(rightPart); } else if (ReferenceEquals(head, Log) || ReferenceEquals(head, Exp) || ReferenceEquals(head, Sin) || ReferenceEquals(head, Inv)) { result.Add(head); result.Add(OpeningBracket); result.AddRange(PostfixToInfixSubtreeParser(parseStack)); result.Add(ClosingBracket); } else { result.Add(head); } return result; } #endregion } }