using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration; using HeuristicLab.Common; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.Problems.DataAnalysis; using HeuristicLab.Problems.DataAnalysis.Symbolic; namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration { public enum GrammarRule { MultipleTerms, MultipleFactors, InverseTerm, Logarithm, Exponentiation, Sine } [StorableClass(StorableClassType.AllFieldsAndAllProperties)] public class Grammar : DeepCloneable { public Symbol StartSymbol { get; private set; } public Hasher Hasher { get; private set; } #region Symbols public IReadOnlyDictionary> Productions { get; private set; } public NonterminalSymbol Var; public IReadOnlyList VarTerminals; public NonterminalSymbol Expr; public NonterminalSymbol Term; public NonterminalSymbol Factor; public NonterminalSymbol LogFactor; public NonterminalSymbol ExpFactor; public NonterminalSymbol SinFactor; public NonterminalSymbol SimpleExpr; public NonterminalSymbol SimpleTerm; public NonterminalSymbol InvExpr; public NonterminalSymbol InvTerm; public TerminalSymbol Addition; public TerminalSymbol Multiplication; public TerminalSymbol Log; public TerminalSymbol Exp; public TerminalSymbol Sin; public TerminalSymbol Inv; public TerminalSymbol Const; #endregion #region HL Symbols for Parsing ExpressionTrees private ISymbol constSy; private ISymbol varSy; private ISymbol addSy; private ISymbol mulSy; private ISymbol logSy; private ISymbol expSy; private ISymbol divSy; private ISymbol sinSy; private ISymbol rootSy; private ISymbol startSy; private InfixExpressionFormatter infixExpressionFormatter; #endregion public Grammar(string[] variables) : this(variables, Enum.GetValues(typeof(GrammarRule)).Cast()) { } [StorableConstructor] protected Grammar(bool deserializing) { InitTreeParser(); } protected Grammar(Grammar original, Cloner cloner) : base(original, cloner) { infixExpressionFormatter = cloner.Clone(original.infixExpressionFormatter); Productions = original.Productions.ToDictionary(x => cloner.Clone(x.Key), x => (IReadOnlyList)x.Value.Select(cloner.Clone).ToList()); VarTerminals = original.VarTerminals.Select(cloner.Clone).ToList(); Var = (NonterminalSymbol)cloner.Clone(original.Var); Expr = (NonterminalSymbol)cloner.Clone(original.Expr); Term = (NonterminalSymbol)cloner.Clone(original.Term); Factor = (NonterminalSymbol)cloner.Clone(original.Factor); LogFactor = (NonterminalSymbol)cloner.Clone(original.LogFactor); ExpFactor = (NonterminalSymbol)cloner.Clone(original.ExpFactor); SinFactor = (NonterminalSymbol)cloner.Clone(original.SinFactor); SimpleExpr = (NonterminalSymbol)cloner.Clone(original.SimpleExpr); SimpleTerm = (NonterminalSymbol)cloner.Clone(original.SimpleTerm); InvExpr = (NonterminalSymbol)cloner.Clone(original.InvExpr); InvTerm = (NonterminalSymbol)cloner.Clone(original.InvTerm); Addition = (TerminalSymbol)cloner.Clone(original.Addition); Multiplication = (TerminalSymbol)cloner.Clone(original.Multiplication); Log = (TerminalSymbol)cloner.Clone(original.Log); Exp = (TerminalSymbol)cloner.Clone(original.Exp); Sin = (TerminalSymbol)cloner.Clone(original.Sin); Inv = (TerminalSymbol)cloner.Clone(original.Inv); Const = (TerminalSymbol)cloner.Clone(original.Const); StartSymbol = Expr; Hasher = cloner.Clone(original.Hasher); InitTreeParser(); // easier this way (and less typing) } private void InitProductions(string[] variables, IEnumerable includedRules) { #region Define Symbols Var = new NonterminalSymbol("Var"); Expr = new NonterminalSymbol("Expr"); Term = new NonterminalSymbol("Term"); Factor = new NonterminalSymbol("Factor"); LogFactor = new NonterminalSymbol("LogFactor"); ExpFactor = new NonterminalSymbol("ExpFactor"); SinFactor = new NonterminalSymbol("SinFactor"); SimpleExpr = new NonterminalSymbol("SimpleExpr"); SimpleTerm = new NonterminalSymbol("SimpleTerm"); InvExpr = new NonterminalSymbol("InvExpr"); InvTerm = new NonterminalSymbol("InvTerm"); Addition = new TerminalSymbol("+"); Multiplication = new TerminalSymbol("*"); Log = new TerminalSymbol("log"); Exp = new TerminalSymbol("exp"); Sin = new TerminalSymbol("sin"); Inv = new TerminalSymbol("inv"); Const = new TerminalSymbol("c"); #endregion #region Production rules StartSymbol = Expr; Dictionary> productions = new Dictionary>(); // Map each variable to a separate production rule of the "Var" nonterminal symbol. VarTerminals = variables.Select(v => new VariableTerminalSymbol(v)).ToArray(); productions[Var] = VarTerminals.Select(v => new Production(v)).ToArray(); // Expression Grammar Rules var exprProductions = new List(); if (includedRules.Contains(GrammarRule.MultipleTerms)) exprProductions.Add(new Production(Const, Term, Multiplication, Expr, Addition)); exprProductions.Add(new Production(Const, Term, Multiplication, Const, Addition)); productions[Expr] = exprProductions.ToArray(); // Term Grammar Rules var termProductions = new List(); if (includedRules.Contains(GrammarRule.MultipleFactors)) termProductions.Add(new Production(Factor, Term, Multiplication)); if (includedRules.Contains(GrammarRule.InverseTerm)) termProductions.Add(new Production(InvExpr, Inv)); termProductions.Add(new Production(Factor)); productions[Term] = termProductions.ToArray(); // Factor Grammar Rules var factorProductions = new List(); factorProductions.Add(new Production(Var)); if (includedRules.Contains(GrammarRule.Logarithm)) factorProductions.Add(new Production(LogFactor)); if (includedRules.Contains(GrammarRule.Exponentiation)) factorProductions.Add(new Production(ExpFactor)); if (includedRules.Contains(GrammarRule.Sine)) factorProductions.Add(new Production(SinFactor)); productions[Factor] = factorProductions.ToArray(); productions[LogFactor] = new[] { new Production(SimpleExpr, Log) }; productions[ExpFactor] = new[] { new Production(Const, SimpleTerm, Multiplication, Exp) }; productions[SinFactor] = new[] { new Production(SimpleExpr, Sin) }; productions[SimpleExpr] = new[] { new Production(Const, SimpleTerm, Multiplication, SimpleExpr, Addition), new Production(Const, SimpleTerm, Multiplication, Const, Addition) }; productions[SimpleTerm] = new[] { new Production(Var, SimpleTerm, Multiplication), new Production(Var) }; productions[InvExpr] = new[] { new Production(Const, InvTerm, Multiplication, InvExpr, Addition), new Production(Const, InvTerm, Multiplication, Const, Addition) }; productions[InvTerm] = new[] { new Production(Factor, InvTerm, Multiplication), new Production(Factor) }; Productions = productions; #endregion } private void InitTreeParser() { #region Parsing to SymbolicExpressionTree var symbolicExpressionGrammar = new TypeCoherentExpressionGrammar(); symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar(); constSy = symbolicExpressionGrammar.Symbols.OfType().First(); varSy = symbolicExpressionGrammar.Symbols.OfType().First(); addSy = symbolicExpressionGrammar.Symbols.OfType().First(); mulSy = symbolicExpressionGrammar.Symbols.OfType().First(); logSy = symbolicExpressionGrammar.Symbols.OfType().First(); expSy = symbolicExpressionGrammar.Symbols.OfType().First(); divSy = symbolicExpressionGrammar.Symbols.OfType().First(); sinSy = symbolicExpressionGrammar.Symbols.OfType().First(); rootSy = symbolicExpressionGrammar.Symbols.OfType().First(); startSy = symbolicExpressionGrammar.Symbols.OfType().First(); infixExpressionFormatter = new InfixExpressionFormatter(); #endregion } public Grammar(string[] variables, IEnumerable includedRules) { InitProductions(variables, includedRules); InitTreeParser(); Hasher = new IntHasher(this); } public int GetComplexity(SymbolString s) { int c = 0; int length = s.Count(); for (int i = 0; i < length; i++) { if (s[i] is NonterminalSymbol || s[i] is VariableTerminalSymbol) c++; } return c; } public double EvaluatePhrase(SymbolString s, IRegressionProblemData problemData, bool optimizeConstants) { SymbolicExpressionTree tree = ParseSymbolicExpressionTree(s); return RSquaredEvaluator.Evaluate(problemData, tree, optimizeConstants); } #region Parse to SymbolicExpressionTree public string ToInfixString(SymbolString sentence) { Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!"); Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!"); return infixExpressionFormatter.Format(ParseSymbolicExpressionTree(sentence)); } public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) { Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!"); var rootNode = rootSy.CreateTreeNode(); var startNode = startSy.CreateTreeNode(); rootNode.AddSubtree(startNode); Stack parseStack = new Stack(sentence); startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack)); return new SymbolicExpressionTree(rootNode); } public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack parseStack) { Symbol currentSymbol = parseStack.Pop(); ISymbolicExpressionTreeNode parsedSubTree = null; if (currentSymbol == Addition) { parsedSubTree = addSy.CreateTreeNode(); ISymbolicExpressionTreeNode rightSubtree = ParseSymbolicExpressionTree(parseStack); if (rightSubtree is ConstantTreeNode) { ((ConstantTreeNode)rightSubtree).Value = 0.0; } parsedSubTree.AddSubtree(rightSubtree); // left part ISymbolicExpressionTreeNode leftSubtree = ParseSymbolicExpressionTree(parseStack); if (leftSubtree is ConstantTreeNode) { ((ConstantTreeNode)leftSubtree).Value = 0.0; } parsedSubTree.AddSubtree(leftSubtree); // right part } else if (currentSymbol == Multiplication) { parsedSubTree = mulSy.CreateTreeNode(); parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part } else if (currentSymbol == Log) { parsedSubTree = logSy.CreateTreeNode(); parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); } else if (currentSymbol == Exp) { parsedSubTree = expSy.CreateTreeNode(); parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); } else if (currentSymbol == Sin) { parsedSubTree = sinSy.CreateTreeNode(); parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); } else if (currentSymbol == Inv) { parsedSubTree = divSy.CreateTreeNode(); ConstantTreeNode dividend = (ConstantTreeNode)constSy.CreateTreeNode(); dividend.Value = 1.0; parsedSubTree.AddSubtree(dividend); parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); } else if (currentSymbol == Const) { ConstantTreeNode constNode = (ConstantTreeNode)constSy.CreateTreeNode(); constNode.Value = 1.0; parsedSubTree = constNode; } else if (currentSymbol is VariableTerminalSymbol) { VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode(); varNode.Weight = 1.0; varNode.VariableName = currentSymbol.StringRepresentation; parsedSubTree = varNode; } else if (currentSymbol is NonterminalSymbol) { ConstantTreeNode constNode = (ConstantTreeNode)constSy.CreateTreeNode(); constNode.Value = 0.0; parsedSubTree = constNode; } Debug.Assert(parsedSubTree != null); return parsedSubTree; } #endregion #region abstract DeepCloneable methods public override IDeepCloneable Clone(Cloner cloner) { return new Grammar(this, cloner); } #endregion } }