Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs @ 15772

Last change on this file since 15772 was 15772, checked in by lkammere, 6 years ago

#2886: Extend grammar enumeration algorithm's grammar to exp, log and sine.

File size: 11.0 KB
RevLine 
[15725]1using System;
[15714]2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
6using HeuristicLab.Common;
[15722]7using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
8using HeuristicLab.Problems.DataAnalysis.Symbolic;
[15714]9
10namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
[15712]11  public class Grammar {
12
13    public Symbol StartSymbol;
14
[15714]15    #region Symbols
[15722]16
[15714]17    public VariableSymbol Var;
18
19    public NonterminalSymbol Expr;
20    public NonterminalSymbol Term;
21    public NonterminalSymbol Factor;
[15772]22    public NonterminalSymbol LogFactor;
23    public NonterminalSymbol ExpFactor;
24    public NonterminalSymbol SinFactor;
[15714]25
[15772]26    public NonterminalSymbol SimpleExpr;
27    public NonterminalSymbol SimpleTerm;
28
[15714]29    public TerminalSymbol Addition;
30    public TerminalSymbol Multiplication;
[15772]31    public TerminalSymbol Log;
32    public TerminalSymbol Exp;
33    public TerminalSymbol Sin;
[15722]34
[15772]35    // For infix notation
36    public TerminalSymbol OpeningBracket;
37    public TerminalSymbol ClosingBracket;
38
[15714]39    #endregion
40
41
[15722]42    #region HL Symbols for Parsing ExpressionTrees
43
44    private TypeCoherentExpressionGrammar symbolicExpressionGrammar;
45
46    private ISymbol constSy;
47    private ISymbol varSy;
48
49    private ISymbol addSy;
50    private ISymbol mulSy;
51    private ISymbol logSy;
52    private ISymbol expSy;
53    private ISymbol divSy;
[15772]54    private ISymbol sinSy;
[15722]55
56    private ISymbol rootSy;
57    private ISymbol startSy;
58
59    #endregion
60
[15712]61    public Grammar(string[] variables) {
62      #region Define Symbols
[15714]63      Var = new VariableSymbol("var", variables);
[15712]64
[15714]65      Expr = new NonterminalSymbol("Expr");
66      Term = new NonterminalSymbol("Term");
67      Factor = new NonterminalSymbol("Factor");
[15772]68      LogFactor = new NonterminalSymbol("LogFactor");
69      ExpFactor = new NonterminalSymbol("ExpFactor");
70      SinFactor = new NonterminalSymbol("SinFactor");
[15712]71
[15772]72      SimpleExpr = new NonterminalSymbol("SimpleExpr");
73      SimpleTerm = new NonterminalSymbol("SimpleTerm");
74
[15714]75      Addition = new TerminalSymbol("+");
76      Multiplication = new TerminalSymbol("*");
[15772]77      Log = new TerminalSymbol("log");
78      Exp = new TerminalSymbol("exp");
79      Sin = new TerminalSymbol("sin");
80
81      OpeningBracket = new TerminalSymbol("(");
82      ClosingBracket = new TerminalSymbol(")");
[15712]83      #endregion
84
85
[15723]86      #region Production rules
87      // order of production is important, since they are accessed via index
88      // in memoization.
[15714]89      StartSymbol = Expr;
[15712]90
[15714]91      Expr.AddProduction(Term, Expr, Addition);
92      Expr.AddProduction(Term);
[15712]93
[15714]94      Term.AddProduction(Factor, Term, Multiplication);
95      Term.AddProduction(Factor);
[15712]96
[15714]97      Factor.AddProduction(Var);
[15772]98      Factor.AddProduction(LogFactor);
99      Factor.AddProduction(ExpFactor);
100      Factor.AddProduction(SinFactor);
101
102      LogFactor.AddProduction(SimpleExpr, Log);
103      ExpFactor.AddProduction(SimpleTerm, Exp);
104      SinFactor.AddProduction(SimpleExpr, Sin);
105
106      SimpleExpr.AddProduction(SimpleTerm, SimpleExpr, Addition);
107      SimpleExpr.AddProduction(SimpleTerm);
108
109      SimpleTerm.AddProduction(Var, SimpleTerm, Multiplication);
110      SimpleTerm.AddProduction(Var);
[15712]111      #endregion
[15722]112
113      #region Parsing to SymbolicExpressionTree
114      symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
115      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
116
117      constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
118      varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
[15772]119      addSy = symbolicExpressionGrammar.Symbols.OfType<Addition>().First();
120      mulSy = symbolicExpressionGrammar.Symbols.OfType<Multiplication>().First();
121      logSy = symbolicExpressionGrammar.Symbols.OfType<Logarithm>().First();
122      expSy = symbolicExpressionGrammar.Symbols.OfType<Exponential>().First();
123      divSy = symbolicExpressionGrammar.Symbols.OfType<Division>().First();
124      sinSy = symbolicExpressionGrammar.Symbols.OfType<Sine>().First();
[15722]125
[15772]126      rootSy = symbolicExpressionGrammar.Symbols.OfType<ProgramRootSymbol>().First();
127      startSy = symbolicExpressionGrammar.Symbols.OfType<StartSymbol>().First();
[15722]128
129      #endregion
[15712]130    }
[15714]131
[15723]132    #region Hashing
[15714]133    public int CalcHashCode(SymbolString sentence) {
134      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
[15734]135      // Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
[15714]136
[15734]137      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
[15714]138
[15734]139      Symbol peek = parseStack.Peek();
[15725]140      int[] subtreeHashes = GetSubtreeHashes(parseStack);
141      return AggregateHashes(peek, subtreeHashes);
[15714]142    }
143
[15734]144    private int[] GetSubtreeHashes(Stack<Symbol> parseStack) {
145      Symbol currentSymbol = parseStack.Pop();
[15714]146
[15725]147      // MULTIPLICATION
148      if (ReferenceEquals(currentSymbol, Multiplication)) {
149        List<int> childHashes = new List<int>();
[15714]150
151        // First subtree
[15725]152        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
153          childHashes.AddRange(GetSubtreeHashes(parseStack));
[15714]154        } else {
[15725]155          childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
[15714]156        }
157        // Second subtree
[15725]158        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
159          childHashes.AddRange(GetSubtreeHashes(parseStack));
[15714]160        } else {
[15725]161          childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
[15714]162        }
163
164        // Sort due to commutativity
165        childHashes.Sort();
[15725]166        return childHashes.ToArray();
167      }
[15714]168
[15725]169      // ADDITION
170      if (ReferenceEquals(currentSymbol, Addition)) {
[15714]171        HashSet<int> uniqueChildHashes = new HashSet<int>();
172
173        // First subtree
[15725]174        if (ReferenceEquals(parseStack.Peek(), Addition)) {
175          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
[15714]176        } else {
[15725]177          var peek = parseStack.Peek();
178          uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
[15714]179        }
180        // Second subtree
[15725]181        if (ReferenceEquals(parseStack.Peek(), Addition)) {
182          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
[15714]183        } else {
[15725]184          var peek = parseStack.Peek();
185          uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
[15714]186        }
187
[15725]188        var result = uniqueChildHashes.ToList();
189        result.Sort();
190        return result.ToArray();
[15714]191      }
[15734]192
[15772]193      // LOG, EXP, SIN
194      if (ReferenceEquals(currentSymbol, Log) || ReferenceEquals(currentSymbol, Exp) ||
195          ReferenceEquals(currentSymbol, Sin)) {
196        return AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)).ToEnumerable().ToArray();
197      }
198
[15746]199      // var or nonterminal symbol
[15734]200      return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();
[15714]201    }
202
[15734]203    private int AggregateHashes(Symbol operatorSym, IEnumerable<int> hashes) {
[15725]204      var hashesArray = hashes.ToArray();
[15772]205
206      int start;
207      if (ReferenceEquals(operatorSym, Addition) && hashesArray.Count() <= 1) {
208        start = 0;
209      } else {
210        start = operatorSym.StringRepresentation.GetHashCode();
211      }
212
[15725]213      return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
[15714]214    }
[15722]215    #endregion
216
217    #region Parse to SymbolicExpressionTree
218    public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) {
219      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
220      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
221
222      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
223
224      var rootNode = rootSy.CreateTreeNode();
225      var startNode = startSy.CreateTreeNode();
226      rootNode.AddSubtree(startNode);
227
228      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
229      startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
230
231      return new SymbolicExpressionTree(rootNode);
232    }
233
234    public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<TerminalSymbol> parseStack) {
235      TerminalSymbol currentSymbol = parseStack.Pop();
236
237      ISymbolicExpressionTreeNode parsedSubTree = null;
238
239      if (ReferenceEquals(currentSymbol, Addition)) {
240        parsedSubTree = addSy.CreateTreeNode();
241        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
242        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
243
244      } else if (ReferenceEquals(currentSymbol, Multiplication)) {
245        parsedSubTree = mulSy.CreateTreeNode();
246        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
247        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
248
[15772]249      } else if (ReferenceEquals(currentSymbol, Log)) {
250        parsedSubTree = logSy.CreateTreeNode();
251        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
252
253      } else if (ReferenceEquals(currentSymbol, Exp)) {
254        parsedSubTree = expSy.CreateTreeNode();
255        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
256
257      } else if (ReferenceEquals(currentSymbol, Sin)) {
258        parsedSubTree = sinSy.CreateTreeNode();
259        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
260
[15722]261      } else if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
262        VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
263        varNode.Weight = 1.0;
264        varNode.VariableName = currentSymbol.StringRepresentation;
265        parsedSubTree = varNode;
266      }
267
268      Debug.Assert(parsedSubTree != null);
269      return parsedSubTree;
270    }
271    #endregion
[15724]272
273    #region Parse to Infix string
274
275    public SymbolString PostfixToInfixParser(SymbolString phrase) {
276      Stack<Symbol> parseStack = new Stack<Symbol>(phrase);
277
278      return PostfixToInfixSubtreeParser(parseStack);
279    }
280
281    private SymbolString PostfixToInfixSubtreeParser(Stack<Symbol> parseStack) {
282      Symbol head = parseStack.Pop();
283
284      SymbolString result = new SymbolString();
285
286      if (ReferenceEquals(head, Addition) || ReferenceEquals(head, Multiplication)) {
[15725]287        // right part
288        SymbolString rightPart = PostfixToInfixSubtreeParser(parseStack);
289        SymbolString leftPart = PostfixToInfixSubtreeParser(parseStack);
290
291        result.AddRange(leftPart);
[15724]292        result.Add(head);
[15725]293        result.AddRange(rightPart);
[15772]294
295      } else if (ReferenceEquals(head, Log) || ReferenceEquals(head, Exp) || ReferenceEquals(head, Sin)) {
296        result.Add(head);
297        result.Add(OpeningBracket);
298        result.AddRange(PostfixToInfixSubtreeParser(parseStack));
299        result.Add(ClosingBracket);
300
[15724]301      } else {
302        result.Add(head);
303      }
304      return result;
305    }
306
307    #endregion
[15712]308  }
309}
Note: See TracBrowser for help on using the repository browser.