source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs @ 15772

Last change on this file since 15772 was 15772, checked in by lkammere, 20 months ago

#2886: Extend grammar enumeration algorithm's grammar to exp, log and sine.

File size: 11.0 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
6using HeuristicLab.Common;
7using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
8using HeuristicLab.Problems.DataAnalysis.Symbolic;
9
10namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
11  public class Grammar {
12
13    public Symbol StartSymbol;
14
15    #region Symbols
16
17    public VariableSymbol Var;
18
19    public NonterminalSymbol Expr;
20    public NonterminalSymbol Term;
21    public NonterminalSymbol Factor;
22    public NonterminalSymbol LogFactor;
23    public NonterminalSymbol ExpFactor;
24    public NonterminalSymbol SinFactor;
25
26    public NonterminalSymbol SimpleExpr;
27    public NonterminalSymbol SimpleTerm;
28
29    public TerminalSymbol Addition;
30    public TerminalSymbol Multiplication;
31    public TerminalSymbol Log;
32    public TerminalSymbol Exp;
33    public TerminalSymbol Sin;
34
35    // For infix notation
36    public TerminalSymbol OpeningBracket;
37    public TerminalSymbol ClosingBracket;
38
39    #endregion
40
41
42    #region HL Symbols for Parsing ExpressionTrees
43
44    private TypeCoherentExpressionGrammar symbolicExpressionGrammar;
45
46    private ISymbol constSy;
47    private ISymbol varSy;
48
49    private ISymbol addSy;
50    private ISymbol mulSy;
51    private ISymbol logSy;
52    private ISymbol expSy;
53    private ISymbol divSy;
54    private ISymbol sinSy;
55
56    private ISymbol rootSy;
57    private ISymbol startSy;
58
59    #endregion
60
61    public Grammar(string[] variables) {
62      #region Define Symbols
63      Var = new VariableSymbol("var", variables);
64
65      Expr = new NonterminalSymbol("Expr");
66      Term = new NonterminalSymbol("Term");
67      Factor = new NonterminalSymbol("Factor");
68      LogFactor = new NonterminalSymbol("LogFactor");
69      ExpFactor = new NonterminalSymbol("ExpFactor");
70      SinFactor = new NonterminalSymbol("SinFactor");
71
72      SimpleExpr = new NonterminalSymbol("SimpleExpr");
73      SimpleTerm = new NonterminalSymbol("SimpleTerm");
74
75      Addition = new TerminalSymbol("+");
76      Multiplication = new TerminalSymbol("*");
77      Log = new TerminalSymbol("log");
78      Exp = new TerminalSymbol("exp");
79      Sin = new TerminalSymbol("sin");
80
81      OpeningBracket = new TerminalSymbol("(");
82      ClosingBracket = new TerminalSymbol(")");
83      #endregion
84
85
86      #region Production rules
87      // order of production is important, since they are accessed via index
88      // in memoization.
89      StartSymbol = Expr;
90
91      Expr.AddProduction(Term, Expr, Addition);
92      Expr.AddProduction(Term);
93
94      Term.AddProduction(Factor, Term, Multiplication);
95      Term.AddProduction(Factor);
96
97      Factor.AddProduction(Var);
98      Factor.AddProduction(LogFactor);
99      Factor.AddProduction(ExpFactor);
100      Factor.AddProduction(SinFactor);
101
102      LogFactor.AddProduction(SimpleExpr, Log);
103      ExpFactor.AddProduction(SimpleTerm, Exp);
104      SinFactor.AddProduction(SimpleExpr, Sin);
105
106      SimpleExpr.AddProduction(SimpleTerm, SimpleExpr, Addition);
107      SimpleExpr.AddProduction(SimpleTerm);
108
109      SimpleTerm.AddProduction(Var, SimpleTerm, Multiplication);
110      SimpleTerm.AddProduction(Var);
111      #endregion
112
113      #region Parsing to SymbolicExpressionTree
114      symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
115      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
116
117      constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
118      varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
119      addSy = symbolicExpressionGrammar.Symbols.OfType<Addition>().First();
120      mulSy = symbolicExpressionGrammar.Symbols.OfType<Multiplication>().First();
121      logSy = symbolicExpressionGrammar.Symbols.OfType<Logarithm>().First();
122      expSy = symbolicExpressionGrammar.Symbols.OfType<Exponential>().First();
123      divSy = symbolicExpressionGrammar.Symbols.OfType<Division>().First();
124      sinSy = symbolicExpressionGrammar.Symbols.OfType<Sine>().First();
125
126      rootSy = symbolicExpressionGrammar.Symbols.OfType<ProgramRootSymbol>().First();
127      startSy = symbolicExpressionGrammar.Symbols.OfType<StartSymbol>().First();
128
129      #endregion
130    }
131
132    #region Hashing
133    public int CalcHashCode(SymbolString sentence) {
134      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
135      // Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
136
137      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
138
139      Symbol peek = parseStack.Peek();
140      int[] subtreeHashes = GetSubtreeHashes(parseStack);
141      return AggregateHashes(peek, subtreeHashes);
142    }
143
144    private int[] GetSubtreeHashes(Stack<Symbol> parseStack) {
145      Symbol currentSymbol = parseStack.Pop();
146
147      // MULTIPLICATION
148      if (ReferenceEquals(currentSymbol, Multiplication)) {
149        List<int> childHashes = new List<int>();
150
151        // First subtree
152        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
153          childHashes.AddRange(GetSubtreeHashes(parseStack));
154        } else {
155          childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
156        }
157        // Second subtree
158        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
159          childHashes.AddRange(GetSubtreeHashes(parseStack));
160        } else {
161          childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
162        }
163
164        // Sort due to commutativity
165        childHashes.Sort();
166        return childHashes.ToArray();
167      }
168
169      // ADDITION
170      if (ReferenceEquals(currentSymbol, Addition)) {
171        HashSet<int> uniqueChildHashes = new HashSet<int>();
172
173        // First subtree
174        if (ReferenceEquals(parseStack.Peek(), Addition)) {
175          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
176        } else {
177          var peek = parseStack.Peek();
178          uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
179        }
180        // Second subtree
181        if (ReferenceEquals(parseStack.Peek(), Addition)) {
182          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
183        } else {
184          var peek = parseStack.Peek();
185          uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
186        }
187
188        var result = uniqueChildHashes.ToList();
189        result.Sort();
190        return result.ToArray();
191      }
192
193      // LOG, EXP, SIN
194      if (ReferenceEquals(currentSymbol, Log) || ReferenceEquals(currentSymbol, Exp) ||
195          ReferenceEquals(currentSymbol, Sin)) {
196        return AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)).ToEnumerable().ToArray();
197      }
198
199      // var or nonterminal symbol
200      return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();
201    }
202
203    private int AggregateHashes(Symbol operatorSym, IEnumerable<int> hashes) {
204      var hashesArray = hashes.ToArray();
205
206      int start;
207      if (ReferenceEquals(operatorSym, Addition) && hashesArray.Count() <= 1) {
208        start = 0;
209      } else {
210        start = operatorSym.StringRepresentation.GetHashCode();
211      }
212
213      return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
214    }
215    #endregion
216
217    #region Parse to SymbolicExpressionTree
218    public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) {
219      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
220      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
221
222      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
223
224      var rootNode = rootSy.CreateTreeNode();
225      var startNode = startSy.CreateTreeNode();
226      rootNode.AddSubtree(startNode);
227
228      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
229      startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
230
231      return new SymbolicExpressionTree(rootNode);
232    }
233
234    public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<TerminalSymbol> parseStack) {
235      TerminalSymbol currentSymbol = parseStack.Pop();
236
237      ISymbolicExpressionTreeNode parsedSubTree = null;
238
239      if (ReferenceEquals(currentSymbol, Addition)) {
240        parsedSubTree = addSy.CreateTreeNode();
241        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
242        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
243
244      } else if (ReferenceEquals(currentSymbol, Multiplication)) {
245        parsedSubTree = mulSy.CreateTreeNode();
246        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
247        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
248
249      } else if (ReferenceEquals(currentSymbol, Log)) {
250        parsedSubTree = logSy.CreateTreeNode();
251        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
252
253      } else if (ReferenceEquals(currentSymbol, Exp)) {
254        parsedSubTree = expSy.CreateTreeNode();
255        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
256
257      } else if (ReferenceEquals(currentSymbol, Sin)) {
258        parsedSubTree = sinSy.CreateTreeNode();
259        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
260
261      } else if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
262        VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
263        varNode.Weight = 1.0;
264        varNode.VariableName = currentSymbol.StringRepresentation;
265        parsedSubTree = varNode;
266      }
267
268      Debug.Assert(parsedSubTree != null);
269      return parsedSubTree;
270    }
271    #endregion
272
273    #region Parse to Infix string
274
275    public SymbolString PostfixToInfixParser(SymbolString phrase) {
276      Stack<Symbol> parseStack = new Stack<Symbol>(phrase);
277
278      return PostfixToInfixSubtreeParser(parseStack);
279    }
280
281    private SymbolString PostfixToInfixSubtreeParser(Stack<Symbol> parseStack) {
282      Symbol head = parseStack.Pop();
283
284      SymbolString result = new SymbolString();
285
286      if (ReferenceEquals(head, Addition) || ReferenceEquals(head, Multiplication)) {
287        // right part
288        SymbolString rightPart = PostfixToInfixSubtreeParser(parseStack);
289        SymbolString leftPart = PostfixToInfixSubtreeParser(parseStack);
290
291        result.AddRange(leftPart);
292        result.Add(head);
293        result.AddRange(rightPart);
294
295      } else if (ReferenceEquals(head, Log) || ReferenceEquals(head, Exp) || ReferenceEquals(head, Sin)) {
296        result.Add(head);
297        result.Add(OpeningBracket);
298        result.AddRange(PostfixToInfixSubtreeParser(parseStack));
299        result.Add(ClosingBracket);
300
301      } else {
302        result.Add(head);
303      }
304      return result;
305    }
306
307    #endregion
308  }
309}
Note: See TracBrowser for help on using the repository browser.