Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs @ 15784

Last change on this file since 15784 was 15784, checked in by lkammere, 6 years ago

#2886: Add basic implementation for inverse factors.

File size: 12.5 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
6using HeuristicLab.Common;
7using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
8using HeuristicLab.Problems.DataAnalysis.Symbolic;
9
10namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
11  public class Grammar {
12
13    public Symbol StartSymbol;
14
15    #region Symbols
16
17    public VariableSymbol Var;
18
19    public NonterminalSymbol Expr;
20    public NonterminalSymbol Term;
21    public NonterminalSymbol Factor;
22    public NonterminalSymbol LogFactor;
23    public NonterminalSymbol ExpFactor;
24    public NonterminalSymbol SinFactor;
25
26    public NonterminalSymbol InvExpr;
27    public NonterminalSymbol InvTerm;
28    public NonterminalSymbol InvFactor;
29
30    public NonterminalSymbol SimpleExpr;
31    public NonterminalSymbol SimpleTerm;
32
33    public TerminalSymbol Addition;
34    public TerminalSymbol Multiplication;
35    public TerminalSymbol Log;
36    public TerminalSymbol Exp;
37    public TerminalSymbol Sin;
38    public TerminalSymbol Inv;
39
40    // For infix notation
41    public TerminalSymbol OpeningBracket;
42    public TerminalSymbol ClosingBracket;
43
44    #endregion
45
46
47    #region HL Symbols for Parsing ExpressionTrees
48
49    private TypeCoherentExpressionGrammar symbolicExpressionGrammar;
50
51    private ISymbol constSy;
52    private ISymbol varSy;
53
54    private ISymbol addSy;
55    private ISymbol mulSy;
56    private ISymbol logSy;
57    private ISymbol expSy;
58    private ISymbol divSy;
59    private ISymbol sinSy;
60
61    private ISymbol rootSy;
62    private ISymbol startSy;
63
64    #endregion
65
66    public Grammar(string[] variables) {
67      #region Define Symbols
68      Var = new VariableSymbol("var", variables);
69
70      Expr = new NonterminalSymbol("Expr");
71      Term = new NonterminalSymbol("Term");
72      Factor = new NonterminalSymbol("Factor");
73      LogFactor = new NonterminalSymbol("LogFactor");
74      ExpFactor = new NonterminalSymbol("ExpFactor");
75      SinFactor = new NonterminalSymbol("SinFactor");
76
77      InvExpr = new NonterminalSymbol("InvExpr");
78      InvTerm = new NonterminalSymbol("InvTerm");
79      InvFactor = new NonterminalSymbol("InvFactor");
80
81      SimpleExpr = new NonterminalSymbol("SimpleExpr");
82      SimpleTerm = new NonterminalSymbol("SimpleTerm");
83
84      Addition = new TerminalSymbol("+");
85      Multiplication = new TerminalSymbol("*");
86      Log = new TerminalSymbol("log");
87      Exp = new TerminalSymbol("exp");
88      Sin = new TerminalSymbol("sin");
89      Inv = new TerminalSymbol("inv");
90
91      OpeningBracket = new TerminalSymbol("(");
92      ClosingBracket = new TerminalSymbol(")");
93      #endregion
94
95
96      #region Production rules
97      // order of production is important, since they are accessed via index
98      // in memoization.
99      StartSymbol = Expr;
100
101      Expr.AddProduction(Term, Expr, Addition);
102      Expr.AddProduction(Term);
103
104      Term.AddProduction(Factor, Term, Multiplication);
105      Term.AddProduction(Factor);
106      Term.AddProduction(InvExpr, Inv);
107
108      Factor.AddProduction(Var);
109      Factor.AddProduction(LogFactor);
110      Factor.AddProduction(ExpFactor);
111      Factor.AddProduction(SinFactor);
112
113      LogFactor.AddProduction(SimpleExpr, Log);
114      ExpFactor.AddProduction(SimpleTerm, Exp);
115      SinFactor.AddProduction(SimpleExpr, Sin);
116
117      SimpleExpr.AddProduction(SimpleTerm, SimpleExpr, Addition);
118      SimpleExpr.AddProduction(SimpleTerm);
119
120      SimpleTerm.AddProduction(Var, SimpleTerm, Multiplication);
121      SimpleTerm.AddProduction(Var);
122
123      InvExpr.AddProduction(InvTerm, InvExpr, Addition);
124      InvExpr.AddProduction(InvTerm);
125
126      InvTerm.AddProduction(InvFactor, InvTerm, Multiplication);
127      InvTerm.AddProduction(InvFactor);
128
129      InvFactor.AddProduction(Var);
130      InvFactor.AddProduction(LogFactor);
131      InvFactor.AddProduction(SinFactor);
132      #endregion
133
134      #region Parsing to SymbolicExpressionTree
135      symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
136      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
137
138      constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
139      varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
140      addSy = symbolicExpressionGrammar.Symbols.OfType<Addition>().First();
141      mulSy = symbolicExpressionGrammar.Symbols.OfType<Multiplication>().First();
142      logSy = symbolicExpressionGrammar.Symbols.OfType<Logarithm>().First();
143      expSy = symbolicExpressionGrammar.Symbols.OfType<Exponential>().First();
144      divSy = symbolicExpressionGrammar.Symbols.OfType<Division>().First();
145      sinSy = symbolicExpressionGrammar.Symbols.OfType<Sine>().First();
146
147      rootSy = symbolicExpressionGrammar.Symbols.OfType<ProgramRootSymbol>().First();
148      startSy = symbolicExpressionGrammar.Symbols.OfType<StartSymbol>().First();
149
150      #endregion
151    }
152
153    #region Hashing
154    public int CalcHashCode(SymbolString sentence) {
155      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
156      // Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
157
158      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
159
160      Symbol peek = parseStack.Peek();
161      int[] subtreeHashes = GetSubtreeHashes(parseStack);
162      return AggregateHashes(peek, subtreeHashes);
163    }
164
165    private int[] GetSubtreeHashes(Stack<Symbol> parseStack) {
166      Symbol currentSymbol = parseStack.Pop();
167
168      // ADDITION
169      if (ReferenceEquals(currentSymbol, Addition)) {
170        HashSet<int> uniqueChildHashes = new HashSet<int>();
171
172        // First subtree
173        if (ReferenceEquals(parseStack.Peek(), Addition)) {
174          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
175        } else {
176          var peek = parseStack.Peek();
177          uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
178        }
179        // Second subtree
180        if (ReferenceEquals(parseStack.Peek(), Addition)) {
181          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
182        } else {
183          var peek = parseStack.Peek();
184          uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
185        }
186
187        var result = uniqueChildHashes.ToList();
188        result.Sort();
189        return result.ToArray();
190      }
191
192      // MULTIPLICATION
193      if (ReferenceEquals(currentSymbol, Multiplication)) {
194        List<int> childHashes = new List<int>();
195
196        // First subtree
197        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
198          childHashes.AddRange(GetSubtreeHashes(parseStack));
199        } else {
200          childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
201        }
202        // Second subtree
203        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
204          childHashes.AddRange(GetSubtreeHashes(parseStack));
205        } else {
206          childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
207        }
208
209        // Sort due to commutativity
210        childHashes.Sort();
211
212        // Cancel out inverse factors.
213        var inversChildHashes = childHashes
214          .Select(ch => AggregateHashes(Inv, ch.ToEnumerable()))
215          .ToList();
216
217        return childHashes // If this factor cancels out another one, then remove BOTH. Otherwise, keep the factor.
218          .Where(ch => !inversChildHashes.Remove(ch))
219          .ToArray();
220      }
221
222      // LOG, EXP, SIN, INV
223      if (ReferenceEquals(currentSymbol, Log) || ReferenceEquals(currentSymbol, Exp) ||
224          ReferenceEquals(currentSymbol, Sin) || ReferenceEquals(currentSymbol, Inv)) {
225        return AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)).ToEnumerable().ToArray();
226      }
227
228      // var or nonterminal symbol
229      return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();
230    }
231
232    private int AggregateHashes(Symbol operatorSym, IEnumerable<int> hashes) {
233      var hashesArray = hashes.ToArray();
234
235      int start;
236      if ((ReferenceEquals(operatorSym, Addition) || ReferenceEquals(operatorSym, Multiplication)) && hashesArray.Count() <= 1) {
237        start = 0;
238      } else {
239        start = operatorSym.StringRepresentation.GetHashCode();
240      }
241
242      return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
243    }
244    #endregion
245
246    #region Parse to SymbolicExpressionTree
247    public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) {
248      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
249      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
250
251      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
252
253      var rootNode = rootSy.CreateTreeNode();
254      var startNode = startSy.CreateTreeNode();
255      rootNode.AddSubtree(startNode);
256
257      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
258      startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
259
260      return new SymbolicExpressionTree(rootNode);
261    }
262
263    public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<TerminalSymbol> parseStack) {
264      TerminalSymbol currentSymbol = parseStack.Pop();
265
266      ISymbolicExpressionTreeNode parsedSubTree = null;
267
268      if (ReferenceEquals(currentSymbol, Addition)) {
269        parsedSubTree = addSy.CreateTreeNode();
270        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
271        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
272
273      } else if (ReferenceEquals(currentSymbol, Multiplication)) {
274        parsedSubTree = mulSy.CreateTreeNode();
275        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
276        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
277
278      } else if (ReferenceEquals(currentSymbol, Log)) {
279        parsedSubTree = logSy.CreateTreeNode();
280        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
281
282      } else if (ReferenceEquals(currentSymbol, Exp)) {
283        parsedSubTree = expSy.CreateTreeNode();
284        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
285
286      } else if (ReferenceEquals(currentSymbol, Sin)) {
287        parsedSubTree = sinSy.CreateTreeNode();
288        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
289
290      } else if (ReferenceEquals(currentSymbol, Inv)) {
291        parsedSubTree = divSy.CreateTreeNode();
292        ConstantTreeNode dividend = (ConstantTreeNode)constSy.CreateTreeNode();
293        dividend.Value = 1.0;
294        parsedSubTree.AddSubtree(dividend);
295        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
296
297      } else if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
298        VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
299        varNode.Weight = 1.0;
300        varNode.VariableName = currentSymbol.StringRepresentation;
301        parsedSubTree = varNode;
302      }
303
304      Debug.Assert(parsedSubTree != null);
305      return parsedSubTree;
306    }
307    #endregion
308
309    #region Parse to Infix string
310
311    public SymbolString PostfixToInfixParser(SymbolString phrase) {
312      Stack<Symbol> parseStack = new Stack<Symbol>(phrase);
313
314      return PostfixToInfixSubtreeParser(parseStack);
315    }
316
317    private SymbolString PostfixToInfixSubtreeParser(Stack<Symbol> parseStack) {
318      Symbol head = parseStack.Pop();
319
320      SymbolString result = new SymbolString();
321
322      if (ReferenceEquals(head, Addition) || ReferenceEquals(head, Multiplication)) {
323        // right part
324        SymbolString rightPart = PostfixToInfixSubtreeParser(parseStack);
325        SymbolString leftPart = PostfixToInfixSubtreeParser(parseStack);
326
327        result.AddRange(leftPart);
328        result.Add(head);
329        result.AddRange(rightPart);
330
331      } else if (ReferenceEquals(head, Log) || ReferenceEquals(head, Exp)
332              || ReferenceEquals(head, Sin) || ReferenceEquals(head, Inv)) {
333        result.Add(head);
334        result.Add(OpeningBracket);
335        result.AddRange(PostfixToInfixSubtreeParser(parseStack));
336        result.Add(ClosingBracket);
337
338      } else {
339        result.Add(head);
340      }
341      return result;
342    }
343
344    #endregion
345  }
346}
Note: See TracBrowser for help on using the repository browser.