Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs @ 15734

Last change on this file since 15734 was 15734, checked in by gkronber, 6 years ago

#2886 worked on grammar enumeration

File size: 10.8 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
6using HeuristicLab.Common;
7using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
8using HeuristicLab.Problems.DataAnalysis.Symbolic;
9
10namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
11  public class Grammar {
12
13    public Symbol StartSymbol;
14
15    #region Symbols
16
17    public VariableSymbol Var;
18
19    public NonterminalSymbol Expr;
20    public NonterminalSymbol Term;
21    public NonterminalSymbol Factor;
22
23    public TerminalSymbol Addition;
24    public TerminalSymbol Multiplication;
25
26    #endregion
27
28
29    #region HL Symbols for Parsing ExpressionTrees
30
31    private TypeCoherentExpressionGrammar symbolicExpressionGrammar;
32
33    private ISymbol constSy;
34    private ISymbol varSy;
35
36    private ISymbol addSy;
37    private ISymbol mulSy;
38    private ISymbol logSy;
39    private ISymbol expSy;
40    private ISymbol divSy;
41
42    private ISymbol rootSy;
43    private ISymbol startSy;
44
45    #endregion
46
47    public Grammar(string[] variables) {
48      #region Define Symbols
49      Var = new VariableSymbol("var", variables);
50
51      Expr = new NonterminalSymbol("Expr");
52      Term = new NonterminalSymbol("Term");
53      Factor = new NonterminalSymbol("Factor");
54
55      Addition = new TerminalSymbol("+");
56      Multiplication = new TerminalSymbol("*");
57      #endregion
58
59
60      #region Production rules
61      // order of production is important, since they are accessed via index
62      // in memoization.
63      StartSymbol = Expr;
64
65      Expr.AddProduction(Term, Expr, Addition);
66      Expr.AddProduction(Term);
67
68      Term.AddProduction(Factor, Term, Multiplication);
69      Term.AddProduction(Factor);
70
71      Factor.AddProduction(Var);
72      #endregion
73
74      #region Parsing to SymbolicExpressionTree
75      symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
76      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
77
78      constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
79      varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
80      addSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Addition>().First();
81      mulSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Multiplication>().First();
82      logSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Logarithm>().First();
83      expSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Exponential>().First();
84      divSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Division>().First();
85
86      rootSy = symbolicExpressionGrammar.AllowedSymbols.OfType<ProgramRootSymbol>().First();
87      startSy = symbolicExpressionGrammar.AllowedSymbols.OfType<StartSymbol>().First();
88
89      #endregion
90    }
91
92    /*
93    #region Memoize subtrees
94
95    public void MemoizeSubtrees(SymbolString sentence) {
96      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
97
98      // Parse root symbol "+"
99      MemoizeSubtreeExpression(parseStack);
100    }
101
102    private SymbolString MemoizeSubtreeExpression(Stack<TerminalSymbol> parseStack) {
103      SymbolString subtree = new SymbolString();
104
105      if (ReferenceEquals(parseStack.Peek(), Addition)) {
106        subtree.Add(parseStack.Pop());
107        subtree.InsertRange(0, MemoizeSubtreeExpression(parseStack));
108        subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));
109
110        Expr.Alternatives[0].GeneratedSentences.Add(subtree);
111      } else {
112        subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));
113
114        Expr.Alternatives[1].GeneratedSentences.Add(subtree);
115      }
116
117      return subtree;
118    }
119
120    private SymbolString MemoizeSubtreeTerm(Stack<TerminalSymbol> parseStack) {
121      SymbolString subtree = new SymbolString();
122
123      if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
124        subtree.Add(parseStack.Pop());
125        subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));
126        subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack));
127
128        Term.Alternatives[0].GeneratedSentences.Add(subtree);
129      } else {
130        subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack));
131
132        Term.Alternatives[1].GeneratedSentences.Add(subtree);
133      }
134
135      return subtree;
136    }
137
138    private SymbolString MemoizeSubtreeFactor(Stack<TerminalSymbol> parseStack) {
139      SymbolString subtree = new SymbolString(MemoizeSubtreeVar(parseStack));
140
141      Factor.Alternatives[0].GeneratedSentences.Add(subtree);
142      return subtree;
143    }
144
145    private SymbolString MemoizeSubtreeVar(Stack<TerminalSymbol> parseStack) {
146      SymbolString subtree = new SymbolString(parseStack.Pop().ToEnumerable());
147
148      // ... not really
149      //Var.Alternatives[0].GeneratedSentences.Add(subtree);
150      return subtree;
151    }
152
153
154    #endregion
155    */
156
157    #region Hashing
158    public int CalcHashCode(SymbolString sentence) {
159      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
160      // Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
161
162      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
163
164      Symbol peek = parseStack.Peek();
165      int[] subtreeHashes = GetSubtreeHashes(parseStack);
166      return AggregateHashes(peek, subtreeHashes);
167    }
168
169    private int[] GetSubtreeHashes(Stack<Symbol> parseStack) {
170      Symbol currentSymbol = parseStack.Pop();
171
172      // VARIABLE
173      // if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
174      //   return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();
175      // }
176
177      // MULTIPLICATION
178      if (ReferenceEquals(currentSymbol, Multiplication)) {
179        List<int> childHashes = new List<int>();
180
181        // First subtree
182        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
183          childHashes.AddRange(GetSubtreeHashes(parseStack));
184        } else {
185          childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
186        }
187        // Second subtree
188        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
189          childHashes.AddRange(GetSubtreeHashes(parseStack));
190        } else {
191          childHashes.Add(AggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack)));
192        }
193
194        // Sort due to commutativity
195        childHashes.Sort();
196        return childHashes.ToArray();
197      }
198
199      // ADDITION
200      if (ReferenceEquals(currentSymbol, Addition)) {
201        HashSet<int> uniqueChildHashes = new HashSet<int>();
202
203        // First subtree
204        if (ReferenceEquals(parseStack.Peek(), Addition)) {
205          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
206        } else {
207          var peek = parseStack.Peek();
208          uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
209        }
210        // Second subtree
211        if (ReferenceEquals(parseStack.Peek(), Addition)) {
212          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack));
213        } else {
214          var peek = parseStack.Peek();
215          uniqueChildHashes.Add(AggregateHashes(peek, GetSubtreeHashes(parseStack)));
216        }
217
218        var result = uniqueChildHashes.ToList();
219        result.Sort();
220        return result.ToArray();
221      }
222
223      // var
224      return currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToArray();
225
226
227      // throw new ArgumentException("Trying to hash malformed sentence!");
228    }
229
230    private int AggregateHashes(Symbol operatorSym, IEnumerable<int> hashes) {
231      // If multiple subtrees are "merged" (e.g. added, multiplied, etc.), consider the executed operation
232      var hashesArray = hashes.ToArray();
233      int start = hashesArray.Length > 1 ? operatorSym.StringRepresentation.GetHashCode() : 0;
234      return hashesArray.Aggregate(start, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
235    }
236    #endregion
237
238    #region Parse to SymbolicExpressionTree
239    public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) {
240      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
241      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
242
243      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
244
245      var rootNode = rootSy.CreateTreeNode();
246      var startNode = startSy.CreateTreeNode();
247      rootNode.AddSubtree(startNode);
248
249      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
250      startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
251
252      return new SymbolicExpressionTree(rootNode);
253    }
254
255    public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<TerminalSymbol> parseStack) {
256      TerminalSymbol currentSymbol = parseStack.Pop();
257
258      ISymbolicExpressionTreeNode parsedSubTree = null;
259
260      if (ReferenceEquals(currentSymbol, Addition)) {
261        parsedSubTree = addSy.CreateTreeNode();
262        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
263        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
264
265      } else if (ReferenceEquals(currentSymbol, Multiplication)) {
266        parsedSubTree = mulSy.CreateTreeNode();
267        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
268        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
269
270      } else if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
271        VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
272        varNode.Weight = 1.0;
273        varNode.VariableName = currentSymbol.StringRepresentation;
274        parsedSubTree = varNode;
275      }
276
277      Debug.Assert(parsedSubTree != null);
278      return parsedSubTree;
279    }
280    #endregion
281
282    #region Parse to Infix string
283
284    public SymbolString PostfixToInfixParser(SymbolString phrase) {
285      Stack<Symbol> parseStack = new Stack<Symbol>(phrase);
286
287      return PostfixToInfixSubtreeParser(parseStack);
288    }
289
290    private SymbolString PostfixToInfixSubtreeParser(Stack<Symbol> parseStack) {
291      Symbol head = parseStack.Pop();
292
293      SymbolString result = new SymbolString();
294
295      if (ReferenceEquals(head, Addition) || ReferenceEquals(head, Multiplication)) {
296        // right part
297        SymbolString rightPart = PostfixToInfixSubtreeParser(parseStack);
298        SymbolString leftPart = PostfixToInfixSubtreeParser(parseStack);
299
300        result.AddRange(leftPart);
301        result.Add(head);
302        result.AddRange(rightPart);
303      } else {
304        result.Add(head);
305      }
306      return result;
307    }
308
309    #endregion
310  }
311}
Note: See TracBrowser for help on using the repository browser.