Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs @ 15817

Last change on this file since 15817 was 15817, checked in by lkammere, 7 years ago

#2886: Add cosine to grammar.

File size: 14.4 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
6using HeuristicLab.Common;
7using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
8using HeuristicLab.Problems.DataAnalysis.Symbolic;
9
10namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
11  public class Grammar {
12
13    public Symbol StartSymbol { get; }
14
15    #region Symbols
16    public VariableSymbol Var;
17
18    public NonterminalSymbol Expr;
19    public NonterminalSymbol Term;
20    public NonterminalSymbol Factor;
21    public NonterminalSymbol LogFactor;
22    public NonterminalSymbol ExpFactor;
23    public NonterminalSymbol SinFactor;
24    public NonterminalSymbol CosFactor;
25
26    public NonterminalSymbol SimpleExpr;
27    public NonterminalSymbol SimpleTerm;
28
29    public NonterminalSymbol InvExpr;
30    public NonterminalSymbol InvTerm;
31
32    public TerminalSymbol Addition;
33    public TerminalSymbol Multiplication;
34    public TerminalSymbol Log;
35    public TerminalSymbol Exp;
36    public TerminalSymbol Sin;
37    public TerminalSymbol Cos;
38    public TerminalSymbol Inv;
39
40    // For infix notation
41    public TerminalSymbol OpeningBracket;
42    public TerminalSymbol ClosingBracket;
43
44    #endregion
45
46    #region HL Symbols for Parsing ExpressionTrees
47    private TypeCoherentExpressionGrammar symbolicExpressionGrammar;
48
49    private ISymbol constSy;
50    private ISymbol varSy;
51
52    private ISymbol addSy;
53    private ISymbol mulSy;
54    private ISymbol logSy;
55    private ISymbol expSy;
56    private ISymbol divSy;
57    private ISymbol sinSy;
58    private ISymbol cosSy;
59   
60    private ISymbol rootSy;
61    private ISymbol startSy;
62    #endregion
63
64    public Grammar(string[] variables) {
65      #region Define Symbols
66      Var = new VariableSymbol("var", variables);
67
68      Expr = new NonterminalSymbol("Expr");
69      Term = new NonterminalSymbol("Term");
70      Factor = new NonterminalSymbol("Factor");
71      LogFactor = new NonterminalSymbol("LogFactor");
72      ExpFactor = new NonterminalSymbol("ExpFactor");
73      SinFactor = new NonterminalSymbol("SinFactor");
74      CosFactor = new NonterminalSymbol("CosFactor");
75         
76      SimpleExpr = new NonterminalSymbol("SimpleExpr");
77      SimpleTerm = new NonterminalSymbol("SimpleTerm");
78
79      InvExpr = new NonterminalSymbol("InvExpr");
80      InvTerm = new NonterminalSymbol("InvTerm");
81
82      Addition = new TerminalSymbol("+");
83      Multiplication = new TerminalSymbol("*");
84      Log = new TerminalSymbol("log");
85      Exp = new TerminalSymbol("exp");
86      Sin = new TerminalSymbol("sin");
87      Cos = new TerminalSymbol("cos");
88      Inv = new TerminalSymbol("inv");
89
90      OpeningBracket = new TerminalSymbol("(");
91      ClosingBracket = new TerminalSymbol(")");
92      #endregion
93
94      #region Production rules
95      StartSymbol = Expr;
96
97      Expr.AddProduction(Term, Expr, Addition);
98      Expr.AddProduction(Term);
99
100      Term.AddProduction(Factor, Term, Multiplication);
101      Term.AddProduction(Factor);
102      Term.AddProduction(InvExpr, Inv);
103
104      Factor.AddProduction(Var);
105      Factor.AddProduction(LogFactor);
106      Factor.AddProduction(ExpFactor);
107      Factor.AddProduction(SinFactor);
108      Factor.AddProduction(CosFactor);
109
110      LogFactor.AddProduction(SimpleExpr, Log);
111      ExpFactor.AddProduction(SimpleTerm, Exp);
112      SinFactor.AddProduction(SimpleExpr, Sin);
113      CosFactor.AddProduction(SimpleExpr, Cos);
114
115      SimpleExpr.AddProduction(SimpleTerm, SimpleExpr, Addition);
116      SimpleExpr.AddProduction(SimpleTerm);
117
118      SimpleTerm.AddProduction(Var, SimpleTerm, Multiplication);
119      SimpleTerm.AddProduction(Var);
120
121      InvExpr.AddProduction(InvTerm, InvExpr, Addition);
122      InvExpr.AddProduction(InvTerm);
123
124      InvTerm.AddProduction(Factor, InvTerm, Multiplication);
125      InvTerm.AddProduction(Factor);
126      #endregion
127
128      #region Parsing to SymbolicExpressionTree
129      symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
130      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
131
132      constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
133      varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
134      addSy = symbolicExpressionGrammar.Symbols.OfType<Addition>().First();
135      mulSy = symbolicExpressionGrammar.Symbols.OfType<Multiplication>().First();
136      logSy = symbolicExpressionGrammar.Symbols.OfType<Logarithm>().First();
137      expSy = symbolicExpressionGrammar.Symbols.OfType<Exponential>().First();
138      divSy = symbolicExpressionGrammar.Symbols.OfType<Division>().First();
139      sinSy = symbolicExpressionGrammar.Symbols.OfType<Sine>().First();
140      cosSy = symbolicExpressionGrammar.Symbols.OfType<Cosine>().First();
141
142      rootSy = symbolicExpressionGrammar.Symbols.OfType<ProgramRootSymbol>().First();
143      startSy = symbolicExpressionGrammar.Symbols.OfType<StartSymbol>().First();
144
145      #endregion
146    }
147
148    #region Hashing
149    public int CalcHashCode(SymbolString sentence) {
150      return CalcHashCode<int>(sentence, AggregateIntHashes);
151    }
152
153    private int CalcHashCode<THashType>(SymbolString sentence, Func<Symbol, THashType[], THashType> aggregateFunction) {
154      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
155
156      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
157
158      Symbol peek = parseStack.Peek();
159      return aggregateFunction(peek, GetSubtreeHashes(parseStack, aggregateFunction)).GetHashCode();
160    }
161
162    private THashType[] GetSubtreeHashes<THashType>(Stack<Symbol> parseStack, Func<Symbol, THashType[], THashType> aggregateHashes) {
163      Symbol currentSymbol = parseStack.Pop();
164
165      // ADDITION
166      if (ReferenceEquals(currentSymbol, Addition)) {
167        var uniqueChildHashes = new HashSet<THashType>();
168
169        // First subtree
170        if (ReferenceEquals(parseStack.Peek(), Addition)) {
171          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack, aggregateHashes));
172        } else {
173          var peek = parseStack.Peek();
174          uniqueChildHashes.Add(aggregateHashes(peek, GetSubtreeHashes(parseStack, aggregateHashes)));
175        }
176        // Second subtree
177        if (ReferenceEquals(parseStack.Peek(), Addition)) {
178          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack, aggregateHashes));
179        } else {
180          var peek = parseStack.Peek();
181          uniqueChildHashes.Add(aggregateHashes(peek, GetSubtreeHashes(parseStack, aggregateHashes)));
182        }
183
184        var result = uniqueChildHashes.ToArray();
185        Array.Sort(result);
186        return result;
187      }
188
189      // MULTIPLICATION
190      if (ReferenceEquals(currentSymbol, Multiplication)) {
191        var childHashes = new List<THashType>();
192
193        // First subtree
194        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
195          childHashes.AddRange(GetSubtreeHashes(parseStack, aggregateHashes));
196        } else {
197          childHashes.Add(aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)));
198        }
199        // Second subtree
200        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
201          childHashes.AddRange(GetSubtreeHashes(parseStack, aggregateHashes));
202        } else {
203          childHashes.Add(aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)));
204        }
205
206        // Sort due to commutativity
207        childHashes.Sort();
208
209        // Cancel out inverse factors.
210        bool[] isFactorRemaining = Enumerable.Repeat(true, childHashes.Count).ToArray();
211
212        for (int i = 0; i < isFactorRemaining.Length; i++) {
213          if (!isFactorRemaining[i]) continue;
214          if (isFactorRemaining.Count() <= 2) break; // Until we have constants, we can't cancel out all terms.
215
216          var currFactor = childHashes[i];
217          var invFactor = aggregateHashes(Inv, new[] { currFactor });
218
219          int indexOfInv = childHashes.IndexOf(invFactor);
220          if (indexOfInv >= 0 && isFactorRemaining[indexOfInv]) {
221            isFactorRemaining[i] = isFactorRemaining[indexOfInv] = false;
222          }
223        }
224        return Enumerable
225          .Range(0, isFactorRemaining.Length)
226          .Where(i => isFactorRemaining[i])
227          .Select(i => childHashes[i])
228          .ToArray();
229      }
230
231      // LOG, EXP, SIN, INV
232      if (ReferenceEquals(currentSymbol, Log) || ReferenceEquals(currentSymbol, Exp) ||
233          ReferenceEquals(currentSymbol, Sin) || ReferenceEquals(currentSymbol, Cos) ||
234          ReferenceEquals(currentSymbol, Inv)) {
235        return new[] { aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)) };
236      }
237
238      // var or nonterminal symbol
239      return new[] { aggregateHashes(currentSymbol, new THashType[0]) };
240    }
241
242    private string AggregateStringHashes(Symbol operatorSym, string[] hashes) {
243      var hashesArray = hashes.ToArray();
244
245      if ((ReferenceEquals(operatorSym, Addition) || ReferenceEquals(operatorSym, Multiplication)) && hashesArray.Count() <= 1) {
246        return hashesArray[0];
247      }
248      if (operatorSym is NonterminalSymbol || ((TerminalSymbol)operatorSym).IsVariable) {
249        return operatorSym.StringRepresentation;
250      }
251
252      return $"[{hashesArray.Aggregate(operatorSym.StringRepresentation, (result, ti) => string.Concat(result, " ° ", ti))}]";      // TODO: use string join instead of string.Concat
253    }
254
255    private int AggregateIntHashes(Symbol operatorSym, int[] hashes) {
256      int start;
257      if ((ReferenceEquals(operatorSym, Addition) || ReferenceEquals(operatorSym, Multiplication)) &&
258          hashes.Length <= 1) {
259        start = 0;
260
261      } else if (operatorSym is NonterminalSymbol || ((TerminalSymbol)operatorSym).IsVariable) {
262        return operatorSym.StringRepresentation.GetHashCode();
263
264      } else {
265        start = operatorSym.StringRepresentation.GetHashCode();
266      }
267
268      for (int i = 0; i < hashes.Length; i++) {
269        start = ((start << 5) + start) ^ hashes[i];
270      }
271      return start;
272    }
273    #endregion
274
275    #region Parse to SymbolicExpressionTree
276    public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) {
277      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
278      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
279
280      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();     // TODO: not necessary to call this for each sentence
281
282      var rootNode = rootSy.CreateTreeNode();
283      var startNode = startSy.CreateTreeNode();
284      rootNode.AddSubtree(startNode);
285
286      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
287      startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
288
289      return new SymbolicExpressionTree(rootNode);
290    }
291
292    public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<TerminalSymbol> parseStack) {
293      TerminalSymbol currentSymbol = parseStack.Pop();
294
295      ISymbolicExpressionTreeNode parsedSubTree = null;
296
297      if (ReferenceEquals(currentSymbol, Addition)) {
298        parsedSubTree = addSy.CreateTreeNode();
299        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
300        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
301
302      } else if (ReferenceEquals(currentSymbol, Multiplication)) {
303        parsedSubTree = mulSy.CreateTreeNode();
304        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
305        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
306
307      } else if (ReferenceEquals(currentSymbol, Log)) {
308        parsedSubTree = logSy.CreateTreeNode();
309        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
310
311      } else if (ReferenceEquals(currentSymbol, Exp)) {
312        parsedSubTree = expSy.CreateTreeNode();
313        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
314
315      } else if (ReferenceEquals(currentSymbol, Sin)) {
316        parsedSubTree = sinSy.CreateTreeNode();
317        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
318
319      } else if (ReferenceEquals(currentSymbol, Cos)) {
320        parsedSubTree = cosSy.CreateTreeNode();
321        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
322
323      } else if (ReferenceEquals(currentSymbol, Inv)) {
324        parsedSubTree = divSy.CreateTreeNode();
325        ConstantTreeNode dividend = (ConstantTreeNode)constSy.CreateTreeNode();
326        dividend.Value = 1.0;
327        parsedSubTree.AddSubtree(dividend);
328        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
329
330      } else if (currentSymbol.IsVariable) {
331        VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
332        varNode.Weight = 1.0;
333        varNode.VariableName = currentSymbol.StringRepresentation;
334        parsedSubTree = varNode;
335      }
336
337      Debug.Assert(parsedSubTree != null);
338      return parsedSubTree;
339    }
340    #endregion
341
342    #region Parse to Infix string
343
344    public SymbolString PostfixToInfixParser(SymbolString phrase) {
345      Stack<Symbol> parseStack = new Stack<Symbol>(phrase);
346
347      return PostfixToInfixSubtreeParser(parseStack);
348    }
349
350    private SymbolString PostfixToInfixSubtreeParser(Stack<Symbol> parseStack) {
351      Symbol head = parseStack.Pop();
352
353      SymbolString result = new SymbolString(parseStack.Count);
354
355      if (ReferenceEquals(head, Addition) || ReferenceEquals(head, Multiplication)) {
356        // right part
357        SymbolString rightPart = PostfixToInfixSubtreeParser(parseStack);
358        SymbolString leftPart = PostfixToInfixSubtreeParser(parseStack);
359
360        result.AddRange(leftPart);
361        result.Add(head);
362        result.AddRange(rightPart);
363
364      } else if (ReferenceEquals(head, Log) || ReferenceEquals(head, Exp)
365              || ReferenceEquals(head, Sin) || ReferenceEquals(head, Cos)
366              || ReferenceEquals(head, Inv)) {
367        result.Add(head);
368        result.Add(OpeningBracket);
369        result.AddRange(PostfixToInfixSubtreeParser(parseStack));
370        result.Add(ClosingBracket);
371
372      } else {
373        result.Add(head);
374      }
375      return result;
376    }
377
378    #endregion
379  }
380}
Note: See TracBrowser for help on using the repository browser.