Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs @ 15827

Last change on this file since 15827 was 15827, checked in by lkammere, 6 years ago

#2886: Change implementation of symbol strings from list to array.

File size: 13.3 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
6using HeuristicLab.Common;
7using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
8using HeuristicLab.Problems.DataAnalysis.Symbolic;
9
10namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
11  public class Grammar {
12
13    public Symbol StartSymbol { get; }
14
15    #region Symbols
16    public VariableSymbol Var;
17
18    public NonterminalSymbol Expr;
19    public NonterminalSymbol Term;
20    public NonterminalSymbol Factor;
21    public NonterminalSymbol LogFactor;
22    public NonterminalSymbol ExpFactor;
23    public NonterminalSymbol SinFactor;
24    public NonterminalSymbol CosFactor;
25
26    public NonterminalSymbol SimpleExpr;
27    public NonterminalSymbol SimpleTerm;
28
29    public NonterminalSymbol InvExpr;
30    public NonterminalSymbol InvTerm;
31
32    public TerminalSymbol Addition;
33    public TerminalSymbol Multiplication;
34    public TerminalSymbol Log;
35    public TerminalSymbol Exp;
36    public TerminalSymbol Sin;
37    public TerminalSymbol Cos;
38    public TerminalSymbol Inv;
39
40    // For infix notation
41    public TerminalSymbol OpeningBracket;
42    public TerminalSymbol ClosingBracket;
43
44    #endregion
45
46    #region HL Symbols for Parsing ExpressionTrees
47
48    private ISymbol constSy;
49    private ISymbol varSy;
50
51    private ISymbol addSy;
52    private ISymbol mulSy;
53    private ISymbol logSy;
54    private ISymbol expSy;
55    private ISymbol divSy;
56    private ISymbol sinSy;
57    private ISymbol cosSy;
58
59    private ISymbol rootSy;
60    private ISymbol startSy;
61
62    private InfixExpressionFormatter infixExpressionFormatter;
63    #endregion
64
65    public Grammar(string[] variables) {
66      #region Define Symbols
67      Var = new VariableSymbol("var", variables);
68
69      Expr = new NonterminalSymbol("Expr");
70      Term = new NonterminalSymbol("Term");
71      Factor = new NonterminalSymbol("Factor");
72      LogFactor = new NonterminalSymbol("LogFactor");
73      ExpFactor = new NonterminalSymbol("ExpFactor");
74      SinFactor = new NonterminalSymbol("SinFactor");
75      CosFactor = new NonterminalSymbol("CosFactor");
76
77      SimpleExpr = new NonterminalSymbol("SimpleExpr");
78      SimpleTerm = new NonterminalSymbol("SimpleTerm");
79
80      InvExpr = new NonterminalSymbol("InvExpr");
81      InvTerm = new NonterminalSymbol("InvTerm");
82
83      Addition = new TerminalSymbol("+");
84      Multiplication = new TerminalSymbol("*");
85      Log = new TerminalSymbol("log");
86      Exp = new TerminalSymbol("exp");
87      Sin = new TerminalSymbol("sin");
88      Cos = new TerminalSymbol("cos");
89      Inv = new TerminalSymbol("inv");
90
91      OpeningBracket = new TerminalSymbol("(");
92      ClosingBracket = new TerminalSymbol(")");
93      #endregion
94
95      #region Production rules
96      StartSymbol = Expr;
97
98      Expr.AddProduction(Term, Expr, Addition);
99      Expr.AddProduction(Term);
100
101      Term.AddProduction(Factor, Term, Multiplication);
102      Term.AddProduction(Factor);
103      Term.AddProduction(InvExpr, Inv);
104
105      Factor.AddProduction(Var);
106      Factor.AddProduction(LogFactor);
107      Factor.AddProduction(ExpFactor);
108      Factor.AddProduction(SinFactor);
109      Factor.AddProduction(CosFactor);
110
111      LogFactor.AddProduction(SimpleExpr, Log);
112      ExpFactor.AddProduction(SimpleTerm, Exp);
113      SinFactor.AddProduction(SimpleExpr, Sin);
114      CosFactor.AddProduction(SimpleExpr, Cos);
115
116      SimpleExpr.AddProduction(SimpleTerm, SimpleExpr, Addition);
117      SimpleExpr.AddProduction(SimpleTerm);
118
119      SimpleTerm.AddProduction(Var, SimpleTerm, Multiplication);
120      SimpleTerm.AddProduction(Var);
121
122      InvExpr.AddProduction(InvTerm, InvExpr, Addition);
123      InvExpr.AddProduction(InvTerm);
124
125      InvTerm.AddProduction(Factor, InvTerm, Multiplication);
126      InvTerm.AddProduction(Factor);
127      #endregion
128
129      #region Parsing to SymbolicExpressionTree
130      var symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
131      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
132
133      constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
134      varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
135      addSy = symbolicExpressionGrammar.Symbols.OfType<Addition>().First();
136      mulSy = symbolicExpressionGrammar.Symbols.OfType<Multiplication>().First();
137      logSy = symbolicExpressionGrammar.Symbols.OfType<Logarithm>().First();
138      expSy = symbolicExpressionGrammar.Symbols.OfType<Exponential>().First();
139      divSy = symbolicExpressionGrammar.Symbols.OfType<Division>().First();
140      sinSy = symbolicExpressionGrammar.Symbols.OfType<Sine>().First();
141      cosSy = symbolicExpressionGrammar.Symbols.OfType<Cosine>().First();
142
143      rootSy = symbolicExpressionGrammar.Symbols.OfType<ProgramRootSymbol>().First();
144      startSy = symbolicExpressionGrammar.Symbols.OfType<StartSymbol>().First();
145
146      infixExpressionFormatter = new InfixExpressionFormatter();
147      #endregion
148    }
149
150    #region Hashing
151    public int CalcHashCode(SymbolString sentence) {
152      return CalcHashCode<int>(sentence, AggregateIntHashes);
153    }
154
155    private int CalcHashCode<THashType>(SymbolString sentence, Func<Symbol, THashType[], THashType> aggregateFunction) {
156      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
157
158      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
159
160      Symbol peek = parseStack.Peek();
161      return aggregateFunction(peek, GetSubtreeHashes(parseStack, aggregateFunction)).GetHashCode();
162    }
163
164    private THashType[] GetSubtreeHashes<THashType>(Stack<Symbol> parseStack, Func<Symbol, THashType[], THashType> aggregateHashes) {
165      Symbol currentSymbol = parseStack.Pop();
166
167      // ADDITION
168      if (ReferenceEquals(currentSymbol, Addition)) {
169        var uniqueChildHashes = new HashSet<THashType>();
170
171        // First subtree
172        if (ReferenceEquals(parseStack.Peek(), Addition)) {
173          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack, aggregateHashes));
174        } else {
175          var peek = parseStack.Peek();
176          uniqueChildHashes.Add(aggregateHashes(peek, GetSubtreeHashes(parseStack, aggregateHashes)));
177        }
178        // Second subtree
179        if (ReferenceEquals(parseStack.Peek(), Addition)) {
180          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack, aggregateHashes));
181        } else {
182          var peek = parseStack.Peek();
183          uniqueChildHashes.Add(aggregateHashes(peek, GetSubtreeHashes(parseStack, aggregateHashes)));
184        }
185
186        var result = uniqueChildHashes.ToArray();
187        Array.Sort(result);
188        return result;
189      }
190
191      // MULTIPLICATION
192      if (ReferenceEquals(currentSymbol, Multiplication)) {
193        var childHashes = new List<THashType>();
194
195        // First subtree
196        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
197          childHashes.AddRange(GetSubtreeHashes(parseStack, aggregateHashes));
198        } else {
199          childHashes.Add(aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)));
200        }
201        // Second subtree
202        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
203          childHashes.AddRange(GetSubtreeHashes(parseStack, aggregateHashes));
204        } else {
205          childHashes.Add(aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)));
206        }
207
208        // Sort due to commutativity
209        childHashes.Sort();
210
211        // Cancel out inverse factors.
212        bool[] isFactorRemaining = Enumerable.Repeat(true, childHashes.Count).ToArray();
213
214        for (int i = 0; i < isFactorRemaining.Length; i++) {
215          if (!isFactorRemaining[i]) continue;
216          if (isFactorRemaining.Length <= 2) break; // Until we have constants, we can't cancel out all terms.
217
218          var currFactor = childHashes[i];
219          var invFactor = aggregateHashes(Inv, new[] { currFactor });
220
221          int indexOfInv = childHashes.IndexOf(invFactor);
222          if (indexOfInv >= 0 && isFactorRemaining[indexOfInv]) {
223            isFactorRemaining[i] = isFactorRemaining[indexOfInv] = false;
224          }
225        }
226
227        return childHashes
228          .Where((c, i) => isFactorRemaining[i])
229          .ToArray();
230      }
231
232      // LOG, EXP, SIN, INV
233      if (ReferenceEquals(currentSymbol, Log) || ReferenceEquals(currentSymbol, Exp) ||
234          ReferenceEquals(currentSymbol, Sin) || ReferenceEquals(currentSymbol, Cos) ||
235          ReferenceEquals(currentSymbol, Inv)) {
236        return new[] { aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)) };
237      }
238
239      // var or nonterminal symbol
240      return new[] { aggregateHashes(currentSymbol, new THashType[0]) };
241    }
242
243    private string AggregateStringHashes(Symbol operatorSym, string[] hashes) {
244      var hashesArray = hashes.ToArray();
245
246      if ((ReferenceEquals(operatorSym, Addition) || ReferenceEquals(operatorSym, Multiplication)) && hashesArray.Count() <= 1) {
247        return hashesArray[0];
248      }
249      if (operatorSym is NonterminalSymbol || ((TerminalSymbol)operatorSym).IsVariable) {
250        return operatorSym.StringRepresentation;
251      }
252
253      return $"[{hashesArray.Aggregate(operatorSym.StringRepresentation, (result, ti) => string.Concat(result, " ° ", ti))}]";      // TODO: use string join instead of string.Concat
254    }
255
256    private int AggregateIntHashes(Symbol operatorSym, int[] hashes) {
257      int start;
258      if ((ReferenceEquals(operatorSym, Addition) || ReferenceEquals(operatorSym, Multiplication)) &&
259          hashes.Length <= 1) {
260        start = 0;
261
262      } else if (operatorSym is NonterminalSymbol || ((TerminalSymbol)operatorSym).IsVariable) {
263        return operatorSym.StringRepresentation.GetHashCode();
264
265      } else {
266        start = operatorSym.StringRepresentation.GetHashCode();
267      }
268
269      for (int i = 0; i < hashes.Length; i++) {
270        start = ((start << 5) + start) ^ hashes[i];
271      }
272      return start;
273    }
274    #endregion
275
276    #region Parse to SymbolicExpressionTree
277
278    public string ToInfixString(SymbolString sentence) {
279      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
280      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
281
282      return infixExpressionFormatter.Format(ParseSymbolicExpressionTree(sentence));
283    }
284
285    public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) {
286      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
287      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
288
289      var rootNode = rootSy.CreateTreeNode();
290      var startNode = startSy.CreateTreeNode();
291      rootNode.AddSubtree(startNode);
292
293      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
294      startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
295
296      return new SymbolicExpressionTree(rootNode);
297    }
298
299    public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<TerminalSymbol> parseStack) {
300      TerminalSymbol currentSymbol = parseStack.Pop();
301
302      ISymbolicExpressionTreeNode parsedSubTree = null;
303
304      if (ReferenceEquals(currentSymbol, Addition)) {
305        parsedSubTree = addSy.CreateTreeNode();
306        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
307        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
308
309      } else if (ReferenceEquals(currentSymbol, Multiplication)) {
310        parsedSubTree = mulSy.CreateTreeNode();
311        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
312        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
313
314      } else if (ReferenceEquals(currentSymbol, Log)) {
315        parsedSubTree = logSy.CreateTreeNode();
316        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
317
318      } else if (ReferenceEquals(currentSymbol, Exp)) {
319        parsedSubTree = expSy.CreateTreeNode();
320        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
321
322      } else if (ReferenceEquals(currentSymbol, Sin)) {
323        parsedSubTree = sinSy.CreateTreeNode();
324        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
325
326      } else if (ReferenceEquals(currentSymbol, Cos)) {
327        parsedSubTree = cosSy.CreateTreeNode();
328        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
329
330      } else if (ReferenceEquals(currentSymbol, Inv)) {
331        parsedSubTree = divSy.CreateTreeNode();
332        ConstantTreeNode dividend = (ConstantTreeNode)constSy.CreateTreeNode();
333        dividend.Value = 1.0;
334        parsedSubTree.AddSubtree(dividend);
335        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
336
337      } else if (currentSymbol.IsVariable) {
338        VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
339        varNode.Weight = 1.0;
340        varNode.VariableName = currentSymbol.StringRepresentation;
341        parsedSubTree = varNode;
342      }
343
344      Debug.Assert(parsedSubTree != null);
345      return parsedSubTree;
346    }
347    #endregion
348  }
349}
Note: See TracBrowser for help on using the repository browser.