Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs @ 15824

Last change on this file since 15824 was 15821, checked in by lkammere, 6 years ago

#2886 Move code for visualization and logging of sentences to separate classes.

File size: 13.4 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
6using HeuristicLab.Common;
7using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
8using HeuristicLab.Problems.DataAnalysis.Symbolic;
9
10namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
11  public class Grammar {
12
13    public Symbol StartSymbol { get; }
14
15    #region Symbols
16    public VariableSymbol Var;
17
18    public NonterminalSymbol Expr;
19    public NonterminalSymbol Term;
20    public NonterminalSymbol Factor;
21    public NonterminalSymbol LogFactor;
22    public NonterminalSymbol ExpFactor;
23    public NonterminalSymbol SinFactor;
24    public NonterminalSymbol CosFactor;
25
26    public NonterminalSymbol SimpleExpr;
27    public NonterminalSymbol SimpleTerm;
28
29    public NonterminalSymbol InvExpr;
30    public NonterminalSymbol InvTerm;
31
32    public TerminalSymbol Addition;
33    public TerminalSymbol Multiplication;
34    public TerminalSymbol Log;
35    public TerminalSymbol Exp;
36    public TerminalSymbol Sin;
37    public TerminalSymbol Cos;
38    public TerminalSymbol Inv;
39
40    // For infix notation
41    public TerminalSymbol OpeningBracket;
42    public TerminalSymbol ClosingBracket;
43
44    #endregion
45
46    #region HL Symbols for Parsing ExpressionTrees
47
48    private ISymbol constSy;
49    private ISymbol varSy;
50
51    private ISymbol addSy;
52    private ISymbol mulSy;
53    private ISymbol logSy;
54    private ISymbol expSy;
55    private ISymbol divSy;
56    private ISymbol sinSy;
57    private ISymbol cosSy;
58
59    private ISymbol rootSy;
60    private ISymbol startSy;
61
62    private InfixExpressionFormatter infixExpressionFormatter;
63    #endregion
64
65    public Grammar(string[] variables) {
66      #region Define Symbols
67      Var = new VariableSymbol("var", variables);
68
69      Expr = new NonterminalSymbol("Expr");
70      Term = new NonterminalSymbol("Term");
71      Factor = new NonterminalSymbol("Factor");
72      LogFactor = new NonterminalSymbol("LogFactor");
73      ExpFactor = new NonterminalSymbol("ExpFactor");
74      SinFactor = new NonterminalSymbol("SinFactor");
75      CosFactor = new NonterminalSymbol("CosFactor");
76
77      SimpleExpr = new NonterminalSymbol("SimpleExpr");
78      SimpleTerm = new NonterminalSymbol("SimpleTerm");
79
80      InvExpr = new NonterminalSymbol("InvExpr");
81      InvTerm = new NonterminalSymbol("InvTerm");
82
83      Addition = new TerminalSymbol("+");
84      Multiplication = new TerminalSymbol("*");
85      Log = new TerminalSymbol("log");
86      Exp = new TerminalSymbol("exp");
87      Sin = new TerminalSymbol("sin");
88      Cos = new TerminalSymbol("cos");
89      Inv = new TerminalSymbol("inv");
90
91      OpeningBracket = new TerminalSymbol("(");
92      ClosingBracket = new TerminalSymbol(")");
93      #endregion
94
95      #region Production rules
96      StartSymbol = Expr;
97
98      Expr.AddProduction(Term, Expr, Addition);
99      Expr.AddProduction(Term);
100
101      Term.AddProduction(Factor, Term, Multiplication);
102      Term.AddProduction(Factor);
103      Term.AddProduction(InvExpr, Inv);
104
105      Factor.AddProduction(Var);
106      Factor.AddProduction(LogFactor);
107      Factor.AddProduction(ExpFactor);
108      Factor.AddProduction(SinFactor);
109      Factor.AddProduction(CosFactor);
110
111      LogFactor.AddProduction(SimpleExpr, Log);
112      ExpFactor.AddProduction(SimpleTerm, Exp);
113      SinFactor.AddProduction(SimpleExpr, Sin);
114      CosFactor.AddProduction(SimpleExpr, Cos);
115
116      SimpleExpr.AddProduction(SimpleTerm, SimpleExpr, Addition);
117      SimpleExpr.AddProduction(SimpleTerm);
118
119      SimpleTerm.AddProduction(Var, SimpleTerm, Multiplication);
120      SimpleTerm.AddProduction(Var);
121
122      InvExpr.AddProduction(InvTerm, InvExpr, Addition);
123      InvExpr.AddProduction(InvTerm);
124
125      InvTerm.AddProduction(Factor, InvTerm, Multiplication);
126      InvTerm.AddProduction(Factor);
127      #endregion
128
129      #region Parsing to SymbolicExpressionTree
130      var symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
131      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
132
133      constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
134      varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
135      addSy = symbolicExpressionGrammar.Symbols.OfType<Addition>().First();
136      mulSy = symbolicExpressionGrammar.Symbols.OfType<Multiplication>().First();
137      logSy = symbolicExpressionGrammar.Symbols.OfType<Logarithm>().First();
138      expSy = symbolicExpressionGrammar.Symbols.OfType<Exponential>().First();
139      divSy = symbolicExpressionGrammar.Symbols.OfType<Division>().First();
140      sinSy = symbolicExpressionGrammar.Symbols.OfType<Sine>().First();
141      cosSy = symbolicExpressionGrammar.Symbols.OfType<Cosine>().First();
142
143      rootSy = symbolicExpressionGrammar.Symbols.OfType<ProgramRootSymbol>().First();
144      startSy = symbolicExpressionGrammar.Symbols.OfType<StartSymbol>().First();
145
146      infixExpressionFormatter = new InfixExpressionFormatter();
147      #endregion
148    }
149
150    #region Hashing
151    public int CalcHashCode(SymbolString sentence) {
152      return CalcHashCode<int>(sentence, AggregateIntHashes);
153    }
154
155    private int CalcHashCode<THashType>(SymbolString sentence, Func<Symbol, THashType[], THashType> aggregateFunction) {
156      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
157
158      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
159
160      Symbol peek = parseStack.Peek();
161      return aggregateFunction(peek, GetSubtreeHashes(parseStack, aggregateFunction)).GetHashCode();
162    }
163
164    private THashType[] GetSubtreeHashes<THashType>(Stack<Symbol> parseStack, Func<Symbol, THashType[], THashType> aggregateHashes) {
165      Symbol currentSymbol = parseStack.Pop();
166
167      // ADDITION
168      if (ReferenceEquals(currentSymbol, Addition)) {
169        var uniqueChildHashes = new HashSet<THashType>();
170
171        // First subtree
172        if (ReferenceEquals(parseStack.Peek(), Addition)) {
173          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack, aggregateHashes));
174        } else {
175          var peek = parseStack.Peek();
176          uniqueChildHashes.Add(aggregateHashes(peek, GetSubtreeHashes(parseStack, aggregateHashes)));
177        }
178        // Second subtree
179        if (ReferenceEquals(parseStack.Peek(), Addition)) {
180          uniqueChildHashes.UnionWith(GetSubtreeHashes(parseStack, aggregateHashes));
181        } else {
182          var peek = parseStack.Peek();
183          uniqueChildHashes.Add(aggregateHashes(peek, GetSubtreeHashes(parseStack, aggregateHashes)));
184        }
185
186        var result = uniqueChildHashes.ToArray();
187        Array.Sort(result);
188        return result;
189      }
190
191      // MULTIPLICATION
192      if (ReferenceEquals(currentSymbol, Multiplication)) {
193        var childHashes = new List<THashType>();
194
195        // First subtree
196        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
197          childHashes.AddRange(GetSubtreeHashes(parseStack, aggregateHashes));
198        } else {
199          childHashes.Add(aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)));
200        }
201        // Second subtree
202        if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
203          childHashes.AddRange(GetSubtreeHashes(parseStack, aggregateHashes));
204        } else {
205          childHashes.Add(aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)));
206        }
207
208        // Sort due to commutativity
209        childHashes.Sort();
210
211        // Cancel out inverse factors.
212        bool[] isFactorRemaining = Enumerable.Repeat(true, childHashes.Count).ToArray();
213
214        for (int i = 0; i < isFactorRemaining.Length; i++) {
215          if (!isFactorRemaining[i]) continue;
216          if (isFactorRemaining.Count() <= 2) break; // Until we have constants, we can't cancel out all terms.
217
218          var currFactor = childHashes[i];
219          var invFactor = aggregateHashes(Inv, new[] { currFactor });
220
221          int indexOfInv = childHashes.IndexOf(invFactor);
222          if (indexOfInv >= 0 && isFactorRemaining[indexOfInv]) {
223            isFactorRemaining[i] = isFactorRemaining[indexOfInv] = false;
224          }
225        }
226        return Enumerable
227          .Range(0, isFactorRemaining.Length)
228          .Where(i => isFactorRemaining[i])
229          .Select(i => childHashes[i])
230          .ToArray();
231      }
232
233      // LOG, EXP, SIN, INV
234      if (ReferenceEquals(currentSymbol, Log) || ReferenceEquals(currentSymbol, Exp) ||
235          ReferenceEquals(currentSymbol, Sin) || ReferenceEquals(currentSymbol, Cos) ||
236          ReferenceEquals(currentSymbol, Inv)) {
237        return new[] { aggregateHashes(parseStack.Peek(), GetSubtreeHashes(parseStack, aggregateHashes)) };
238      }
239
240      // var or nonterminal symbol
241      return new[] { aggregateHashes(currentSymbol, new THashType[0]) };
242    }
243
244    private string AggregateStringHashes(Symbol operatorSym, string[] hashes) {
245      var hashesArray = hashes.ToArray();
246
247      if ((ReferenceEquals(operatorSym, Addition) || ReferenceEquals(operatorSym, Multiplication)) && hashesArray.Count() <= 1) {
248        return hashesArray[0];
249      }
250      if (operatorSym is NonterminalSymbol || ((TerminalSymbol)operatorSym).IsVariable) {
251        return operatorSym.StringRepresentation;
252      }
253
254      return $"[{hashesArray.Aggregate(operatorSym.StringRepresentation, (result, ti) => string.Concat(result, " ° ", ti))}]";      // TODO: use string join instead of string.Concat
255    }
256
257    private int AggregateIntHashes(Symbol operatorSym, int[] hashes) {
258      int start;
259      if ((ReferenceEquals(operatorSym, Addition) || ReferenceEquals(operatorSym, Multiplication)) &&
260          hashes.Length <= 1) {
261        start = 0;
262
263      } else if (operatorSym is NonterminalSymbol || ((TerminalSymbol)operatorSym).IsVariable) {
264        return operatorSym.StringRepresentation.GetHashCode();
265
266      } else {
267        start = operatorSym.StringRepresentation.GetHashCode();
268      }
269
270      for (int i = 0; i < hashes.Length; i++) {
271        start = ((start << 5) + start) ^ hashes[i];
272      }
273      return start;
274    }
275    #endregion
276
277    #region Parse to SymbolicExpressionTree
278
279    public string ToInfixString(SymbolString sentence) {
280      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
281      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
282
283      return infixExpressionFormatter.Format(ParseSymbolicExpressionTree(sentence));
284    }
285
286    public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) {
287      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
288      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
289
290      var rootNode = rootSy.CreateTreeNode();
291      var startNode = startSy.CreateTreeNode();
292      rootNode.AddSubtree(startNode);
293
294      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
295      startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
296
297      return new SymbolicExpressionTree(rootNode);
298    }
299
300    public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<TerminalSymbol> parseStack) {
301      TerminalSymbol currentSymbol = parseStack.Pop();
302
303      ISymbolicExpressionTreeNode parsedSubTree = null;
304
305      if (ReferenceEquals(currentSymbol, Addition)) {
306        parsedSubTree = addSy.CreateTreeNode();
307        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
308        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
309
310      } else if (ReferenceEquals(currentSymbol, Multiplication)) {
311        parsedSubTree = mulSy.CreateTreeNode();
312        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
313        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
314
315      } else if (ReferenceEquals(currentSymbol, Log)) {
316        parsedSubTree = logSy.CreateTreeNode();
317        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
318
319      } else if (ReferenceEquals(currentSymbol, Exp)) {
320        parsedSubTree = expSy.CreateTreeNode();
321        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
322
323      } else if (ReferenceEquals(currentSymbol, Sin)) {
324        parsedSubTree = sinSy.CreateTreeNode();
325        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
326
327      } else if (ReferenceEquals(currentSymbol, Cos)) {
328        parsedSubTree = cosSy.CreateTreeNode();
329        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
330
331      } else if (ReferenceEquals(currentSymbol, Inv)) {
332        parsedSubTree = divSy.CreateTreeNode();
333        ConstantTreeNode dividend = (ConstantTreeNode)constSy.CreateTreeNode();
334        dividend.Value = 1.0;
335        parsedSubTree.AddSubtree(dividend);
336        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
337
338      } else if (currentSymbol.IsVariable) {
339        VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
340        varNode.Weight = 1.0;
341        varNode.VariableName = currentSymbol.StringRepresentation;
342        parsedSubTree = varNode;
343      }
344
345      Debug.Assert(parsedSubTree != null);
346      return parsedSubTree;
347    }
348    #endregion
349  }
350}
Note: See TracBrowser for help on using the repository browser.