source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs @ 15722

Last change on this file since 15722 was 15722, checked in by lkammere, 20 months ago

#2886: Add evaluation of sentences.

File size: 7.8 KB
Line 
1using System.Collections;
2using System.Collections.Generic;
3using System.ComponentModel;
4using System.Diagnostics;
5using System.Linq;
6using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
7using HeuristicLab.Common;
8using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
9using HeuristicLab.Problems.DataAnalysis.Symbolic;
10
11namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
12  public class Grammar {
13
14    public Symbol StartSymbol;
15
16    #region Symbols
17
18    public VariableSymbol Var;
19
20    public NonterminalSymbol Expr;
21    public NonterminalSymbol Term;
22    public NonterminalSymbol Factor;
23
24    public TerminalSymbol Addition;
25    public TerminalSymbol Multiplication;
26
27    #endregion
28
29
30    #region HL Symbols for Parsing ExpressionTrees
31
32    private TypeCoherentExpressionGrammar symbolicExpressionGrammar;
33
34    private ISymbol constSy;
35    private ISymbol varSy;
36
37    private ISymbol addSy;
38    private ISymbol mulSy;
39    private ISymbol logSy;
40    private ISymbol expSy;
41    private ISymbol divSy;
42
43    private ISymbol rootSy;
44    private ISymbol startSy;
45
46    #endregion
47
48    public Grammar(string[] variables) {
49      #region Define Symbols
50      Var = new VariableSymbol("var", variables);
51
52      Expr = new NonterminalSymbol("Expr");
53      Term = new NonterminalSymbol("Term");
54      Factor = new NonterminalSymbol("Factor");
55
56      Addition = new TerminalSymbol("+");
57      Multiplication = new TerminalSymbol("*");
58      #endregion
59
60
61      #region Production ruless
62      StartSymbol = Expr;
63
64      Expr.AddProduction(Term, Expr, Addition);
65      Expr.AddProduction(Term);
66
67      Term.AddProduction(Factor, Term, Multiplication);
68      Term.AddProduction(Factor);
69
70      Factor.AddProduction(Var);
71      #endregion
72
73      #region Parsing to SymbolicExpressionTree
74      symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
75      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
76
77      constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
78      varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
79      addSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Addition>().First();
80      mulSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Multiplication>().First();
81      logSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Logarithm>().First();
82      expSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Exponential>().First();
83      divSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Division>().First();
84
85      rootSy = symbolicExpressionGrammar.AllowedSymbols.OfType<ProgramRootSymbol>().First();
86      startSy = symbolicExpressionGrammar.AllowedSymbols.OfType<StartSymbol>().First();
87
88      #endregion
89    }
90
91    public int CalcHashCode(SymbolString sentence) {
92      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
93      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
94
95      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
96
97      int[] subtreeHashes = GetSubtreeHashes(parseStack.Pop(), parseStack);
98      return AggregateHashes(subtreeHashes);
99    }
100
101    #region Hashing
102    private int[] GetSubtreeHashes(TerminalSymbol currentSymbol, Stack<TerminalSymbol> parseStack) {
103      List<int> childHashes = null;
104
105      if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
106        childHashes = currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToList();
107
108      } else if (ReferenceEquals(currentSymbol, Multiplication)) {
109        // MULTIPLICATION
110        childHashes = new List<int>();
111
112        // First subtree
113        TerminalSymbol firstSubtreeRoot = parseStack.Pop();
114        int[] firstSubtreeHashes = GetSubtreeHashes(firstSubtreeRoot, parseStack);
115
116        if (ReferenceEquals(firstSubtreeRoot, Multiplication)) {
117          childHashes.AddRange(firstSubtreeHashes);
118        } else {
119          childHashes.Add(AggregateHashes(firstSubtreeHashes));
120        }
121
122        // Second subtree
123        TerminalSymbol secondSubtreeRoot = parseStack.Pop();
124        int[] secondSubtreeHashes = GetSubtreeHashes(secondSubtreeRoot, parseStack);
125
126        if (ReferenceEquals(secondSubtreeRoot, Multiplication)) {
127          childHashes.AddRange(secondSubtreeHashes);
128        } else {
129          childHashes.Add(AggregateHashes(secondSubtreeHashes));
130        }
131
132        // Sort due to commutativity
133        childHashes.Sort();
134
135
136
137      } else if (ReferenceEquals(currentSymbol, Addition)) {
138        // ADDITION
139        HashSet<int> uniqueChildHashes = new HashSet<int>();
140
141        TerminalSymbol firstSubtreeRoot = parseStack.Pop();
142        int[] firstSubtreeHashes = GetSubtreeHashes(firstSubtreeRoot, parseStack);
143
144        // First subtree
145        if (ReferenceEquals(firstSubtreeRoot, Addition)) {
146          uniqueChildHashes.UnionWith(firstSubtreeHashes);
147        } else {
148          uniqueChildHashes.Add(AggregateHashes(firstSubtreeHashes));
149        }
150
151        // Second subtree
152        TerminalSymbol secondSubtreeRoot = parseStack.Pop();
153        int[] secondSubtreeHashes = GetSubtreeHashes(secondSubtreeRoot, parseStack);
154
155        if (ReferenceEquals(secondSubtreeRoot, Addition)) {
156          uniqueChildHashes.UnionWith(secondSubtreeHashes);
157        } else {
158          uniqueChildHashes.Add(AggregateHashes(secondSubtreeHashes));
159        }
160
161        childHashes = uniqueChildHashes.ToList();
162        childHashes.Sort();
163      }
164
165      Debug.Assert(childHashes != null, "Sentence not hashable!");
166      return childHashes.ToArray();
167    }
168
169
170    private int AggregateHashes(IEnumerable<int> hashes) {
171      return hashes.Aggregate(0, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
172    }
173    #endregion
174
175    #region Parse to SymbolicExpressionTree
176    public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) {
177      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
178      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
179
180      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
181
182      var rootNode = rootSy.CreateTreeNode();
183      var startNode = startSy.CreateTreeNode();
184      rootNode.AddSubtree(startNode);
185
186      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
187      startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
188
189      return new SymbolicExpressionTree(rootNode);
190    }
191
192    public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<TerminalSymbol> parseStack) {
193      TerminalSymbol currentSymbol = parseStack.Pop();
194
195      ISymbolicExpressionTreeNode parsedSubTree = null;
196
197      if (ReferenceEquals(currentSymbol, Addition)) {
198        parsedSubTree = addSy.CreateTreeNode();
199        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
200        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
201
202      } else if (ReferenceEquals(currentSymbol, Multiplication)) {
203        parsedSubTree = mulSy.CreateTreeNode();
204        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
205        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
206
207      } else if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
208        VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
209        varNode.Weight = 1.0;
210        varNode.VariableName = currentSymbol.StringRepresentation;
211        parsedSubTree = varNode;
212      }
213
214      Debug.Assert(parsedSubTree != null);
215      return parsedSubTree;
216    }
217    #endregion
218  }
219}
Note: See TracBrowser for help on using the repository browser.