source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs @ 15723

Last change on this file since 15723 was 15723, checked in by lkammere, 20 months ago

#2886: Add simple data analysis tests and further informations about algorithm run.

File size: 10.0 KB
Line 
1using System.Collections;
2using System.Collections.Generic;
3using System.ComponentModel;
4using System.Diagnostics;
5using System.Linq;
6using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
7using HeuristicLab.Common;
8using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
9using HeuristicLab.Problems.DataAnalysis.Symbolic;
10
11namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
12  public class Grammar {
13
14    public Symbol StartSymbol;
15
16    #region Symbols
17
18    public VariableSymbol Var;
19
20    public NonterminalSymbol Expr;
21    public NonterminalSymbol Term;
22    public NonterminalSymbol Factor;
23
24    public TerminalSymbol Addition;
25    public TerminalSymbol Multiplication;
26
27    #endregion
28
29
30    #region HL Symbols for Parsing ExpressionTrees
31
32    private TypeCoherentExpressionGrammar symbolicExpressionGrammar;
33
34    private ISymbol constSy;
35    private ISymbol varSy;
36
37    private ISymbol addSy;
38    private ISymbol mulSy;
39    private ISymbol logSy;
40    private ISymbol expSy;
41    private ISymbol divSy;
42
43    private ISymbol rootSy;
44    private ISymbol startSy;
45
46    #endregion
47
48    public Grammar(string[] variables) {
49      #region Define Symbols
50      Var = new VariableSymbol("var", variables);
51
52      Expr = new NonterminalSymbol("Expr");
53      Term = new NonterminalSymbol("Term");
54      Factor = new NonterminalSymbol("Factor");
55
56      Addition = new TerminalSymbol("+");
57      Multiplication = new TerminalSymbol("*");
58      #endregion
59
60
61      #region Production rules
62      // order of production is important, since they are accessed via index
63      // in memoization.
64      StartSymbol = Expr;
65
66      Expr.AddProduction(Term, Expr, Addition);
67      Expr.AddProduction(Term);
68
69      Term.AddProduction(Factor, Term, Multiplication);
70      Term.AddProduction(Factor);
71
72      Factor.AddProduction(Var);
73      #endregion
74
75      #region Parsing to SymbolicExpressionTree
76      symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
77      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
78
79      constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
80      varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
81      addSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Addition>().First();
82      mulSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Multiplication>().First();
83      logSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Logarithm>().First();
84      expSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Exponential>().First();
85      divSy = symbolicExpressionGrammar.AllowedSymbols.OfType<Division>().First();
86
87      rootSy = symbolicExpressionGrammar.AllowedSymbols.OfType<ProgramRootSymbol>().First();
88      startSy = symbolicExpressionGrammar.AllowedSymbols.OfType<StartSymbol>().First();
89
90      #endregion
91    }
92
93    /*
94    #region Memoize subtrees
95
96    public void MemoizeSubtrees(SymbolString sentence) {
97      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
98
99      // Parse root symbol "+"
100      MemoizeSubtreeExpression(parseStack);
101    }
102
103    private SymbolString MemoizeSubtreeExpression(Stack<TerminalSymbol> parseStack) {
104      SymbolString subtree = new SymbolString();
105
106      if (ReferenceEquals(parseStack.Peek(), Addition)) {
107        subtree.Add(parseStack.Pop());
108        subtree.InsertRange(0, MemoizeSubtreeExpression(parseStack));
109        subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));
110
111        Expr.Alternatives[0].GeneratedSentences.Add(subtree);
112      } else {
113        subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));
114
115        Expr.Alternatives[1].GeneratedSentences.Add(subtree);
116      }
117
118      return subtree;
119    }
120
121    private SymbolString MemoizeSubtreeTerm(Stack<TerminalSymbol> parseStack) {
122      SymbolString subtree = new SymbolString();
123
124      if (ReferenceEquals(parseStack.Peek(), Multiplication)) {
125        subtree.Add(parseStack.Pop());
126        subtree.InsertRange(0, MemoizeSubtreeTerm(parseStack));
127        subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack));
128
129        Term.Alternatives[0].GeneratedSentences.Add(subtree);
130      } else {
131        subtree.InsertRange(0, MemoizeSubtreeFactor(parseStack));
132
133        Term.Alternatives[1].GeneratedSentences.Add(subtree);
134      }
135
136      return subtree;
137    }
138
139    private SymbolString MemoizeSubtreeFactor(Stack<TerminalSymbol> parseStack) {
140      SymbolString subtree = new SymbolString(MemoizeSubtreeVar(parseStack));
141
142      Factor.Alternatives[0].GeneratedSentences.Add(subtree);
143      return subtree;
144    }
145
146    private SymbolString MemoizeSubtreeVar(Stack<TerminalSymbol> parseStack) {
147      SymbolString subtree = new SymbolString(parseStack.Pop().ToEnumerable());
148
149      // ... not really
150      //Var.Alternatives[0].GeneratedSentences.Add(subtree);
151      return subtree;
152    }
153
154
155    #endregion
156    */
157   
158    #region Hashing
159    public int CalcHashCode(SymbolString sentence) {
160      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
161      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
162
163      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
164
165      int[] subtreeHashes = GetSubtreeHashes(parseStack.Pop(), parseStack);
166      return AggregateHashes(subtreeHashes);
167    }
168
169    private int[] GetSubtreeHashes(TerminalSymbol currentSymbol, Stack<TerminalSymbol> parseStack) {
170      List<int> childHashes = null;
171
172      if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
173        childHashes = currentSymbol.StringRepresentation.GetHashCode().ToEnumerable().ToList();
174
175      } else if (ReferenceEquals(currentSymbol, Multiplication)) {
176        // MULTIPLICATION
177        childHashes = new List<int>();
178
179        // First subtree
180        TerminalSymbol firstSubtreeRoot = parseStack.Pop();
181        int[] firstSubtreeHashes = GetSubtreeHashes(firstSubtreeRoot, parseStack);
182
183        if (ReferenceEquals(firstSubtreeRoot, Multiplication)) {
184          childHashes.AddRange(firstSubtreeHashes);
185        } else {
186          childHashes.Add(AggregateHashes(firstSubtreeHashes));
187        }
188
189        // Second subtree
190        TerminalSymbol secondSubtreeRoot = parseStack.Pop();
191        int[] secondSubtreeHashes = GetSubtreeHashes(secondSubtreeRoot, parseStack);
192
193        if (ReferenceEquals(secondSubtreeRoot, Multiplication)) {
194          childHashes.AddRange(secondSubtreeHashes);
195        } else {
196          childHashes.Add(AggregateHashes(secondSubtreeHashes));
197        }
198
199        // Sort due to commutativity
200        childHashes.Sort();
201
202
203
204      } else if (ReferenceEquals(currentSymbol, Addition)) {
205        // ADDITION
206        HashSet<int> uniqueChildHashes = new HashSet<int>();
207
208        TerminalSymbol firstSubtreeRoot = parseStack.Pop();
209        int[] firstSubtreeHashes = GetSubtreeHashes(firstSubtreeRoot, parseStack);
210
211        // First subtree
212        if (ReferenceEquals(firstSubtreeRoot, Addition)) {
213          uniqueChildHashes.UnionWith(firstSubtreeHashes);
214        } else {
215          uniqueChildHashes.Add(AggregateHashes(firstSubtreeHashes));
216        }
217
218        // Second subtree
219        TerminalSymbol secondSubtreeRoot = parseStack.Pop();
220        int[] secondSubtreeHashes = GetSubtreeHashes(secondSubtreeRoot, parseStack);
221
222        if (ReferenceEquals(secondSubtreeRoot, Addition)) {
223          uniqueChildHashes.UnionWith(secondSubtreeHashes);
224        } else {
225          uniqueChildHashes.Add(AggregateHashes(secondSubtreeHashes));
226        }
227
228        childHashes = uniqueChildHashes.ToList();
229        childHashes.Sort();
230      }
231
232      Debug.Assert(childHashes != null, "Sentence not hashable!");
233      return childHashes.ToArray();
234    }
235
236    private int AggregateHashes(IEnumerable<int> hashes) {
237      return hashes.Aggregate(0, (result, ti) => ((result << 5) + result) ^ ti.GetHashCode());
238    }
239    #endregion
240
241    #region Parse to SymbolicExpressionTree
242    public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) {
243      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
244      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
245
246      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
247
248      var rootNode = rootSy.CreateTreeNode();
249      var startNode = startSy.CreateTreeNode();
250      rootNode.AddSubtree(startNode);
251
252      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
253      startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
254
255      return new SymbolicExpressionTree(rootNode);
256    }
257
258    public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<TerminalSymbol> parseStack) {
259      TerminalSymbol currentSymbol = parseStack.Pop();
260
261      ISymbolicExpressionTreeNode parsedSubTree = null;
262
263      if (ReferenceEquals(currentSymbol, Addition)) {
264        parsedSubTree = addSy.CreateTreeNode();
265        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
266        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
267
268      } else if (ReferenceEquals(currentSymbol, Multiplication)) {
269        parsedSubTree = mulSy.CreateTreeNode();
270        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
271        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
272
273      } else if (Var.VariableTerminalSymbols.Contains(currentSymbol)) {
274        VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
275        varNode.Weight = 1.0;
276        varNode.VariableName = currentSymbol.StringRepresentation;
277        parsedSubTree = varNode;
278      }
279
280      Debug.Assert(parsedSubTree != null);
281      return parsedSubTree;
282    }
283    #endregion
284  }
285}
Note: See TracBrowser for help on using the repository browser.