source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs @ 15883

Last change on this file since 15883 was 15883, checked in by lkammere, 3 years ago

#2886: Priorize phrases whose (fully expanded) terms result in high R².

File size: 10.2 KB
Line 
1using System.Collections.Generic;
2using System.Diagnostics;
3using System.Linq;
4using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
5using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
6using HeuristicLab.Problems.DataAnalysis;
7using HeuristicLab.Problems.DataAnalysis.Symbolic;
8
9namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
10  public class Grammar {
11
12    public Symbol StartSymbol { get; }
13
14    public Hasher<int> Hasher { get; }
15
16    #region Symbols
17
18    public IReadOnlyDictionary<Symbol, IReadOnlyList<Production>> Productions { get; }
19
20    public NonterminalSymbol Var;
21    public IReadOnlyList<VariableTerminalSymbol> VarTerminals;
22
23    public NonterminalSymbol Expr;
24    public NonterminalSymbol Term;
25    public NonterminalSymbol Factor;
26    public NonterminalSymbol LogFactor;
27    public NonterminalSymbol ExpFactor;
28    public NonterminalSymbol SinFactor;
29
30    public NonterminalSymbol SimpleExpr;
31    public NonterminalSymbol SimpleTerm;
32
33    public NonterminalSymbol InvExpr;
34    public NonterminalSymbol InvTerm;
35
36    public TerminalSymbol Addition;
37    public TerminalSymbol Multiplication;
38    public TerminalSymbol Log;
39    public TerminalSymbol Exp;
40    public TerminalSymbol Sin;
41    public TerminalSymbol Inv;
42
43    public TerminalSymbol Const;
44
45    #endregion
46
47    #region HL Symbols for Parsing ExpressionTrees
48
49    private ISymbol constSy;
50    private ISymbol varSy;
51
52    private ISymbol addSy;
53    private ISymbol mulSy;
54    private ISymbol logSy;
55    private ISymbol expSy;
56    private ISymbol divSy;
57    private ISymbol sinSy;
58
59    private ISymbol rootSy;
60    private ISymbol startSy;
61
62    private InfixExpressionFormatter infixExpressionFormatter;
63    #endregion
64
65    public Grammar(string[] variables) {
66      #region Define Symbols
67      Var = new NonterminalSymbol("Var");
68
69      Expr = new NonterminalSymbol("Expr");
70      Term = new NonterminalSymbol("Term");
71      Factor = new NonterminalSymbol("Factor");
72      LogFactor = new NonterminalSymbol("LogFactor");
73      ExpFactor = new NonterminalSymbol("ExpFactor");
74      SinFactor = new NonterminalSymbol("SinFactor");
75
76      SimpleExpr = new NonterminalSymbol("SimpleExpr");
77      SimpleTerm = new NonterminalSymbol("SimpleTerm");
78
79      InvExpr = new NonterminalSymbol("InvExpr");
80      InvTerm = new NonterminalSymbol("InvTerm");
81
82      Addition = new TerminalSymbol("+");
83      Multiplication = new TerminalSymbol("*");
84      Log = new TerminalSymbol("log");
85      Exp = new TerminalSymbol("exp");
86      Sin = new TerminalSymbol("sin");
87      Inv = new TerminalSymbol("inv");
88
89      Const = new TerminalSymbol("c");
90      #endregion
91
92      #region Production rules
93      StartSymbol = Expr;
94
95      Dictionary<Symbol, IReadOnlyList<Production>> productions = new Dictionary<Symbol, IReadOnlyList<Production>>();
96
97      // Map each variable to a separate production rule of the "Var" nonterminal symbol.
98      VarTerminals = variables.Select(v => new VariableTerminalSymbol(v)).ToArray();
99      productions[Var] = VarTerminals.Select(v => new Production(v)).ToArray();
100
101      productions[Expr] = new[] {
102        new Production(Const, Term, Multiplication, Expr, Addition),
103        new Production(Const, Term, Multiplication, Const, Addition)
104      };
105
106      productions[Term] = new[] {
107        new Production(Factor, Term, Multiplication),
108        new Production(Factor),
109        new Production(InvExpr, Inv)
110      };
111
112      productions[Factor] = new[] {
113        new Production(Var),
114        new Production(LogFactor),
115        new Production(ExpFactor),
116        new Production(SinFactor),
117      };
118
119      productions[LogFactor] = new[] { new Production(SimpleExpr, Log) };
120      productions[ExpFactor] = new[] { new Production(Const, SimpleTerm, Multiplication, Exp) };
121      productions[SinFactor] = new[] { new Production(SimpleExpr, Sin) };
122
123      productions[SimpleExpr] = new[] {
124        new Production(Const, SimpleTerm, Multiplication, SimpleExpr, Addition),
125        new Production(Const, SimpleTerm, Multiplication, Const, Addition)
126      };
127
128      productions[SimpleTerm] = new[] {
129        new Production(Var, SimpleTerm, Multiplication),
130        new Production(Var)
131      };
132
133      productions[InvExpr] = new[] {
134        new Production(Const, InvTerm, Multiplication, InvExpr, Addition),
135        new Production(Const, InvTerm, Multiplication, Const, Addition)
136      };
137
138      productions[InvTerm] = new[] {
139        new Production(Factor, InvTerm, Multiplication),
140        new Production(Factor)
141      };
142
143      Productions = productions;
144      #endregion
145
146      #region Parsing to SymbolicExpressionTree
147      var symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
148      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
149
150      constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
151      varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
152      addSy = symbolicExpressionGrammar.Symbols.OfType<Addition>().First();
153      mulSy = symbolicExpressionGrammar.Symbols.OfType<Multiplication>().First();
154      logSy = symbolicExpressionGrammar.Symbols.OfType<Logarithm>().First();
155      expSy = symbolicExpressionGrammar.Symbols.OfType<Exponential>().First();
156      divSy = symbolicExpressionGrammar.Symbols.OfType<Division>().First();
157      sinSy = symbolicExpressionGrammar.Symbols.OfType<Sine>().First();
158
159      rootSy = symbolicExpressionGrammar.Symbols.OfType<ProgramRootSymbol>().First();
160      startSy = symbolicExpressionGrammar.Symbols.OfType<StartSymbol>().First();
161
162      infixExpressionFormatter = new InfixExpressionFormatter();
163      #endregion
164
165      Hasher = new IntHasher(this);
166    }
167
168    public int GetComplexity(SymbolString s) {
169      int c = 0;
170      int length = s.Count();
171      for (int i = 0; i < length; i++) {
172        if (s[i] is NonterminalSymbol || s[i] is VariableTerminalSymbol) c++;
173      }
174      return c;
175    }
176
177    public double EvaluatePhrase(SymbolString s, IRegressionProblemData problemData, bool optimizeConstants) {
178      // Create sentences without Expression symbols.
179      Symbol[] sEval = new Symbol[s.Count()];
180
181      for (int i = 0; i < sEval.Length; i++) {
182        Symbol currSym = s[i];
183        if (currSym is NonterminalSymbol && currSym != Expr)
184          return 0.0;
185
186        if (currSym == Expr) {
187          sEval[i] = Const;
188        } else {
189          sEval[i] = currSym;
190        }
191      }
192
193      SymbolicExpressionTree tree = ParseSymbolicExpressionTree(new SymbolString(sEval));
194      double r2 = RSquaredEvaluator.Evaluate(problemData, tree, optimizeConstants);
195
196      return r2;
197    }
198
199    #region Parse to SymbolicExpressionTree
200
201    public string ToInfixString(SymbolString sentence) {
202      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
203      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
204
205      return infixExpressionFormatter.Format(ParseSymbolicExpressionTree(sentence));
206    }
207
208    public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) {
209      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
210      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
211
212      var rootNode = rootSy.CreateTreeNode();
213      var startNode = startSy.CreateTreeNode();
214      rootNode.AddSubtree(startNode);
215
216      Stack<TerminalSymbol> parseStack = new Stack<TerminalSymbol>(sentence.OfType<TerminalSymbol>());
217      startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
218
219      return new SymbolicExpressionTree(rootNode);
220    }
221
222    public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<TerminalSymbol> parseStack) {
223      TerminalSymbol currentSymbol = parseStack.Pop();
224
225      ISymbolicExpressionTreeNode parsedSubTree = null;
226
227      if (currentSymbol == Addition) {
228        parsedSubTree = addSy.CreateTreeNode();
229        ISymbolicExpressionTreeNode rightSubtree = ParseSymbolicExpressionTree(parseStack);
230        if (rightSubtree is ConstantTreeNode) {
231          ((ConstantTreeNode)rightSubtree).Value = 0.0;
232        }
233        parsedSubTree.AddSubtree(rightSubtree); // left part
234
235        ISymbolicExpressionTreeNode leftSubtree = ParseSymbolicExpressionTree(parseStack);
236        if (leftSubtree is ConstantTreeNode) {
237          ((ConstantTreeNode)leftSubtree).Value = 0.0;
238        }
239        parsedSubTree.AddSubtree(leftSubtree); // right part
240
241      } else if (currentSymbol == Multiplication) {
242        parsedSubTree = mulSy.CreateTreeNode();
243        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
244        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
245
246      } else if (currentSymbol == Log) {
247        parsedSubTree = logSy.CreateTreeNode();
248        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
249
250      } else if (currentSymbol == Exp) {
251        parsedSubTree = expSy.CreateTreeNode();
252        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
253
254      } else if (currentSymbol == Sin) {
255        parsedSubTree = sinSy.CreateTreeNode();
256        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
257
258      } else if (currentSymbol == Inv) {
259        parsedSubTree = divSy.CreateTreeNode();
260        ConstantTreeNode dividend = (ConstantTreeNode)constSy.CreateTreeNode();
261        dividend.Value = 1.0;
262        parsedSubTree.AddSubtree(dividend);
263        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
264
265      } else if (currentSymbol == Const) {
266        ConstantTreeNode constNode = (ConstantTreeNode)constSy.CreateTreeNode();
267        constNode.Value = 1.0;
268        parsedSubTree = constNode;
269
270      } else if (currentSymbol is VariableTerminalSymbol) {
271        VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
272        varNode.Weight = 1.0;
273        varNode.VariableName = currentSymbol.StringRepresentation;
274        parsedSubTree = varNode;
275      }
276
277      Debug.Assert(parsedSubTree != null);
278      return parsedSubTree;
279    }
280    #endregion
281  }
282}
Note: See TracBrowser for help on using the repository browser.