source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs @ 15975

Last change on this file since 15975 was 15975, checked in by bburlacu, 3 years ago

#2886: address additional serialization issues, make Production implement IList<T> instead of deriving from List<T>

File size: 13.4 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
6using HeuristicLab.Common;
7using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
8using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
9using HeuristicLab.Problems.DataAnalysis;
10using HeuristicLab.Problems.DataAnalysis.Symbolic;
11
12namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
13  public enum GrammarRule {
14    MultipleTerms,
15    MultipleFactors,
16    InverseTerm,
17    Logarithm,
18    Exponentiation,
19    Sine
20  }
21
22  [StorableClass(StorableClassType.AllFieldsAndAllProperties)]
23  public class Grammar : DeepCloneable {
24    public Symbol StartSymbol { get; private set; }
25
26    public Hasher<int> Hasher { get; private set; }
27
28    #region Symbols
29
30    public IReadOnlyDictionary<Symbol, IReadOnlyList<Production>> Productions { get; private set; }
31
32    public NonterminalSymbol Var;
33    public IReadOnlyList<VariableTerminalSymbol> VarTerminals;
34
35    public NonterminalSymbol Expr;
36    public NonterminalSymbol Term;
37    public NonterminalSymbol Factor;
38    public NonterminalSymbol LogFactor;
39    public NonterminalSymbol ExpFactor;
40    public NonterminalSymbol SinFactor;
41
42    public NonterminalSymbol SimpleExpr;
43    public NonterminalSymbol SimpleTerm;
44
45    public NonterminalSymbol InvExpr;
46    public NonterminalSymbol InvTerm;
47
48    public TerminalSymbol Addition;
49    public TerminalSymbol Multiplication;
50    public TerminalSymbol Log;
51    public TerminalSymbol Exp;
52    public TerminalSymbol Sin;
53    public TerminalSymbol Inv;
54
55    public TerminalSymbol Const;
56
57    #endregion
58
59    #region HL Symbols for Parsing ExpressionTrees
60
61    private ISymbol constSy;
62    private ISymbol varSy;
63
64    private ISymbol addSy;
65    private ISymbol mulSy;
66    private ISymbol logSy;
67    private ISymbol expSy;
68    private ISymbol divSy;
69    private ISymbol sinSy;
70
71    private ISymbol rootSy;
72    private ISymbol startSy;
73
74    private InfixExpressionFormatter infixExpressionFormatter;
75    #endregion
76
77    public Grammar(string[] variables) : this(variables, Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>()) { }
78
79    [StorableConstructor]
80    protected Grammar(bool deserializing) {
81      InitTreeParser();
82    }
83
84    protected Grammar(Grammar original, Cloner cloner) : base(original, cloner) {
85      infixExpressionFormatter = cloner.Clone(original.infixExpressionFormatter);
86
87      Productions = original.Productions.ToDictionary(x => cloner.Clone(x.Key), x => (IReadOnlyList<Production>)x.Value.Select(cloner.Clone).ToList());
88      VarTerminals = original.VarTerminals.Select(cloner.Clone).ToList();
89
90      Var = (NonterminalSymbol)cloner.Clone(original.Var);
91      Expr = (NonterminalSymbol)cloner.Clone(original.Expr);
92      Term = (NonterminalSymbol)cloner.Clone(original.Term);
93      Factor = (NonterminalSymbol)cloner.Clone(original.Factor);
94      LogFactor = (NonterminalSymbol)cloner.Clone(original.LogFactor);
95      ExpFactor = (NonterminalSymbol)cloner.Clone(original.ExpFactor);
96      SinFactor = (NonterminalSymbol)cloner.Clone(original.SinFactor);
97      SimpleExpr = (NonterminalSymbol)cloner.Clone(original.SimpleExpr);
98      SimpleTerm = (NonterminalSymbol)cloner.Clone(original.SimpleTerm);
99      InvExpr = (NonterminalSymbol)cloner.Clone(original.InvExpr);
100      InvTerm = (NonterminalSymbol)cloner.Clone(original.InvTerm);
101
102      Addition = (TerminalSymbol)cloner.Clone(original.Addition);
103      Multiplication = (TerminalSymbol)cloner.Clone(original.Multiplication);
104      Log = (TerminalSymbol)cloner.Clone(original.Log);
105      Exp = (TerminalSymbol)cloner.Clone(original.Exp);
106      Sin = (TerminalSymbol)cloner.Clone(original.Sin);
107      Inv = (TerminalSymbol)cloner.Clone(original.Inv);
108      Const = (TerminalSymbol)cloner.Clone(original.Const);
109
110      StartSymbol = Expr;
111      Hasher = cloner.Clone(original.Hasher);
112
113      InitTreeParser(); // easier this way (and less typing)
114    }
115
116    private void InitProductions(string[] variables, IEnumerable<GrammarRule> includedRules) {
117      #region Define Symbols
118      Var = new NonterminalSymbol("Var");
119
120      Expr = new NonterminalSymbol("Expr");
121      Term = new NonterminalSymbol("Term");
122      Factor = new NonterminalSymbol("Factor");
123      LogFactor = new NonterminalSymbol("LogFactor");
124      ExpFactor = new NonterminalSymbol("ExpFactor");
125      SinFactor = new NonterminalSymbol("SinFactor");
126
127      SimpleExpr = new NonterminalSymbol("SimpleExpr");
128      SimpleTerm = new NonterminalSymbol("SimpleTerm");
129
130      InvExpr = new NonterminalSymbol("InvExpr");
131      InvTerm = new NonterminalSymbol("InvTerm");
132
133      Addition = new TerminalSymbol("+");
134      Multiplication = new TerminalSymbol("*");
135      Log = new TerminalSymbol("log");
136      Exp = new TerminalSymbol("exp");
137      Sin = new TerminalSymbol("sin");
138      Inv = new TerminalSymbol("inv");
139
140      Const = new TerminalSymbol("c");
141      #endregion
142
143      #region Production rules
144      StartSymbol = Expr;
145
146      Dictionary<Symbol, IReadOnlyList<Production>> productions = new Dictionary<Symbol, IReadOnlyList<Production>>();
147
148      // Map each variable to a separate production rule of the "Var" nonterminal symbol.
149      VarTerminals = variables.Select(v => new VariableTerminalSymbol(v)).ToArray();
150      productions[Var] = VarTerminals.Select(v => new Production(v)).ToArray();
151
152      // Expression Grammar Rules
153      var exprProductions = new List<Production>();
154      if (includedRules.Contains(GrammarRule.MultipleTerms))
155        exprProductions.Add(new Production(Const, Term, Multiplication, Expr, Addition));
156
157      exprProductions.Add(new Production(Const, Term, Multiplication, Const, Addition));
158      productions[Expr] = exprProductions.ToArray();
159
160      // Term Grammar Rules
161      var termProductions = new List<Production>();
162      if (includedRules.Contains(GrammarRule.MultipleFactors))
163        termProductions.Add(new Production(Factor, Term, Multiplication));
164      if (includedRules.Contains(GrammarRule.InverseTerm))
165        termProductions.Add(new Production(InvExpr, Inv));
166      termProductions.Add(new Production(Factor));
167      productions[Term] = termProductions.ToArray();
168
169      // Factor Grammar Rules
170      var factorProductions = new List<Production>();
171      factorProductions.Add(new Production(Var));
172      if (includedRules.Contains(GrammarRule.Logarithm))
173        factorProductions.Add(new Production(LogFactor));
174      if (includedRules.Contains(GrammarRule.Exponentiation))
175        factorProductions.Add(new Production(ExpFactor));
176      if (includedRules.Contains(GrammarRule.Sine))
177        factorProductions.Add(new Production(SinFactor));
178      productions[Factor] = factorProductions.ToArray();
179
180      productions[LogFactor] = new[] { new Production(SimpleExpr, Log) };
181      productions[ExpFactor] = new[] { new Production(Const, SimpleTerm, Multiplication, Exp) };
182      productions[SinFactor] = new[] { new Production(SimpleExpr, Sin) };
183
184      productions[SimpleExpr] = new[] {
185        new Production(Const, SimpleTerm, Multiplication, SimpleExpr, Addition),
186        new Production(Const, SimpleTerm, Multiplication, Const, Addition)
187      };
188
189      productions[SimpleTerm] = new[] {
190        new Production(Var, SimpleTerm, Multiplication),
191        new Production(Var)
192      };
193
194      productions[InvExpr] = new[] {
195        new Production(Const, InvTerm, Multiplication, InvExpr, Addition),
196        new Production(Const, InvTerm, Multiplication, Const, Addition)
197      };
198
199      productions[InvTerm] = new[] {
200        new Production(Factor, InvTerm, Multiplication),
201        new Production(Factor)
202      };
203
204      Productions = productions;
205      #endregion
206    }
207
208    private void InitTreeParser() {
209      #region Parsing to SymbolicExpressionTree
210      var symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
211      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
212
213      constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
214      varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
215      addSy = symbolicExpressionGrammar.Symbols.OfType<Addition>().First();
216      mulSy = symbolicExpressionGrammar.Symbols.OfType<Multiplication>().First();
217      logSy = symbolicExpressionGrammar.Symbols.OfType<Logarithm>().First();
218      expSy = symbolicExpressionGrammar.Symbols.OfType<Exponential>().First();
219      divSy = symbolicExpressionGrammar.Symbols.OfType<Division>().First();
220      sinSy = symbolicExpressionGrammar.Symbols.OfType<Sine>().First();
221
222      rootSy = symbolicExpressionGrammar.Symbols.OfType<ProgramRootSymbol>().First();
223      startSy = symbolicExpressionGrammar.Symbols.OfType<StartSymbol>().First();
224
225      infixExpressionFormatter = new InfixExpressionFormatter();
226      #endregion
227    }
228
229    public Grammar(string[] variables, IEnumerable<GrammarRule> includedRules) {
230      InitProductions(variables, includedRules);
231      InitTreeParser();
232
233      Hasher = new IntHasher(this);
234    }
235
236    public int GetComplexity(SymbolString s) {
237      int c = 0;
238      int length = s.Count();
239      for (int i = 0; i < length; i++) {
240        if (s[i] is NonterminalSymbol || s[i] is VariableTerminalSymbol) c++;
241      }
242      return c;
243    }
244
245    public double EvaluatePhrase(SymbolString s, IRegressionProblemData problemData, bool optimizeConstants) {
246      SymbolicExpressionTree tree = ParseSymbolicExpressionTree(s);
247
248      return RSquaredEvaluator.Evaluate(problemData, tree, optimizeConstants);
249    }
250
251    #region Parse to SymbolicExpressionTree
252
253    public string ToInfixString(SymbolString sentence) {
254      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
255      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
256
257      return infixExpressionFormatter.Format(ParseSymbolicExpressionTree(sentence));
258    }
259
260    public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolString sentence) {
261      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
262
263      var rootNode = rootSy.CreateTreeNode();
264      var startNode = startSy.CreateTreeNode();
265      rootNode.AddSubtree(startNode);
266
267      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
268      startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
269
270      return new SymbolicExpressionTree(rootNode);
271    }
272
273    public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<Symbol> parseStack) {
274      Symbol currentSymbol = parseStack.Pop();
275
276      ISymbolicExpressionTreeNode parsedSubTree = null;
277
278      if (currentSymbol == Addition) {
279        parsedSubTree = addSy.CreateTreeNode();
280        ISymbolicExpressionTreeNode rightSubtree = ParseSymbolicExpressionTree(parseStack);
281        if (rightSubtree is ConstantTreeNode) {
282          ((ConstantTreeNode)rightSubtree).Value = 0.0;
283        }
284        parsedSubTree.AddSubtree(rightSubtree); // left part
285
286        ISymbolicExpressionTreeNode leftSubtree = ParseSymbolicExpressionTree(parseStack);
287        if (leftSubtree is ConstantTreeNode) {
288          ((ConstantTreeNode)leftSubtree).Value = 0.0;
289        }
290        parsedSubTree.AddSubtree(leftSubtree); // right part
291
292      } else if (currentSymbol == Multiplication) {
293        parsedSubTree = mulSy.CreateTreeNode();
294        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
295        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
296
297      } else if (currentSymbol == Log) {
298        parsedSubTree = logSy.CreateTreeNode();
299        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
300
301      } else if (currentSymbol == Exp) {
302        parsedSubTree = expSy.CreateTreeNode();
303        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
304
305      } else if (currentSymbol == Sin) {
306        parsedSubTree = sinSy.CreateTreeNode();
307        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
308
309      } else if (currentSymbol == Inv) {
310        parsedSubTree = divSy.CreateTreeNode();
311        ConstantTreeNode dividend = (ConstantTreeNode)constSy.CreateTreeNode();
312        dividend.Value = 1.0;
313        parsedSubTree.AddSubtree(dividend);
314        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
315
316      } else if (currentSymbol == Const) {
317        ConstantTreeNode constNode = (ConstantTreeNode)constSy.CreateTreeNode();
318        constNode.Value = 1.0;
319        parsedSubTree = constNode;
320
321      } else if (currentSymbol is VariableTerminalSymbol) {
322        VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
323        varNode.Weight = 1.0;
324        varNode.VariableName = currentSymbol.StringRepresentation;
325        parsedSubTree = varNode;
326
327      } else if (currentSymbol is NonterminalSymbol) {
328        ConstantTreeNode constNode = (ConstantTreeNode)constSy.CreateTreeNode();
329        constNode.Value = 0.0;
330        parsedSubTree = constNode;
331      }
332
333      Debug.Assert(parsedSubTree != null);
334      return parsedSubTree;
335    }
336    #endregion
337
338    #region abstract DeepCloneable methods
339    public override IDeepCloneable Clone(Cloner cloner) {
340      return new Grammar(this, cloner);
341    }
342    #endregion
343  }
344}
Note: See TracBrowser for help on using the repository browser.