Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs @ 16056

Last change on this file since 16056 was 16056, checked in by bburlacu, 6 years ago

#2886: Remove redundant EvaluatePhrase method in the Grammar class and fix compilation of tests.

File size: 14.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics;
25using System.Linq;
26using HeuristicLab.Common;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
29using HeuristicLab.Problems.DataAnalysis;
30using HeuristicLab.Problems.DataAnalysis.Symbolic;
31
32namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
33  public enum GrammarRule {
34    MultipleTerms,
35    MultipleFactors,
36    InverseTerm,
37    Logarithm,
38    Exponentiation,
39    Sine
40  }
41
42  [StorableClass(StorableClassType.AllFieldsAndAllProperties)]
43  public class Grammar : DeepCloneable {
44    public Symbol StartSymbol { get; private set; }
45
46    public Hasher<int> Hasher { get; private set; }
47
48    #region Symbols
49
50    public IReadOnlyDictionary<Symbol, IReadOnlyList<SymbolList>> Productions { get; private set; }
51
52    public NonterminalSymbol Var;
53    public IReadOnlyList<VariableTerminalSymbol> VarTerminals;
54
55    public NonterminalSymbol Expr;
56    public NonterminalSymbol Term;
57    public NonterminalSymbol Factor;
58    public NonterminalSymbol LogFactor;
59    public NonterminalSymbol ExpFactor;
60    public NonterminalSymbol SinFactor;
61
62    public NonterminalSymbol SimpleExpr;
63    public NonterminalSymbol SimpleTerm;
64
65    public NonterminalSymbol InvExpr;
66    public NonterminalSymbol InvTerm;
67
68    public TerminalSymbol Addition;
69    public TerminalSymbol Multiplication;
70    public TerminalSymbol Log;
71    public TerminalSymbol Exp;
72    public TerminalSymbol Sin;
73    public TerminalSymbol Inv;
74
75    public TerminalSymbol Const;
76
77    #endregion
78
79    #region HL Symbols for Parsing ExpressionTrees
80
81    private ISymbol constSy;
82    private ISymbol varSy;
83
84    private ISymbol addSy;
85    private ISymbol mulSy;
86    private ISymbol logSy;
87    private ISymbol expSy;
88    private ISymbol divSy;
89    private ISymbol sinSy;
90
91    private ISymbol rootSy;
92    private ISymbol startSy;
93
94    private InfixExpressionFormatter infixExpressionFormatter;
95    #endregion
96
97    public Grammar(string[] variables) : this(variables, Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>()) { }
98
99    [StorableConstructor]
100    protected Grammar(bool deserializing) {
101      InitTreeParser();
102    }
103
104    protected Grammar(Grammar original, Cloner cloner) : base(original, cloner) {
105      infixExpressionFormatter = cloner.Clone(original.infixExpressionFormatter);
106
107      Productions = original.Productions.ToDictionary(x => cloner.Clone(x.Key), x => (IReadOnlyList<SymbolList>)x.Value.Select(cloner.Clone).ToList());
108      VarTerminals = original.VarTerminals.Select(cloner.Clone).ToList();
109
110      Var = (NonterminalSymbol)cloner.Clone(original.Var);
111      Expr = (NonterminalSymbol)cloner.Clone(original.Expr);
112      Term = (NonterminalSymbol)cloner.Clone(original.Term);
113      Factor = (NonterminalSymbol)cloner.Clone(original.Factor);
114      LogFactor = (NonterminalSymbol)cloner.Clone(original.LogFactor);
115      ExpFactor = (NonterminalSymbol)cloner.Clone(original.ExpFactor);
116      SinFactor = (NonterminalSymbol)cloner.Clone(original.SinFactor);
117      SimpleExpr = (NonterminalSymbol)cloner.Clone(original.SimpleExpr);
118      SimpleTerm = (NonterminalSymbol)cloner.Clone(original.SimpleTerm);
119      InvExpr = (NonterminalSymbol)cloner.Clone(original.InvExpr);
120      InvTerm = (NonterminalSymbol)cloner.Clone(original.InvTerm);
121
122      Addition = (TerminalSymbol)cloner.Clone(original.Addition);
123      Multiplication = (TerminalSymbol)cloner.Clone(original.Multiplication);
124      Log = (TerminalSymbol)cloner.Clone(original.Log);
125      Exp = (TerminalSymbol)cloner.Clone(original.Exp);
126      Sin = (TerminalSymbol)cloner.Clone(original.Sin);
127      Inv = (TerminalSymbol)cloner.Clone(original.Inv);
128      Const = (TerminalSymbol)cloner.Clone(original.Const);
129
130      StartSymbol = Expr;
131      Hasher = cloner.Clone(original.Hasher);
132
133      InitTreeParser(); // easier this way (and less typing)
134    }
135
136    private void InitProductions(string[] variables, IEnumerable<GrammarRule> includedRules) {
137      #region Define Symbols
138      Var = new NonterminalSymbol("Var");
139
140      Expr = new NonterminalSymbol("Expr");
141      Term = new NonterminalSymbol("Term");
142      Factor = new NonterminalSymbol("Factor");
143      LogFactor = new NonterminalSymbol("LogFactor");
144      ExpFactor = new NonterminalSymbol("ExpFactor");
145      SinFactor = new NonterminalSymbol("SinFactor");
146
147      SimpleExpr = new NonterminalSymbol("SimpleExpr");
148      SimpleTerm = new NonterminalSymbol("SimpleTerm");
149
150      InvExpr = new NonterminalSymbol("InvExpr");
151      InvTerm = new NonterminalSymbol("InvTerm");
152
153      Addition = new TerminalSymbol("+");
154      Multiplication = new TerminalSymbol("*");
155      Log = new TerminalSymbol("log");
156      Exp = new TerminalSymbol("exp");
157      Sin = new TerminalSymbol("sin");
158      Inv = new TerminalSymbol("inv");
159
160      Const = new TerminalSymbol("c");
161      #endregion
162
163      #region Production rules
164      StartSymbol = Expr;
165
166      Dictionary<Symbol, IReadOnlyList<SymbolList>> productions = new Dictionary<Symbol, IReadOnlyList<SymbolList>>();
167
168      // Map each variable to a separate production rule of the "Var" nonterminal symbol.
169      VarTerminals = variables.Select(v => new VariableTerminalSymbol(v)).ToArray();
170      productions[Var] = VarTerminals.Select(v => new SymbolList(v)).ToArray();
171
172      // Expression Grammar Rules
173      var exprProductions = new List<SymbolList>();
174      if (includedRules.Contains(GrammarRule.MultipleTerms))
175        exprProductions.Add(new SymbolList(Const, Term, Multiplication, Expr, Addition));
176
177      exprProductions.Add(new SymbolList(Const, Term, Multiplication, Const, Addition));
178      productions[Expr] = exprProductions.ToArray();
179
180      // Term Grammar Rules
181      var termProductions = new List<SymbolList>();
182      if (includedRules.Contains(GrammarRule.MultipleFactors))
183        termProductions.Add(new SymbolList(Factor, Term, Multiplication));
184      if (includedRules.Contains(GrammarRule.InverseTerm))
185        termProductions.Add(new SymbolList(InvExpr, Inv));
186      termProductions.Add(new SymbolList(Factor));
187      productions[Term] = termProductions.ToArray();
188
189      // Factor Grammar Rules
190      var factorProductions = new List<SymbolList>();
191      factorProductions.Add(new SymbolList(Var));
192      if (includedRules.Contains(GrammarRule.Logarithm))
193        factorProductions.Add(new SymbolList(LogFactor));
194      if (includedRules.Contains(GrammarRule.Exponentiation))
195        factorProductions.Add(new SymbolList(ExpFactor));
196      if (includedRules.Contains(GrammarRule.Sine))
197        factorProductions.Add(new SymbolList(SinFactor));
198      productions[Factor] = factorProductions.ToArray();
199
200      productions[LogFactor] = new[] { new SymbolList(SimpleExpr, Log) };
201      productions[ExpFactor] = new[] { new SymbolList(Const, SimpleTerm, Multiplication, Exp) };
202      productions[SinFactor] = new[] { new SymbolList(SimpleExpr, Sin) };
203
204      productions[SimpleExpr] = new[] {
205        new SymbolList(Const, SimpleTerm, Multiplication, SimpleExpr, Addition),
206        new SymbolList(Const, SimpleTerm, Multiplication, Const, Addition)
207      };
208
209      productions[SimpleTerm] = new[] {
210        new SymbolList(Var, SimpleTerm, Multiplication),
211        new SymbolList(Var)
212      };
213
214      productions[InvExpr] = new[] {
215        new SymbolList(Const, InvTerm, Multiplication, InvExpr, Addition),
216        new SymbolList(Const, InvTerm, Multiplication, Const, Addition)
217      };
218
219      productions[InvTerm] = new[] {
220        new SymbolList(Factor, InvTerm, Multiplication),
221        new SymbolList(Factor)
222      };
223
224      Productions = productions;
225      #endregion
226    }
227
228    private void InitTreeParser() {
229      #region Parsing to SymbolicExpressionTree
230      var symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
231      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
232
233      constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
234      varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
235      addSy = symbolicExpressionGrammar.Symbols.OfType<Addition>().First();
236      mulSy = symbolicExpressionGrammar.Symbols.OfType<Multiplication>().First();
237      logSy = symbolicExpressionGrammar.Symbols.OfType<Logarithm>().First();
238      expSy = symbolicExpressionGrammar.Symbols.OfType<Exponential>().First();
239      divSy = symbolicExpressionGrammar.Symbols.OfType<Division>().First();
240      sinSy = symbolicExpressionGrammar.Symbols.OfType<Sine>().First();
241
242      rootSy = symbolicExpressionGrammar.Symbols.OfType<ProgramRootSymbol>().First();
243      startSy = symbolicExpressionGrammar.Symbols.OfType<StartSymbol>().First();
244
245      infixExpressionFormatter = new InfixExpressionFormatter();
246      #endregion
247    }
248
249    public Grammar(string[] variables, IEnumerable<GrammarRule> includedRules) {
250      InitProductions(variables, includedRules);
251      InitTreeParser();
252
253      Hasher = new IntHasher(this);
254    }
255
256    /// <summary>
257    /// returns the maximum achievable sentence length below the maximum complexity
258    /// TODO: figure out a way to calculate this accurately
259    /// </summary>
260    /// <param name="maxComplexity"></param>
261    /// <returns>Maximum sentence length according to the grammar and the maximum complexity</returns>
262    public int GetMaxSentenceLength(int maxComplexity) {
263      SymbolList s = new SymbolList(StartSymbol);
264
265      while (!s.IsSentence() && s.Complexity <= maxComplexity) {
266        int expandedSymbolIndex = s.NextNonterminalIndex();
267        NonterminalSymbol expandedSymbol = (NonterminalSymbol)s[expandedSymbolIndex];
268
269        var productions = Productions[expandedSymbol];
270        var longestProduction = productions                // Find production with most terminal symbols to expand as much as possible...
271          .OrderBy(x => x.TerminalSymbolCount)             // but with lowest complexity/nonterminal count to keep complexity low.                                                                                     
272          .ThenByDescending(x => x.NonTerminalSymbolCount)
273          .First();
274
275        s = s.DerivePhrase(expandedSymbolIndex, longestProduction);
276      }
277      return s.Count;
278    }
279
280    #region Parse to SymbolicExpressionTree
281    public string ToInfixString(SymbolList sentence) {
282      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
283      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
284
285      return infixExpressionFormatter.Format(ParseSymbolicExpressionTree(sentence));
286    }
287
288    public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolList sentence) {
289      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
290
291      var rootNode = rootSy.CreateTreeNode();
292      var startNode = startSy.CreateTreeNode();
293      rootNode.AddSubtree(startNode);
294
295      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
296      startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
297
298      return new SymbolicExpressionTree(rootNode);
299    }
300
301    public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<Symbol> parseStack) {
302      Symbol currentSymbol = parseStack.Pop();
303
304      ISymbolicExpressionTreeNode parsedSubTree = null;
305
306      if (currentSymbol == Addition) {
307        parsedSubTree = addSy.CreateTreeNode();
308        ISymbolicExpressionTreeNode rightSubtree = ParseSymbolicExpressionTree(parseStack);
309        if (rightSubtree is ConstantTreeNode) {
310          ((ConstantTreeNode)rightSubtree).Value = 0.0;
311        }
312        parsedSubTree.AddSubtree(rightSubtree); // left part
313
314        ISymbolicExpressionTreeNode leftSubtree = ParseSymbolicExpressionTree(parseStack);
315        if (leftSubtree is ConstantTreeNode) {
316          ((ConstantTreeNode)leftSubtree).Value = 0.0;
317        }
318        parsedSubTree.AddSubtree(leftSubtree); // right part
319
320      } else if (currentSymbol == Multiplication) {
321        parsedSubTree = mulSy.CreateTreeNode();
322        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
323        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
324
325      } else if (currentSymbol == Log) {
326        parsedSubTree = logSy.CreateTreeNode();
327        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
328
329      } else if (currentSymbol == Exp) {
330        parsedSubTree = expSy.CreateTreeNode();
331        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
332
333      } else if (currentSymbol == Sin) {
334        parsedSubTree = sinSy.CreateTreeNode();
335        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
336
337      } else if (currentSymbol == Inv) {
338        parsedSubTree = divSy.CreateTreeNode();
339        ConstantTreeNode dividend = (ConstantTreeNode)constSy.CreateTreeNode();
340        dividend.Value = 1.0;
341        parsedSubTree.AddSubtree(dividend);
342        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
343
344      } else if (currentSymbol == Const) {
345        ConstantTreeNode constNode = (ConstantTreeNode)constSy.CreateTreeNode();
346        constNode.Value = 1.0;
347        parsedSubTree = constNode;
348
349      } else if (currentSymbol is VariableTerminalSymbol) {
350        VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
351        varNode.Weight = 1.0;
352        varNode.VariableName = currentSymbol.StringRepresentation;
353        parsedSubTree = varNode;
354
355      } else if (currentSymbol is NonterminalSymbol) {
356        ConstantTreeNode constNode = (ConstantTreeNode)constSy.CreateTreeNode();
357        constNode.Value = 0.0;
358        parsedSubTree = constNode;
359      }
360
361      Debug.Assert(parsedSubTree != null);
362      return parsedSubTree;
363    }
364    #endregion
365
366    #region abstract DeepCloneable methods
367    public override IDeepCloneable Clone(Cloner cloner) {
368      return new Grammar(this, cloner);
369    }
370    #endregion
371  }
372}
Note: See TracBrowser for help on using the repository browser.