Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2886_SymRegGrammarEnumeration/HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration/GrammarEnumeration/Grammar.cs @ 16026

Last change on this file since 16026 was 16026, checked in by bburlacu, 6 years ago

#2886:

  • replace functionally-overlapping classes Production and SymbolString with a single class SymbolList
  • refactor methods from Grammar class as methods and properties of SymbolList
  • add parameter for the number of constant optimization iterations
  • refactor code
File size: 14.1 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration.GrammarEnumeration;
6using HeuristicLab.Common;
7using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
8using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
9using HeuristicLab.Problems.DataAnalysis;
10using HeuristicLab.Problems.DataAnalysis.Symbolic;
11
12namespace HeuristicLab.Algorithms.DataAnalysis.SymRegGrammarEnumeration {
13  public enum GrammarRule {
14    MultipleTerms,
15    MultipleFactors,
16    InverseTerm,
17    Logarithm,
18    Exponentiation,
19    Sine
20  }
21
22  [StorableClass(StorableClassType.AllFieldsAndAllProperties)]
23  public class Grammar : DeepCloneable {
24    public Symbol StartSymbol { get; private set; }
25
26    public Hasher<int> Hasher { get; private set; }
27
28    #region Symbols
29
30    public IReadOnlyDictionary<Symbol, IReadOnlyList<SymbolList>> Productions { get; private set; }
31
32    public NonterminalSymbol Var;
33    public IReadOnlyList<VariableTerminalSymbol> VarTerminals;
34
35    public NonterminalSymbol Expr;
36    public NonterminalSymbol Term;
37    public NonterminalSymbol Factor;
38    public NonterminalSymbol LogFactor;
39    public NonterminalSymbol ExpFactor;
40    public NonterminalSymbol SinFactor;
41
42    public NonterminalSymbol SimpleExpr;
43    public NonterminalSymbol SimpleTerm;
44
45    public NonterminalSymbol InvExpr;
46    public NonterminalSymbol InvTerm;
47
48    public TerminalSymbol Addition;
49    public TerminalSymbol Multiplication;
50    public TerminalSymbol Log;
51    public TerminalSymbol Exp;
52    public TerminalSymbol Sin;
53    public TerminalSymbol Inv;
54
55    public TerminalSymbol Const;
56
57    #endregion
58
59    #region HL Symbols for Parsing ExpressionTrees
60
61    private ISymbol constSy;
62    private ISymbol varSy;
63
64    private ISymbol addSy;
65    private ISymbol mulSy;
66    private ISymbol logSy;
67    private ISymbol expSy;
68    private ISymbol divSy;
69    private ISymbol sinSy;
70
71    private ISymbol rootSy;
72    private ISymbol startSy;
73
74    private InfixExpressionFormatter infixExpressionFormatter;
75    #endregion
76
77    public Grammar(string[] variables) : this(variables, Enum.GetValues(typeof(GrammarRule)).Cast<GrammarRule>()) { }
78
79    [StorableConstructor]
80    protected Grammar(bool deserializing) {
81      InitTreeParser();
82    }
83
84    protected Grammar(Grammar original, Cloner cloner) : base(original, cloner) {
85      infixExpressionFormatter = cloner.Clone(original.infixExpressionFormatter);
86
87      Productions = original.Productions.ToDictionary(x => cloner.Clone(x.Key), x => (IReadOnlyList<SymbolList>)x.Value.Select(cloner.Clone).ToList());
88      VarTerminals = original.VarTerminals.Select(cloner.Clone).ToList();
89
90      Var = (NonterminalSymbol)cloner.Clone(original.Var);
91      Expr = (NonterminalSymbol)cloner.Clone(original.Expr);
92      Term = (NonterminalSymbol)cloner.Clone(original.Term);
93      Factor = (NonterminalSymbol)cloner.Clone(original.Factor);
94      LogFactor = (NonterminalSymbol)cloner.Clone(original.LogFactor);
95      ExpFactor = (NonterminalSymbol)cloner.Clone(original.ExpFactor);
96      SinFactor = (NonterminalSymbol)cloner.Clone(original.SinFactor);
97      SimpleExpr = (NonterminalSymbol)cloner.Clone(original.SimpleExpr);
98      SimpleTerm = (NonterminalSymbol)cloner.Clone(original.SimpleTerm);
99      InvExpr = (NonterminalSymbol)cloner.Clone(original.InvExpr);
100      InvTerm = (NonterminalSymbol)cloner.Clone(original.InvTerm);
101
102      Addition = (TerminalSymbol)cloner.Clone(original.Addition);
103      Multiplication = (TerminalSymbol)cloner.Clone(original.Multiplication);
104      Log = (TerminalSymbol)cloner.Clone(original.Log);
105      Exp = (TerminalSymbol)cloner.Clone(original.Exp);
106      Sin = (TerminalSymbol)cloner.Clone(original.Sin);
107      Inv = (TerminalSymbol)cloner.Clone(original.Inv);
108      Const = (TerminalSymbol)cloner.Clone(original.Const);
109
110      StartSymbol = Expr;
111      Hasher = cloner.Clone(original.Hasher);
112
113      InitTreeParser(); // easier this way (and less typing)
114    }
115
116    private void InitProductions(string[] variables, IEnumerable<GrammarRule> includedRules) {
117      #region Define Symbols
118      Var = new NonterminalSymbol("Var");
119
120      Expr = new NonterminalSymbol("Expr");
121      Term = new NonterminalSymbol("Term");
122      Factor = new NonterminalSymbol("Factor");
123      LogFactor = new NonterminalSymbol("LogFactor");
124      ExpFactor = new NonterminalSymbol("ExpFactor");
125      SinFactor = new NonterminalSymbol("SinFactor");
126
127      SimpleExpr = new NonterminalSymbol("SimpleExpr");
128      SimpleTerm = new NonterminalSymbol("SimpleTerm");
129
130      InvExpr = new NonterminalSymbol("InvExpr");
131      InvTerm = new NonterminalSymbol("InvTerm");
132
133      Addition = new TerminalSymbol("+");
134      Multiplication = new TerminalSymbol("*");
135      Log = new TerminalSymbol("log");
136      Exp = new TerminalSymbol("exp");
137      Sin = new TerminalSymbol("sin");
138      Inv = new TerminalSymbol("inv");
139
140      Const = new TerminalSymbol("c");
141      #endregion
142
143      #region Production rules
144      StartSymbol = Expr;
145
146      Dictionary<Symbol, IReadOnlyList<SymbolList>> productions = new Dictionary<Symbol, IReadOnlyList<SymbolList>>();
147
148      // Map each variable to a separate production rule of the "Var" nonterminal symbol.
149      VarTerminals = variables.Select(v => new VariableTerminalSymbol(v)).ToArray();
150      productions[Var] = VarTerminals.Select(v => new SymbolList(v)).ToArray();
151
152      // Expression Grammar Rules
153      var exprProductions = new List<SymbolList>();
154      if (includedRules.Contains(GrammarRule.MultipleTerms))
155        exprProductions.Add(new SymbolList(Const, Term, Multiplication, Expr, Addition));
156
157      exprProductions.Add(new SymbolList(Const, Term, Multiplication, Const, Addition));
158      productions[Expr] = exprProductions.ToArray();
159
160      // Term Grammar Rules
161      var termProductions = new List<SymbolList>();
162      if (includedRules.Contains(GrammarRule.MultipleFactors))
163        termProductions.Add(new SymbolList(Factor, Term, Multiplication));
164      if (includedRules.Contains(GrammarRule.InverseTerm))
165        termProductions.Add(new SymbolList(InvExpr, Inv));
166      termProductions.Add(new SymbolList(Factor));
167      productions[Term] = termProductions.ToArray();
168
169      // Factor Grammar Rules
170      var factorProductions = new List<SymbolList>();
171      factorProductions.Add(new SymbolList(Var));
172      if (includedRules.Contains(GrammarRule.Logarithm))
173        factorProductions.Add(new SymbolList(LogFactor));
174      if (includedRules.Contains(GrammarRule.Exponentiation))
175        factorProductions.Add(new SymbolList(ExpFactor));
176      if (includedRules.Contains(GrammarRule.Sine))
177        factorProductions.Add(new SymbolList(SinFactor));
178      productions[Factor] = factorProductions.ToArray();
179
180      productions[LogFactor] = new[] { new SymbolList(SimpleExpr, Log) };
181      productions[ExpFactor] = new[] { new SymbolList(Const, SimpleTerm, Multiplication, Exp) };
182      productions[SinFactor] = new[] { new SymbolList(SimpleExpr, Sin) };
183
184      productions[SimpleExpr] = new[] {
185        new SymbolList(Const, SimpleTerm, Multiplication, SimpleExpr, Addition),
186        new SymbolList(Const, SimpleTerm, Multiplication, Const, Addition)
187      };
188
189      productions[SimpleTerm] = new[] {
190        new SymbolList(Var, SimpleTerm, Multiplication),
191        new SymbolList(Var)
192      };
193
194      productions[InvExpr] = new[] {
195        new SymbolList(Const, InvTerm, Multiplication, InvExpr, Addition),
196        new SymbolList(Const, InvTerm, Multiplication, Const, Addition)
197      };
198
199      productions[InvTerm] = new[] {
200        new SymbolList(Factor, InvTerm, Multiplication),
201        new SymbolList(Factor)
202      };
203
204      Productions = productions;
205      #endregion
206    }
207
208    private void InitTreeParser() {
209      #region Parsing to SymbolicExpressionTree
210      var symbolicExpressionGrammar = new TypeCoherentExpressionGrammar();
211      symbolicExpressionGrammar.ConfigureAsDefaultRegressionGrammar();
212
213      constSy = symbolicExpressionGrammar.Symbols.OfType<Constant>().First();
214      varSy = symbolicExpressionGrammar.Symbols.OfType<Variable>().First();
215      addSy = symbolicExpressionGrammar.Symbols.OfType<Addition>().First();
216      mulSy = symbolicExpressionGrammar.Symbols.OfType<Multiplication>().First();
217      logSy = symbolicExpressionGrammar.Symbols.OfType<Logarithm>().First();
218      expSy = symbolicExpressionGrammar.Symbols.OfType<Exponential>().First();
219      divSy = symbolicExpressionGrammar.Symbols.OfType<Division>().First();
220      sinSy = symbolicExpressionGrammar.Symbols.OfType<Sine>().First();
221
222      rootSy = symbolicExpressionGrammar.Symbols.OfType<ProgramRootSymbol>().First();
223      startSy = symbolicExpressionGrammar.Symbols.OfType<StartSymbol>().First();
224
225      infixExpressionFormatter = new InfixExpressionFormatter();
226      #endregion
227    }
228
229    public Grammar(string[] variables, IEnumerable<GrammarRule> includedRules) {
230      InitProductions(variables, includedRules);
231      InitTreeParser();
232
233      Hasher = new IntHasher(this);
234    }
235
236    // returns the maximum achievable sentence length below the maximum complexity
237    public int GetMaxSentenceLength(int maxComplexity) {
238      SymbolList s = new SymbolList(StartSymbol);
239
240      while (!s.IsSentence() && s.Complexity <= maxComplexity) {
241        int expandedSymbolIndex = s.NextNonterminalIndex();
242        NonterminalSymbol expandedSymbol = (NonterminalSymbol)s[expandedSymbolIndex];
243
244        var productions = Productions[expandedSymbol];
245        var longestProduction = productions                // Find production with most terminal symbols to expand as much as possible...
246          .OrderBy(x => x.TerminalSymbolCount)             // but with lowest complexity/nonterminal count to keep complexity low.                                                                                     
247          .ThenByDescending(x => x.NonTerminalSymbolCount)
248          .First();
249
250        s = s.DerivePhrase(expandedSymbolIndex, longestProduction);
251      }
252      return s.Count;
253    }
254
255    public double EvaluatePhrase(SymbolList s, IRegressionProblemData problemData, bool optimizeConstants, int iterations) {
256      SymbolicExpressionTree tree = ParseSymbolicExpressionTree(s);
257
258      return RSquaredEvaluator.Evaluate(problemData, tree, optimizeConstants, iterations);
259    }
260
261    #region Parse to SymbolicExpressionTree
262
263    public string ToInfixString(SymbolList sentence) {
264      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
265      Debug.Assert(sentence.All(s => s is TerminalSymbol), "Trying to evaluate symbol sequence with nonterminalsymbols!");
266
267      return infixExpressionFormatter.Format(ParseSymbolicExpressionTree(sentence));
268    }
269
270    public SymbolicExpressionTree ParseSymbolicExpressionTree(SymbolList sentence) {
271      Debug.Assert(sentence.Any(), "Trying to evaluate empty sentence!");
272
273      var rootNode = rootSy.CreateTreeNode();
274      var startNode = startSy.CreateTreeNode();
275      rootNode.AddSubtree(startNode);
276
277      Stack<Symbol> parseStack = new Stack<Symbol>(sentence);
278      startNode.AddSubtree(ParseSymbolicExpressionTree(parseStack));
279
280      return new SymbolicExpressionTree(rootNode);
281    }
282
283    public ISymbolicExpressionTreeNode ParseSymbolicExpressionTree(Stack<Symbol> parseStack) {
284      Symbol currentSymbol = parseStack.Pop();
285
286      ISymbolicExpressionTreeNode parsedSubTree = null;
287
288      if (currentSymbol == Addition) {
289        parsedSubTree = addSy.CreateTreeNode();
290        ISymbolicExpressionTreeNode rightSubtree = ParseSymbolicExpressionTree(parseStack);
291        if (rightSubtree is ConstantTreeNode) {
292          ((ConstantTreeNode)rightSubtree).Value = 0.0;
293        }
294        parsedSubTree.AddSubtree(rightSubtree); // left part
295
296        ISymbolicExpressionTreeNode leftSubtree = ParseSymbolicExpressionTree(parseStack);
297        if (leftSubtree is ConstantTreeNode) {
298          ((ConstantTreeNode)leftSubtree).Value = 0.0;
299        }
300        parsedSubTree.AddSubtree(leftSubtree); // right part
301
302      } else if (currentSymbol == Multiplication) {
303        parsedSubTree = mulSy.CreateTreeNode();
304        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // left part
305        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack)); // right part
306
307      } else if (currentSymbol == Log) {
308        parsedSubTree = logSy.CreateTreeNode();
309        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
310
311      } else if (currentSymbol == Exp) {
312        parsedSubTree = expSy.CreateTreeNode();
313        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
314
315      } else if (currentSymbol == Sin) {
316        parsedSubTree = sinSy.CreateTreeNode();
317        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
318
319      } else if (currentSymbol == Inv) {
320        parsedSubTree = divSy.CreateTreeNode();
321        ConstantTreeNode dividend = (ConstantTreeNode)constSy.CreateTreeNode();
322        dividend.Value = 1.0;
323        parsedSubTree.AddSubtree(dividend);
324        parsedSubTree.AddSubtree(ParseSymbolicExpressionTree(parseStack));
325
326      } else if (currentSymbol == Const) {
327        ConstantTreeNode constNode = (ConstantTreeNode)constSy.CreateTreeNode();
328        constNode.Value = 1.0;
329        parsedSubTree = constNode;
330
331      } else if (currentSymbol is VariableTerminalSymbol) {
332        VariableTreeNode varNode = (VariableTreeNode)varSy.CreateTreeNode();
333        varNode.Weight = 1.0;
334        varNode.VariableName = currentSymbol.StringRepresentation;
335        parsedSubTree = varNode;
336
337      } else if (currentSymbol is NonterminalSymbol) {
338        ConstantTreeNode constNode = (ConstantTreeNode)constSy.CreateTreeNode();
339        constNode.Value = 0.0;
340        parsedSubTree = constNode;
341      }
342
343      Debug.Assert(parsedSubTree != null);
344      return parsedSubTree;
345    }
346    #endregion
347
348    #region abstract DeepCloneable methods
349    public override IDeepCloneable Clone(Cloner cloner) {
350      return new Grammar(this, cloner);
351    }
352    #endregion
353  }
354}
Note: See TracBrowser for help on using the repository browser.