source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Importer/InfixExpressionParser.cs @ 14026

Last change on this file since 14026 was 14026, checked in by gkronber, 5 years ago

#2627: sealed parser and formatter for infix expressions

File size: 14.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Globalization;
25using System.Linq;
26using System.Text;
27using HeuristicLab.Collections;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29
30namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
31  /// <summary>
32  /// Parses mathematical expressions in infix form. E.g. x1 * (3.0 * x2 + x3)
33  /// Identifier format (functions or variables): '_' | letter { '_' | letter | digit }
34  /// Variables names can be set under quotes "" or '' because variable names might contain spaces.
35  /// It is also possible to use functions e.g. log("x1") or real-valued constants e.g. 3.1415 .
36  /// Variable names are case sensitive. Function names are not case sensitive.
37  /// </summary>
38  public sealed class InfixExpressionParser {
39    private enum TokenType { Operator, Identifier, Number, LeftPar, RightPar, End, NA };
40    private class Token {
41      internal double doubleVal;
42      internal string strVal;
43      internal TokenType TokenType;
44    }
45
46    private class SymbolNameComparer : IEqualityComparer<ISymbol>, IComparer<ISymbol> {
47      public int Compare(ISymbol x, ISymbol y) {
48        return x.Name.CompareTo(y.Name);
49      }
50
51      public bool Equals(ISymbol x, ISymbol y) {
52        return Compare(x, y) == 0;
53      }
54
55      public int GetHashCode(ISymbol obj) {
56        return obj.Name.GetHashCode();
57      }
58    }
59    // format name <-> symbol
60    // the lookup table is also used in the corresponding formatter
61    internal static readonly BidirectionalLookup<string, ISymbol>
62      knownSymbols = new BidirectionalLookup<string, ISymbol>(StringComparer.InvariantCulture, new SymbolNameComparer());
63
64    private Constant constant = new Constant();
65    private Variable variable = new Variable();
66
67    private ProgramRootSymbol programRootSymbol = new ProgramRootSymbol();
68    private StartSymbol startSymbol = new StartSymbol();
69
70    static InfixExpressionParser() {
71      // populate bidirectional lookup
72      var dict = new Dictionary<string, ISymbol>
73      {
74        { "+", new Addition()},
75        { "/", new Division()},
76        { "*", new Multiplication()},
77        { "-", new Subtraction()},
78        { "EXP", new Exponential()},
79        { "LOG", new Logarithm()},
80        { "POW", new Power()},
81        { "ROOT", new Root()},
82        { "SQR", new Square() },
83        { "SQRT", new SquareRoot() },
84        { "SIN",new Sine()},
85        { "COS", new Cosine()},
86        { "TAN", new Tangent()},
87        { "AIRYA", new AiryA()},
88        { "AIRYB", new AiryB()},
89        { "BESSEL", new Bessel()},
90        { "COSINT", new CosineIntegral()},
91        { "SININT", new SineIntegral()},
92        { "HYPCOSINT", new HyperbolicCosineIntegral()},
93        { "HYPSININT", new HyperbolicSineIntegral()},
94        { "FRESNELSININT", new FresnelSineIntegral()},
95        { "FRESNELCOSINT", new FresnelCosineIntegral()},
96        { "NORM", new Norm()},
97        { "ERF", new Erf()},
98        { "GAMMA", new Gamma()},
99        { "PSI", new Psi()},
100        { "DAWSON", new Dawson()},
101        { "EXPINT", new ExponentialIntegralEi()},
102        { "MEAN", new Average()},
103        { "IF", new IfThenElse()},
104        { ">", new GreaterThan()},
105        { "<", new LessThan()},
106        { "AND", new And()},
107        { "OR", new Or()},
108        { "NOT", new Not()},
109        { "XOR", new Xor()},
110        { "DIFF", new Derivative()},
111      };
112
113
114      foreach (var kvp in dict) {
115        knownSymbols.Add(kvp.Key, kvp.Value);
116      }
117    }
118
119    public ISymbolicExpressionTree Parse(string str) {
120      ISymbolicExpressionTreeNode root = programRootSymbol.CreateTreeNode();
121      ISymbolicExpressionTreeNode start = startSymbol.CreateTreeNode();
122      var allTokens = GetAllTokens(str).ToArray();
123      ISymbolicExpressionTreeNode mainBranch = ParseS(new Queue<Token>(allTokens));
124
125      // only a main branch was given => insert the main branch into the default tree template
126      root.AddSubtree(start);
127      start.AddSubtree(mainBranch);
128      return new SymbolicExpressionTree(root);
129    }
130
131    private IEnumerable<Token> GetAllTokens(string str) {
132      int pos = 0;
133      while (true) {
134        while (pos < str.Length && Char.IsWhiteSpace(str[pos])) pos++;
135        if (pos >= str.Length) {
136          yield return new Token { TokenType = TokenType.End, strVal = "" };
137          yield break;
138        }
139        if (char.IsDigit(str[pos])) {
140          // read number (=> read until white space or operator)
141          var sb = new StringBuilder();
142          sb.Append(str[pos]);
143          pos++;
144          while (pos < str.Length && !char.IsWhiteSpace(str[pos])
145            && (str[pos] != '+' || str[pos-1] == 'e' || str[pos-1] == 'E')     // continue reading exponents
146            && (str[pos] != '-' || str[pos - 1] == 'e' || str[pos - 1] == 'E')
147            && str[pos] != '*'           
148            && str[pos] != '/'
149            && str[pos] != ')') {
150            sb.Append(str[pos]);
151            pos++;
152          }
153          double dblVal;
154          if (double.TryParse(sb.ToString(), NumberStyles.Float, CultureInfo.InvariantCulture, out dblVal))
155            yield return new Token { TokenType = TokenType.Number, strVal = sb.ToString(), doubleVal = dblVal };
156          else yield return new Token { TokenType = TokenType.NA, strVal = sb.ToString() };
157        } else if (char.IsLetter(str[pos]) || str[pos] == '_') {
158          // read ident
159          var sb = new StringBuilder();
160          sb.Append(str[pos]);
161          pos++;
162          while (pos < str.Length &&
163            (char.IsLetter(str[pos]) || str[pos] == '_' || char.IsDigit(str[pos]))) {
164            sb.Append(str[pos]);
165            pos++;
166          }
167          yield return new Token { TokenType = TokenType.Identifier, strVal = sb.ToString() };
168        } else if (str[pos] == '"') {
169          // read to next "
170          pos++;
171          var sb = new StringBuilder();
172          while (pos < str.Length && str[pos] != '"') {
173            sb.Append(str[pos]);
174            pos++;
175          }
176          if (pos < str.Length && str[pos] == '"') {
177            pos++; // skip "
178            yield return new Token { TokenType = TokenType.Identifier, strVal = sb.ToString() };
179          } else
180            yield return new Token { TokenType = TokenType.NA };
181
182        } else if (str[pos] == '\'') {
183          // read to next '
184          pos++;
185          var sb = new StringBuilder();
186          while (pos < str.Length && str[pos] != '\'') {
187            sb.Append(str[pos]);
188            pos++;
189          }
190          if (pos < str.Length && str[pos] == '\'') {
191            pos++; // skip '
192            yield return new Token { TokenType = TokenType.Identifier, strVal = sb.ToString() };
193          } else
194            yield return new Token { TokenType = TokenType.NA };
195        } else if (str[pos] == '+') {
196          pos++;
197          yield return new Token { TokenType = TokenType.Operator, strVal = "+" };
198        } else if (str[pos] == '-') {
199          pos++;
200          yield return new Token { TokenType = TokenType.Operator, strVal = "-" };
201        } else if (str[pos] == '/') {
202          pos++;
203          yield return new Token { TokenType = TokenType.Operator, strVal = "/" };
204        } else if (str[pos] == '*') {
205          pos++;
206          yield return new Token { TokenType = TokenType.Operator, strVal = "*" };
207        } else if (str[pos] == '(') {
208          pos++;
209          yield return new Token { TokenType = TokenType.LeftPar, strVal = "(" };
210        } else if (str[pos] == ')') {
211          pos++;
212          yield return new Token { TokenType = TokenType.RightPar, strVal = ")" };
213        }
214      }
215    }
216
217    // S = Expr EOF
218    // Expr = ['-' | '+'] Term { '+' Term | '-' Term }
219    // Term = Fact { '*' Fact | '/' Fact }
220    // Fact = '(' Expr ')' | funcId '(' Expr ')' | varId | number
221    private ISymbolicExpressionTreeNode ParseS(Queue<Token> tokens) {
222      var expr = ParseExpr(tokens);
223
224      var endTok = tokens.Dequeue();
225      if (endTok.TokenType != TokenType.End)
226        throw new ArgumentException(string.Format("Expected end of expression (got {0})", endTok.strVal));
227
228      return expr;
229    }
230    private ISymbolicExpressionTreeNode ParseExpr(Queue<Token> tokens) {
231      var next = tokens.Peek();
232      var posTerms = new List<ISymbolicExpressionTreeNode>();
233      var negTerms = new List<ISymbolicExpressionTreeNode>();
234      bool negateFirstTerm = false;
235      if (next.TokenType == TokenType.Operator && (next.strVal == "+" || next.strVal == "-")) {
236        tokens.Dequeue();
237        if (next.strVal == "-")
238          negateFirstTerm = true;
239      }
240      var t = ParseTerm(tokens);
241      if (negateFirstTerm) negTerms.Add(t);
242      else posTerms.Add(t);
243
244      next = tokens.Peek();
245      while (next.strVal == "+" || next.strVal == "-") {
246        switch (next.strVal) {
247          case "+": {
248              tokens.Dequeue();
249              var term = ParseTerm(tokens);
250              posTerms.Add(term);
251              break;
252            }
253          case "-": {
254              tokens.Dequeue();
255              var term = ParseTerm(tokens);
256              negTerms.Add(term);
257              break;
258            }
259        }
260        next = tokens.Peek();
261      }
262
263      var sum = GetSymbol("+").CreateTreeNode();
264      foreach (var posTerm in posTerms) sum.AddSubtree(posTerm);
265      if (negTerms.Any()) {
266        if (negTerms.Count == 1) {
267          var sub = GetSymbol("-").CreateTreeNode();
268          sub.AddSubtree(negTerms.Single());
269          sum.AddSubtree(sub);
270        } else {
271          var sumNeg = GetSymbol("+").CreateTreeNode();
272          foreach (var negTerm in negTerms) sumNeg.AddSubtree(negTerm);
273
274          var constNode = (ConstantTreeNode)constant.CreateTreeNode();
275          constNode.Value = -1.0;
276          var prod = GetSymbol("*").CreateTreeNode();
277          prod.AddSubtree(constNode);
278          prod.AddSubtree(sumNeg);
279
280          sum.AddSubtree(prod);
281        }
282      }
283      if (sum.SubtreeCount == 1) return sum.Subtrees.First();
284      else return sum;
285    }
286
287    private ISymbol GetSymbol(string tok) {
288      var symb = knownSymbols.GetByFirst(tok).FirstOrDefault();
289      if (symb == null) throw new ArgumentException(string.Format("Unknown token {0} found.", tok));
290      return symb;
291    }
292
293    // Term = Fact { '*' Fact | '/' Fact }
294    private ISymbolicExpressionTreeNode ParseTerm(Queue<Token> tokens) {
295      var factors = new List<ISymbolicExpressionTreeNode>();
296      var firstFactor = ParseFact(tokens);
297      factors.Add(firstFactor);
298
299      var next = tokens.Peek();
300      while (next.strVal == "*" || next.strVal == "/") {
301        switch (next.strVal) {
302          case "*": {
303              tokens.Dequeue();
304              var fact = ParseFact(tokens);
305              factors.Add(fact);
306              break;
307            }
308          case "/": {
309              tokens.Dequeue();
310              var invFact = ParseFact(tokens);
311              var divNode = GetSymbol("/").CreateTreeNode(); // 1/x
312              divNode.AddSubtree(invFact);
313              factors.Add(divNode);
314              break;
315            }
316        }
317
318        next = tokens.Peek();
319      }
320      if (factors.Count == 1) return factors.First();
321      else {
322        var prod = GetSymbol("*").CreateTreeNode();
323        foreach (var f in factors) prod.AddSubtree(f);
324        return prod;
325      }
326    }
327
328    // Fact = '(' Expr ')' | funcId '(' Expr ')' | varId | number
329    private ISymbolicExpressionTreeNode ParseFact(Queue<Token> tokens) {
330      var next = tokens.Peek();
331      if (next.TokenType == TokenType.LeftPar) {
332        tokens.Dequeue();
333        var expr = ParseExpr(tokens);
334        var rPar = tokens.Dequeue();
335        if (rPar.TokenType != TokenType.RightPar)
336          throw new ArgumentException("expected )");
337        return expr;
338      } else if (next.TokenType == TokenType.Identifier) {
339        var idTok = tokens.Dequeue();
340        if (tokens.Peek().TokenType == TokenType.LeftPar) {
341          // function identifier
342          var funcId = idTok.strVal.ToUpperInvariant();
343
344          var funcNode = GetSymbol(funcId).CreateTreeNode();
345          var lPar = tokens.Dequeue();
346          if (lPar.TokenType != TokenType.LeftPar)
347            throw new ArgumentException("expected (");
348          var expr = ParseExpr(tokens);
349          var rPar = tokens.Dequeue();
350          if (rPar.TokenType != TokenType.RightPar)
351            throw new ArgumentException("expected )");
352
353          funcNode.AddSubtree(expr);
354          return funcNode;
355        } else {
356          // variable
357          var varNode = (VariableTreeNode)variable.CreateTreeNode();
358          varNode.Weight = 1.0;
359          varNode.VariableName = idTok.strVal;
360          return varNode;
361        }
362      } else if (next.TokenType == TokenType.Number) {
363        var numTok = tokens.Dequeue();
364        var constNode = (ConstantTreeNode)constant.CreateTreeNode();
365        constNode.Value = numTok.doubleVal;
366        return constNode;
367      } else {
368        throw new ArgumentException(string.Format("unexpected token in expression {0}", next.strVal));
369      }
370    }
371  }
372}
Note: See TracBrowser for help on using the repository browser.