source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Importer/InfixExpressionParser.cs @ 14347

Last change on this file since 14347 was 14347, checked in by gkronber, 6 years ago

#2677: extended infix parser and infix formatter to support functions with multiple arguments

File size: 15.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Globalization;
25using System.Linq;
26using System.Text;
27using HeuristicLab.Collections;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29
30namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
31  /// <summary>
32  /// Parses mathematical expressions in infix form. E.g. x1 * (3.0 * x2 + x3)
33  /// Identifier format (functions or variables): '_' | letter { '_' | letter | digit }
34  /// Variables names can be set under quotes "" or '' because variable names might contain spaces.
35  /// It is also possible to use functions e.g. log("x1") or real-valued constants e.g. 3.1415 .
36  /// Variable names are case sensitive. Function names are not case sensitive.
37  /// </summary>
38  public sealed class InfixExpressionParser {
39    private enum TokenType { Operator, Identifier, Number, LeftPar, RightPar, Comma, End, NA };
40    private class Token {
41      internal double doubleVal;
42      internal string strVal;
43      internal TokenType TokenType;
44    }
45
46    private class SymbolNameComparer : IEqualityComparer<ISymbol>, IComparer<ISymbol> {
47      public int Compare(ISymbol x, ISymbol y) {
48        return x.Name.CompareTo(y.Name);
49      }
50
51      public bool Equals(ISymbol x, ISymbol y) {
52        return Compare(x, y) == 0;
53      }
54
55      public int GetHashCode(ISymbol obj) {
56        return obj.Name.GetHashCode();
57      }
58    }
59    // format name <-> symbol
60    // the lookup table is also used in the corresponding formatter
61    internal static readonly BidirectionalLookup<string, ISymbol>
62      knownSymbols = new BidirectionalLookup<string, ISymbol>(StringComparer.InvariantCulture, new SymbolNameComparer());
63
64    private Constant constant = new Constant();
65    private Variable variable = new Variable();
66
67    private ProgramRootSymbol programRootSymbol = new ProgramRootSymbol();
68    private StartSymbol startSymbol = new StartSymbol();
69
70    static InfixExpressionParser() {
71      // populate bidirectional lookup
72      var dict = new Dictionary<string, ISymbol>
73      {
74        { "+", new Addition()},
75        { "/", new Division()},
76        { "*", new Multiplication()},
77        { "-", new Subtraction()},
78        { "EXP", new Exponential()},
79        { "LOG", new Logarithm()},
80        { "POW", new Power()},
81        { "ROOT", new Root()},
82        { "SQR", new Square() },
83        { "SQRT", new SquareRoot() },
84        { "SIN",new Sine()},
85        { "COS", new Cosine()},
86        { "TAN", new Tangent()},
87        { "AIRYA", new AiryA()},
88        { "AIRYB", new AiryB()},
89        { "BESSEL", new Bessel()},
90        { "COSINT", new CosineIntegral()},
91        { "SININT", new SineIntegral()},
92        { "HYPCOSINT", new HyperbolicCosineIntegral()},
93        { "HYPSININT", new HyperbolicSineIntegral()},
94        { "FRESNELSININT", new FresnelSineIntegral()},
95        { "FRESNELCOSINT", new FresnelCosineIntegral()},
96        { "NORM", new Norm()},
97        { "ERF", new Erf()},
98        { "GAMMA", new Gamma()},
99        { "PSI", new Psi()},
100        { "DAWSON", new Dawson()},
101        { "EXPINT", new ExponentialIntegralEi()},
102        { "MEAN", new Average()},
103        { "IF", new IfThenElse()},
104        { "GT", new GreaterThan()},
105        { "LT", new LessThan()},
106        { "AND", new And()},
107        { "OR", new Or()},
108        { "NOT", new Not()},
109        { "XOR", new Xor()},
110        { "DIFF", new Derivative()},
111      };
112
113
114      foreach (var kvp in dict) {
115        knownSymbols.Add(kvp.Key, kvp.Value);
116      }
117    }
118
119    public ISymbolicExpressionTree Parse(string str) {
120      ISymbolicExpressionTreeNode root = programRootSymbol.CreateTreeNode();
121      ISymbolicExpressionTreeNode start = startSymbol.CreateTreeNode();
122      var allTokens = GetAllTokens(str).ToArray();
123      ISymbolicExpressionTreeNode mainBranch = ParseS(new Queue<Token>(allTokens));
124
125      // only a main branch was given => insert the main branch into the default tree template
126      root.AddSubtree(start);
127      start.AddSubtree(mainBranch);
128      return new SymbolicExpressionTree(root);
129    }
130
131    private IEnumerable<Token> GetAllTokens(string str) {
132      int pos = 0;
133      while (true) {
134        while (pos < str.Length && Char.IsWhiteSpace(str[pos])) pos++;
135        if (pos >= str.Length) {
136          yield return new Token { TokenType = TokenType.End, strVal = "" };
137          yield break;
138        }
139        if (char.IsDigit(str[pos])) {
140          // read number (=> read until white space or operator or comma)
141          var sb = new StringBuilder();
142          sb.Append(str[pos]);
143          pos++;
144          while (pos < str.Length && !char.IsWhiteSpace(str[pos])
145            && (str[pos] != '+' || str[pos - 1] == 'e' || str[pos - 1] == 'E')     // continue reading exponents
146            && (str[pos] != '-' || str[pos - 1] == 'e' || str[pos - 1] == 'E')
147            && str[pos] != '*'
148            && str[pos] != '/'
149            && str[pos] != ')'
150            && str[pos] != ',') {
151            sb.Append(str[pos]);
152            pos++;
153          }
154          double dblVal;
155          if (double.TryParse(sb.ToString(), NumberStyles.Float, CultureInfo.InvariantCulture, out dblVal))
156            yield return new Token { TokenType = TokenType.Number, strVal = sb.ToString(), doubleVal = dblVal };
157          else yield return new Token { TokenType = TokenType.NA, strVal = sb.ToString() };
158        } else if (char.IsLetter(str[pos]) || str[pos] == '_') {
159          // read ident
160          var sb = new StringBuilder();
161          sb.Append(str[pos]);
162          pos++;
163          while (pos < str.Length &&
164            (char.IsLetter(str[pos]) || str[pos] == '_' || char.IsDigit(str[pos]))) {
165            sb.Append(str[pos]);
166            pos++;
167          }
168          yield return new Token { TokenType = TokenType.Identifier, strVal = sb.ToString() };
169        } else if (str[pos] == '"') {
170          // read to next "
171          pos++;
172          var sb = new StringBuilder();
173          while (pos < str.Length && str[pos] != '"') {
174            sb.Append(str[pos]);
175            pos++;
176          }
177          if (pos < str.Length && str[pos] == '"') {
178            pos++; // skip "
179            yield return new Token { TokenType = TokenType.Identifier, strVal = sb.ToString() };
180          } else
181            yield return new Token { TokenType = TokenType.NA };
182
183        } else if (str[pos] == '\'') {
184          // read to next '
185          pos++;
186          var sb = new StringBuilder();
187          while (pos < str.Length && str[pos] != '\'') {
188            sb.Append(str[pos]);
189            pos++;
190          }
191          if (pos < str.Length && str[pos] == '\'') {
192            pos++; // skip '
193            yield return new Token { TokenType = TokenType.Identifier, strVal = sb.ToString() };
194          } else
195            yield return new Token { TokenType = TokenType.NA };
196        } else if (str[pos] == '+') {
197          pos++;
198          yield return new Token { TokenType = TokenType.Operator, strVal = "+" };
199        } else if (str[pos] == '-') {
200          pos++;
201          yield return new Token { TokenType = TokenType.Operator, strVal = "-" };
202        } else if (str[pos] == '/') {
203          pos++;
204          yield return new Token { TokenType = TokenType.Operator, strVal = "/" };
205        } else if (str[pos] == '*') {
206          pos++;
207          yield return new Token { TokenType = TokenType.Operator, strVal = "*" };
208        } else if (str[pos] == '(') {
209          pos++;
210          yield return new Token { TokenType = TokenType.LeftPar, strVal = "(" };
211        } else if (str[pos] == ')') {
212          pos++;
213          yield return new Token { TokenType = TokenType.RightPar, strVal = ")" };
214        } else if (str[pos] == ',') {
215          pos++;
216          yield return new Token { TokenType = TokenType.Comma, strVal = "," };
217        } else {
218          throw new ArgumentException("Invalid character: " + str[pos]);
219        }
220      }
221    }
222
223    // S       = Expr EOF
224    // Expr    = ['-' | '+'] Term { '+' Term | '-' Term }
225    // Term    = Fact { '*' Fact | '/' Fact }
226    // Fact    = '(' Expr ')' | funcId '(' ArgList ')' | varId | number
227    // ArgList = Expr { ',' Expr }
228    private ISymbolicExpressionTreeNode ParseS(Queue<Token> tokens) {
229      var expr = ParseExpr(tokens);
230
231      var endTok = tokens.Dequeue();
232      if (endTok.TokenType != TokenType.End)
233        throw new ArgumentException(string.Format("Expected end of expression (got {0})", endTok.strVal));
234
235      return expr;
236    }
237    private ISymbolicExpressionTreeNode ParseExpr(Queue<Token> tokens) {
238      var next = tokens.Peek();
239      var posTerms = new List<ISymbolicExpressionTreeNode>();
240      var negTerms = new List<ISymbolicExpressionTreeNode>();
241      bool negateFirstTerm = false;
242      if (next.TokenType == TokenType.Operator && (next.strVal == "+" || next.strVal == "-")) {
243        tokens.Dequeue();
244        if (next.strVal == "-")
245          negateFirstTerm = true;
246      }
247      var t = ParseTerm(tokens);
248      if (negateFirstTerm) negTerms.Add(t);
249      else posTerms.Add(t);
250
251      next = tokens.Peek();
252      while (next.strVal == "+" || next.strVal == "-") {
253        switch (next.strVal) {
254          case "+": {
255              tokens.Dequeue();
256              var term = ParseTerm(tokens);
257              posTerms.Add(term);
258              break;
259            }
260          case "-": {
261              tokens.Dequeue();
262              var term = ParseTerm(tokens);
263              negTerms.Add(term);
264              break;
265            }
266        }
267        next = tokens.Peek();
268      }
269
270      var sum = GetSymbol("+").CreateTreeNode();
271      foreach (var posTerm in posTerms) sum.AddSubtree(posTerm);
272      if (negTerms.Any()) {
273        if (negTerms.Count == 1) {
274          var sub = GetSymbol("-").CreateTreeNode();
275          sub.AddSubtree(negTerms.Single());
276          sum.AddSubtree(sub);
277        } else {
278          var sumNeg = GetSymbol("+").CreateTreeNode();
279          foreach (var negTerm in negTerms) sumNeg.AddSubtree(negTerm);
280
281          var constNode = (ConstantTreeNode)constant.CreateTreeNode();
282          constNode.Value = -1.0;
283          var prod = GetSymbol("*").CreateTreeNode();
284          prod.AddSubtree(constNode);
285          prod.AddSubtree(sumNeg);
286
287          sum.AddSubtree(prod);
288        }
289      }
290      if (sum.SubtreeCount == 1) return sum.Subtrees.First();
291      else return sum;
292    }
293
294    private ISymbol GetSymbol(string tok) {
295      var symb = knownSymbols.GetByFirst(tok).FirstOrDefault();
296      if (symb == null) throw new ArgumentException(string.Format("Unknown token {0} found.", tok));
297      return symb;
298    }
299
300    // Term = Fact { '*' Fact | '/' Fact }
301    private ISymbolicExpressionTreeNode ParseTerm(Queue<Token> tokens) {
302      var factors = new List<ISymbolicExpressionTreeNode>();
303      var firstFactor = ParseFact(tokens);
304      factors.Add(firstFactor);
305
306      var next = tokens.Peek();
307      while (next.strVal == "*" || next.strVal == "/") {
308        switch (next.strVal) {
309          case "*": {
310              tokens.Dequeue();
311              var fact = ParseFact(tokens);
312              factors.Add(fact);
313              break;
314            }
315          case "/": {
316              tokens.Dequeue();
317              var invFact = ParseFact(tokens);
318              var divNode = GetSymbol("/").CreateTreeNode(); // 1/x
319              divNode.AddSubtree(invFact);
320              factors.Add(divNode);
321              break;
322            }
323        }
324
325        next = tokens.Peek();
326      }
327      if (factors.Count == 1) return factors.First();
328      else {
329        var prod = GetSymbol("*").CreateTreeNode();
330        foreach (var f in factors) prod.AddSubtree(f);
331        return prod;
332      }
333    }
334
335    // Fact = '(' Expr ')' | funcId '(' ArgList ')' | varId | number
336    private ISymbolicExpressionTreeNode ParseFact(Queue<Token> tokens) {
337      var next = tokens.Peek();
338      if (next.TokenType == TokenType.LeftPar) {
339        tokens.Dequeue();
340        var expr = ParseExpr(tokens);
341        var rPar = tokens.Dequeue();
342        if (rPar.TokenType != TokenType.RightPar)
343          throw new ArgumentException("expected )");
344        return expr;
345      } else if (next.TokenType == TokenType.Identifier) {
346        var idTok = tokens.Dequeue();
347        if (tokens.Peek().TokenType == TokenType.LeftPar) {
348          // function identifier
349          var funcId = idTok.strVal.ToUpperInvariant();
350
351          var funcNode = GetSymbol(funcId).CreateTreeNode();
352          var lPar = tokens.Dequeue();
353          if (lPar.TokenType != TokenType.LeftPar)
354            throw new ArgumentException("expected (");
355          var args = ParseArgList(tokens);
356
357          // check semantic constraints
358          if (funcNode.Symbol.MinimumArity > args.Length || funcNode.Symbol.MaximumArity < args.Length)
359            throw new ArgumentException(string.Format("Symbol {0} requires between {1} and  {2} arguments.", funcId,
360              funcNode.Symbol.MinimumArity, funcNode.Symbol.MaximumArity));
361          foreach (var arg in args) funcNode.AddSubtree(arg);
362
363          var rPar = tokens.Dequeue();
364          if (rPar.TokenType != TokenType.RightPar)
365            throw new ArgumentException("expected )");
366
367          return funcNode;
368        } else {
369          // variable
370          var varNode = (VariableTreeNode)variable.CreateTreeNode();
371          varNode.Weight = 1.0;
372          varNode.VariableName = idTok.strVal;
373          return varNode;
374        }
375      } else if (next.TokenType == TokenType.Number) {
376        var numTok = tokens.Dequeue();
377        var constNode = (ConstantTreeNode)constant.CreateTreeNode();
378        constNode.Value = numTok.doubleVal;
379        return constNode;
380      } else {
381        throw new ArgumentException(string.Format("unexpected token in expression {0}", next.strVal));
382      }
383    }
384
385    // ArgList = Expr { ',' Expr }
386    private ISymbolicExpressionTreeNode[] ParseArgList(Queue<Token> tokens) {
387      var exprList = new List<ISymbolicExpressionTreeNode>();
388      exprList.Add(ParseExpr(tokens));
389      while (tokens.Peek().TokenType != TokenType.RightPar) {
390        var comma = tokens.Dequeue();
391        if (comma.TokenType != TokenType.Comma) throw new ArgumentException("expected ',' ");
392        exprList.Add(ParseExpr(tokens));
393      }
394      return exprList.ToArray();
395    }
396  }
397}
Note: See TracBrowser for help on using the repository browser.