Free cookie consent management tool by TermsFeed Policy Generator

source: branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Importer/InfixExpressionParser.cs @ 14251

Last change on this file since 14251 was 14251, checked in by gkronber, 8 years ago

#2650:

  • extended non-linear regression to work with factors
  • fixed bugs in constants optimizer and tree interpreter
  • improved simplification of factor variables
  • added support for factors to ERC view
  • added support for factors to solution comparison view
  • activated view for all factors
File size: 15.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Globalization;
25using System.Linq;
26using System.Text;
27using HeuristicLab.Collections;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29
30namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
31  /// <summary>
32  /// Parses mathematical expressions in infix form. E.g. x1 * (3.0 * x2 + x3)
33  /// Identifier format (functions or variables): '_' | letter { '_' | letter | digit }
34  /// Variables names and variable values can be set under quotes "" or '' because variable names might contain spaces.
35  ///   Variable = ident | " ident " | ' ident '
36  /// It is also possible to use functions e.g. log("x1") or real-valued constants e.g. 3.1415 .
37  /// Variable names are case sensitive. Function names are not case sensitive.
38  ///
39  ///
40  /// S       = Expr EOF
41  /// Expr    = ['-' | '+'] Term { '+' Term | '-' Term }
42  /// Term    = Fact { '*' Fact | '/' Fact }
43  /// Fact    = '(' Expr ')' | funcId '(' Expr ')' | VarExpr | number
44  /// VarExpr = varId [ '=' varVal]
45  /// varId   =  ident | ' ident ' | " ident "
46  /// varVal  =  ident | ' ident ' | " ident "
47  /// ident   =  '_' | letter { '_' | letter | digit }
48  /// </summary>
49  public sealed class InfixExpressionParser {
50    private enum TokenType { Operator, Identifier, Number, LeftPar, RightPar, Eq, End, NA };
51    private class Token {
52      internal double doubleVal;
53      internal string strVal;
54      internal TokenType TokenType;
55    }
56
57    private class SymbolNameComparer : IEqualityComparer<ISymbol>, IComparer<ISymbol> {
58      public int Compare(ISymbol x, ISymbol y) {
59        return x.Name.CompareTo(y.Name);
60      }
61
62      public bool Equals(ISymbol x, ISymbol y) {
63        return Compare(x, y) == 0;
64      }
65
66      public int GetHashCode(ISymbol obj) {
67        return obj.Name.GetHashCode();
68      }
69    }
70    // format name <-> symbol
71    // the lookup table is also used in the corresponding formatter
72    internal static readonly BidirectionalLookup<string, ISymbol>
73      knownSymbols = new BidirectionalLookup<string, ISymbol>(StringComparer.InvariantCulture, new SymbolNameComparer());
74
75    private Constant constant = new Constant();
76    private Variable variable = new Variable();
77    private BinaryFactorVariable binaryFactorVar = new BinaryFactorVariable();
78
79    private ProgramRootSymbol programRootSymbol = new ProgramRootSymbol();
80    private StartSymbol startSymbol = new StartSymbol();
81
82    static InfixExpressionParser() {
83      // populate bidirectional lookup
84      var dict = new Dictionary<string, ISymbol>
85      {
86        { "+", new Addition()},
87        { "/", new Division()},
88        { "*", new Multiplication()},
89        { "-", new Subtraction()},
90        { "EXP", new Exponential()},
91        { "LOG", new Logarithm()},
92        { "POW", new Power()},
93        { "ROOT", new Root()},
94        { "SQR", new Square() },
95        { "SQRT", new SquareRoot() },
96        { "SIN",new Sine()},
97        { "COS", new Cosine()},
98        { "TAN", new Tangent()},
99        { "AIRYA", new AiryA()},
100        { "AIRYB", new AiryB()},
101        { "BESSEL", new Bessel()},
102        { "COSINT", new CosineIntegral()},
103        { "SININT", new SineIntegral()},
104        { "HYPCOSINT", new HyperbolicCosineIntegral()},
105        { "HYPSININT", new HyperbolicSineIntegral()},
106        { "FRESNELSININT", new FresnelSineIntegral()},
107        { "FRESNELCOSINT", new FresnelCosineIntegral()},
108        { "NORM", new Norm()},
109        { "ERF", new Erf()},
110        { "GAMMA", new Gamma()},
111        { "PSI", new Psi()},
112        { "DAWSON", new Dawson()},
113        { "EXPINT", new ExponentialIntegralEi()},
114        { "MEAN", new Average()},
115        { "IF", new IfThenElse()},
116        { ">", new GreaterThan()},
117        { "<", new LessThan()},
118        { "AND", new And()},
119        { "OR", new Or()},
120        { "NOT", new Not()},
121        { "XOR", new Xor()},
122        { "DIFF", new Derivative()},
123      };
124
125
126      foreach (var kvp in dict) {
127        knownSymbols.Add(kvp.Key, kvp.Value);
128      }
129    }
130
131    public ISymbolicExpressionTree Parse(string str) {
132      ISymbolicExpressionTreeNode root = programRootSymbol.CreateTreeNode();
133      ISymbolicExpressionTreeNode start = startSymbol.CreateTreeNode();
134      var allTokens = GetAllTokens(str).ToArray();
135      ISymbolicExpressionTreeNode mainBranch = ParseS(new Queue<Token>(allTokens));
136
137      // only a main branch was given => insert the main branch into the default tree template
138      root.AddSubtree(start);
139      start.AddSubtree(mainBranch);
140      return new SymbolicExpressionTree(root);
141    }
142
143    private IEnumerable<Token> GetAllTokens(string str) {
144      int pos = 0;
145      while (true) {
146        while (pos < str.Length && Char.IsWhiteSpace(str[pos])) pos++;
147        if (pos >= str.Length) {
148          yield return new Token { TokenType = TokenType.End, strVal = "" };
149          yield break;
150        }
151        if (char.IsDigit(str[pos])) {
152          // read number (=> read until white space or operator)
153          var sb = new StringBuilder();
154          sb.Append(str[pos]);
155          pos++;
156          while (pos < str.Length && !char.IsWhiteSpace(str[pos])
157            && (str[pos] != '+' || str[pos - 1] == 'e' || str[pos - 1] == 'E')     // continue reading exponents
158            && (str[pos] != '-' || str[pos - 1] == 'e' || str[pos - 1] == 'E')
159            && str[pos] != '*'
160            && str[pos] != '/'
161            && str[pos] != ')') {
162            sb.Append(str[pos]);
163            pos++;
164          }
165          double dblVal;
166          if (double.TryParse(sb.ToString(), NumberStyles.Float, CultureInfo.InvariantCulture, out dblVal))
167            yield return new Token { TokenType = TokenType.Number, strVal = sb.ToString(), doubleVal = dblVal };
168          else yield return new Token { TokenType = TokenType.NA, strVal = sb.ToString() };
169        } else if (char.IsLetter(str[pos]) || str[pos] == '_') {
170          // read ident
171          var sb = new StringBuilder();
172          sb.Append(str[pos]);
173          pos++;
174          while (pos < str.Length &&
175            (char.IsLetter(str[pos]) || str[pos] == '_' || char.IsDigit(str[pos]))) {
176            sb.Append(str[pos]);
177            pos++;
178          }
179          yield return new Token { TokenType = TokenType.Identifier, strVal = sb.ToString() };
180        } else if (str[pos] == '"') {
181          // read to next "
182          pos++;
183          var sb = new StringBuilder();
184          while (pos < str.Length && str[pos] != '"') {
185            sb.Append(str[pos]);
186            pos++;
187          }
188          if (pos < str.Length && str[pos] == '"') {
189            pos++; // skip "
190            yield return new Token { TokenType = TokenType.Identifier, strVal = sb.ToString() };
191          } else
192            yield return new Token { TokenType = TokenType.NA };
193
194        } else if (str[pos] == '\'') {
195          // read to next '
196          pos++;
197          var sb = new StringBuilder();
198          while (pos < str.Length && str[pos] != '\'') {
199            sb.Append(str[pos]);
200            pos++;
201          }
202          if (pos < str.Length && str[pos] == '\'') {
203            pos++; // skip '
204            yield return new Token { TokenType = TokenType.Identifier, strVal = sb.ToString() };
205          } else
206            yield return new Token { TokenType = TokenType.NA };
207        } else if (str[pos] == '+') {
208          pos++;
209          yield return new Token { TokenType = TokenType.Operator, strVal = "+" };
210        } else if (str[pos] == '-') {
211          pos++;
212          yield return new Token { TokenType = TokenType.Operator, strVal = "-" };
213        } else if (str[pos] == '/') {
214          pos++;
215          yield return new Token { TokenType = TokenType.Operator, strVal = "/" };
216        } else if (str[pos] == '*') {
217          pos++;
218          yield return new Token { TokenType = TokenType.Operator, strVal = "*" };
219        } else if (str[pos] == '(') {
220          pos++;
221          yield return new Token { TokenType = TokenType.LeftPar, strVal = "(" };
222        } else if (str[pos] == ')') {
223          pos++;
224          yield return new Token { TokenType = TokenType.RightPar, strVal = ")" };
225        } else if (str[pos] == '=') {
226          pos++;
227          yield return new Token { TokenType = TokenType.Eq, strVal = "=" };
228        }
229      }
230    }
231
232    private ISymbolicExpressionTreeNode ParseS(Queue<Token> tokens) {
233      var expr = ParseExpr(tokens);
234
235      var endTok = tokens.Dequeue();
236      if (endTok.TokenType != TokenType.End)
237        throw new ArgumentException(string.Format("Expected end of expression (got {0})", endTok.strVal));
238
239      return expr;
240    }
241    private ISymbolicExpressionTreeNode ParseExpr(Queue<Token> tokens) {
242      var next = tokens.Peek();
243      var posTerms = new List<ISymbolicExpressionTreeNode>();
244      var negTerms = new List<ISymbolicExpressionTreeNode>();
245      bool negateFirstTerm = false;
246      if (next.TokenType == TokenType.Operator && (next.strVal == "+" || next.strVal == "-")) {
247        tokens.Dequeue();
248        if (next.strVal == "-")
249          negateFirstTerm = true;
250      }
251      var t = ParseTerm(tokens);
252      if (negateFirstTerm) negTerms.Add(t);
253      else posTerms.Add(t);
254
255      next = tokens.Peek();
256      while (next.strVal == "+" || next.strVal == "-") {
257        switch (next.strVal) {
258          case "+": {
259              tokens.Dequeue();
260              var term = ParseTerm(tokens);
261              posTerms.Add(term);
262              break;
263            }
264          case "-": {
265              tokens.Dequeue();
266              var term = ParseTerm(tokens);
267              negTerms.Add(term);
268              break;
269            }
270        }
271        next = tokens.Peek();
272      }
273
274      var sum = GetSymbol("+").CreateTreeNode();
275      foreach (var posTerm in posTerms) sum.AddSubtree(posTerm);
276      if (negTerms.Any()) {
277        if (negTerms.Count == 1) {
278          var sub = GetSymbol("-").CreateTreeNode();
279          sub.AddSubtree(negTerms.Single());
280          sum.AddSubtree(sub);
281        } else {
282          var sumNeg = GetSymbol("+").CreateTreeNode();
283          foreach (var negTerm in negTerms) sumNeg.AddSubtree(negTerm);
284
285          var constNode = (ConstantTreeNode)constant.CreateTreeNode();
286          constNode.Value = -1.0;
287          var prod = GetSymbol("*").CreateTreeNode();
288          prod.AddSubtree(constNode);
289          prod.AddSubtree(sumNeg);
290
291          sum.AddSubtree(prod);
292        }
293      }
294      if (sum.SubtreeCount == 1) return sum.Subtrees.First();
295      else return sum;
296    }
297
298    private ISymbol GetSymbol(string tok) {
299      var symb = knownSymbols.GetByFirst(tok).FirstOrDefault();
300      if (symb == null) throw new ArgumentException(string.Format("Unknown token {0} found.", tok));
301      return symb;
302    }
303
304    // Term = Fact { '*' Fact | '/' Fact }
305    private ISymbolicExpressionTreeNode ParseTerm(Queue<Token> tokens) {
306      var factors = new List<ISymbolicExpressionTreeNode>();
307      var firstFactor = ParseFact(tokens);
308      factors.Add(firstFactor);
309
310      var next = tokens.Peek();
311      while (next.strVal == "*" || next.strVal == "/") {
312        switch (next.strVal) {
313          case "*": {
314              tokens.Dequeue();
315              var fact = ParseFact(tokens);
316              factors.Add(fact);
317              break;
318            }
319          case "/": {
320              tokens.Dequeue();
321              var invFact = ParseFact(tokens);
322              var divNode = GetSymbol("/").CreateTreeNode(); // 1/x
323              divNode.AddSubtree(invFact);
324              factors.Add(divNode);
325              break;
326            }
327        }
328
329        next = tokens.Peek();
330      }
331      if (factors.Count == 1) return factors.First();
332      else {
333        var prod = GetSymbol("*").CreateTreeNode();
334        foreach (var f in factors) prod.AddSubtree(f);
335        return prod;
336      }
337    }
338
339    // Fact = '(' Expr ')' | funcId '(' Expr ')' | varId [ = valId ] | number
340    private ISymbolicExpressionTreeNode ParseFact(Queue<Token> tokens) {
341      var next = tokens.Peek();
342      if (next.TokenType == TokenType.LeftPar) {
343        tokens.Dequeue();
344        var expr = ParseExpr(tokens);
345        var rPar = tokens.Dequeue();
346        if (rPar.TokenType != TokenType.RightPar)
347          throw new ArgumentException("expected )");
348        return expr;
349      } else if (next.TokenType == TokenType.Identifier) {
350        var idTok = tokens.Dequeue();
351        if (tokens.Peek().TokenType == TokenType.LeftPar) {
352          // function identifier
353          var funcId = idTok.strVal.ToUpperInvariant();
354
355          var funcNode = GetSymbol(funcId).CreateTreeNode();
356          var lPar = tokens.Dequeue();
357          if (lPar.TokenType != TokenType.LeftPar)
358            throw new ArgumentException("expected (");
359          var expr = ParseExpr(tokens);
360          var rPar = tokens.Dequeue();
361          if (rPar.TokenType != TokenType.RightPar)
362            throw new ArgumentException("expected )");
363
364          funcNode.AddSubtree(expr);
365          return funcNode;
366        } else {
367          // variable
368          if (tokens.Peek().TokenType == TokenType.Eq) {
369            // binary factor
370            tokens.Dequeue(); // skip Eq
371            var valTok = tokens.Dequeue();
372            if (valTok.TokenType != TokenType.Identifier) throw new ArgumentException("expected identifier");
373            var binFactorNode = (BinaryFactorVariableTreeNode)binaryFactorVar.CreateTreeNode();
374            binFactorNode.Weight = 1.0;
375            binFactorNode.VariableName = idTok.strVal;
376            binFactorNode.VariableValue = valTok.strVal;
377            return binFactorNode;
378          } else {
379            // variable
380            var varNode = (VariableTreeNode)variable.CreateTreeNode();
381            varNode.Weight = 1.0;
382            varNode.VariableName = idTok.strVal;
383            return varNode;
384          }
385        }
386      } else if (next.TokenType == TokenType.Number) {
387        var numTok = tokens.Dequeue();
388        var constNode = (ConstantTreeNode)constant.CreateTreeNode();
389        constNode.Value = numTok.doubleVal;
390        return constNode;
391      } else {
392        throw new ArgumentException(string.Format("unexpected token in expression {0}", next.strVal));
393      }
394    }
395
396  }
397}
Note: See TracBrowser for help on using the repository browser.