#region License Information /* HeuristicLab * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL) * * This file is part of HeuristicLab. * * HeuristicLab is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * HeuristicLab is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with HeuristicLab. If not, see . */ #endregion using System; using System.Collections.Generic; using System.Globalization; using System.Linq; using System.Text; using HeuristicLab.Collections; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; namespace HeuristicLab.Problems.DataAnalysis.Symbolic { /// /// Parses mathematical expressions in infix form. E.g. x1 * (3.0 * x2 + x3) /// Identifier format (functions or variables): '_' | letter { '_' | letter | digit } /// Variables names can be set under quotes "" or '' because variable names might contain spaces. /// It is also possible to use functions e.g. log("x1") or real-valued constants e.g. 3.1415 . /// Variable names are case sensitive. Function names are not case sensitive. /// public sealed class InfixExpressionParser { private enum TokenType { Operator, Identifier, Number, LeftPar, RightPar, End, NA }; private class Token { internal double doubleVal; internal string strVal; internal TokenType TokenType; } private class SymbolNameComparer : IEqualityComparer, IComparer { public int Compare(ISymbol x, ISymbol y) { return x.Name.CompareTo(y.Name); } public bool Equals(ISymbol x, ISymbol y) { return Compare(x, y) == 0; } public int GetHashCode(ISymbol obj) { return obj.Name.GetHashCode(); } } // format name <-> symbol // the lookup table is also used in the corresponding formatter internal static readonly BidirectionalLookup knownSymbols = new BidirectionalLookup(StringComparer.InvariantCulture, new SymbolNameComparer()); private Constant constant = new Constant(); private Variable variable = new Variable(); private ProgramRootSymbol programRootSymbol = new ProgramRootSymbol(); private StartSymbol startSymbol = new StartSymbol(); static InfixExpressionParser() { // populate bidirectional lookup var dict = new Dictionary { { "+", new Addition()}, { "/", new Division()}, { "*", new Multiplication()}, { "-", new Subtraction()}, { "EXP", new Exponential()}, { "LOG", new Logarithm()}, { "POW", new Power()}, { "ROOT", new Root()}, { "SQR", new Square() }, { "SQRT", new SquareRoot() }, { "SIN",new Sine()}, { "COS", new Cosine()}, { "TAN", new Tangent()}, { "AIRYA", new AiryA()}, { "AIRYB", new AiryB()}, { "BESSEL", new Bessel()}, { "COSINT", new CosineIntegral()}, { "SININT", new SineIntegral()}, { "HYPCOSINT", new HyperbolicCosineIntegral()}, { "HYPSININT", new HyperbolicSineIntegral()}, { "FRESNELSININT", new FresnelSineIntegral()}, { "FRESNELCOSINT", new FresnelCosineIntegral()}, { "NORM", new Norm()}, { "ERF", new Erf()}, { "GAMMA", new Gamma()}, { "PSI", new Psi()}, { "DAWSON", new Dawson()}, { "EXPINT", new ExponentialIntegralEi()}, { "MEAN", new Average()}, { "IF", new IfThenElse()}, { ">", new GreaterThan()}, { "<", new LessThan()}, { "AND", new And()}, { "OR", new Or()}, { "NOT", new Not()}, { "XOR", new Xor()}, { "DIFF", new Derivative()}, }; foreach (var kvp in dict) { knownSymbols.Add(kvp.Key, kvp.Value); } } public ISymbolicExpressionTree Parse(string str) { ISymbolicExpressionTreeNode root = programRootSymbol.CreateTreeNode(); ISymbolicExpressionTreeNode start = startSymbol.CreateTreeNode(); var allTokens = GetAllTokens(str).ToArray(); ISymbolicExpressionTreeNode mainBranch = ParseS(new Queue(allTokens)); // only a main branch was given => insert the main branch into the default tree template root.AddSubtree(start); start.AddSubtree(mainBranch); return new SymbolicExpressionTree(root); } private IEnumerable GetAllTokens(string str) { int pos = 0; while (true) { while (pos < str.Length && Char.IsWhiteSpace(str[pos])) pos++; if (pos >= str.Length) { yield return new Token { TokenType = TokenType.End, strVal = "" }; yield break; } if (char.IsDigit(str[pos])) { // read number (=> read until white space or operator) var sb = new StringBuilder(); sb.Append(str[pos]); pos++; while (pos < str.Length && !char.IsWhiteSpace(str[pos]) && (str[pos] != '+' || str[pos-1] == 'e' || str[pos-1] == 'E') // continue reading exponents && (str[pos] != '-' || str[pos - 1] == 'e' || str[pos - 1] == 'E') && str[pos] != '*' && str[pos] != '/' && str[pos] != ')') { sb.Append(str[pos]); pos++; } double dblVal; if (double.TryParse(sb.ToString(), NumberStyles.Float, CultureInfo.InvariantCulture, out dblVal)) yield return new Token { TokenType = TokenType.Number, strVal = sb.ToString(), doubleVal = dblVal }; else yield return new Token { TokenType = TokenType.NA, strVal = sb.ToString() }; } else if (char.IsLetter(str[pos]) || str[pos] == '_') { // read ident var sb = new StringBuilder(); sb.Append(str[pos]); pos++; while (pos < str.Length && (char.IsLetter(str[pos]) || str[pos] == '_' || char.IsDigit(str[pos]))) { sb.Append(str[pos]); pos++; } yield return new Token { TokenType = TokenType.Identifier, strVal = sb.ToString() }; } else if (str[pos] == '"') { // read to next " pos++; var sb = new StringBuilder(); while (pos < str.Length && str[pos] != '"') { sb.Append(str[pos]); pos++; } if (pos < str.Length && str[pos] == '"') { pos++; // skip " yield return new Token { TokenType = TokenType.Identifier, strVal = sb.ToString() }; } else yield return new Token { TokenType = TokenType.NA }; } else if (str[pos] == '\'') { // read to next ' pos++; var sb = new StringBuilder(); while (pos < str.Length && str[pos] != '\'') { sb.Append(str[pos]); pos++; } if (pos < str.Length && str[pos] == '\'') { pos++; // skip ' yield return new Token { TokenType = TokenType.Identifier, strVal = sb.ToString() }; } else yield return new Token { TokenType = TokenType.NA }; } else if (str[pos] == '+') { pos++; yield return new Token { TokenType = TokenType.Operator, strVal = "+" }; } else if (str[pos] == '-') { pos++; yield return new Token { TokenType = TokenType.Operator, strVal = "-" }; } else if (str[pos] == '/') { pos++; yield return new Token { TokenType = TokenType.Operator, strVal = "/" }; } else if (str[pos] == '*') { pos++; yield return new Token { TokenType = TokenType.Operator, strVal = "*" }; } else if (str[pos] == '(') { pos++; yield return new Token { TokenType = TokenType.LeftPar, strVal = "(" }; } else if (str[pos] == ')') { pos++; yield return new Token { TokenType = TokenType.RightPar, strVal = ")" }; } } } // S = Expr EOF // Expr = ['-' | '+'] Term { '+' Term | '-' Term } // Term = Fact { '*' Fact | '/' Fact } // Fact = '(' Expr ')' | funcId '(' Expr ')' | varId | number private ISymbolicExpressionTreeNode ParseS(Queue tokens) { var expr = ParseExpr(tokens); var endTok = tokens.Dequeue(); if (endTok.TokenType != TokenType.End) throw new ArgumentException(string.Format("Expected end of expression (got {0})", endTok.strVal)); return expr; } private ISymbolicExpressionTreeNode ParseExpr(Queue tokens) { var next = tokens.Peek(); var posTerms = new List(); var negTerms = new List(); bool negateFirstTerm = false; if (next.TokenType == TokenType.Operator && (next.strVal == "+" || next.strVal == "-")) { tokens.Dequeue(); if (next.strVal == "-") negateFirstTerm = true; } var t = ParseTerm(tokens); if (negateFirstTerm) negTerms.Add(t); else posTerms.Add(t); next = tokens.Peek(); while (next.strVal == "+" || next.strVal == "-") { switch (next.strVal) { case "+": { tokens.Dequeue(); var term = ParseTerm(tokens); posTerms.Add(term); break; } case "-": { tokens.Dequeue(); var term = ParseTerm(tokens); negTerms.Add(term); break; } } next = tokens.Peek(); } var sum = GetSymbol("+").CreateTreeNode(); foreach (var posTerm in posTerms) sum.AddSubtree(posTerm); if (negTerms.Any()) { if (negTerms.Count == 1) { var sub = GetSymbol("-").CreateTreeNode(); sub.AddSubtree(negTerms.Single()); sum.AddSubtree(sub); } else { var sumNeg = GetSymbol("+").CreateTreeNode(); foreach (var negTerm in negTerms) sumNeg.AddSubtree(negTerm); var constNode = (ConstantTreeNode)constant.CreateTreeNode(); constNode.Value = -1.0; var prod = GetSymbol("*").CreateTreeNode(); prod.AddSubtree(constNode); prod.AddSubtree(sumNeg); sum.AddSubtree(prod); } } if (sum.SubtreeCount == 1) return sum.Subtrees.First(); else return sum; } private ISymbol GetSymbol(string tok) { var symb = knownSymbols.GetByFirst(tok).FirstOrDefault(); if (symb == null) throw new ArgumentException(string.Format("Unknown token {0} found.", tok)); return symb; } // Term = Fact { '*' Fact | '/' Fact } private ISymbolicExpressionTreeNode ParseTerm(Queue tokens) { var factors = new List(); var firstFactor = ParseFact(tokens); factors.Add(firstFactor); var next = tokens.Peek(); while (next.strVal == "*" || next.strVal == "/") { switch (next.strVal) { case "*": { tokens.Dequeue(); var fact = ParseFact(tokens); factors.Add(fact); break; } case "/": { tokens.Dequeue(); var invFact = ParseFact(tokens); var divNode = GetSymbol("/").CreateTreeNode(); // 1/x divNode.AddSubtree(invFact); factors.Add(divNode); break; } } next = tokens.Peek(); } if (factors.Count == 1) return factors.First(); else { var prod = GetSymbol("*").CreateTreeNode(); foreach (var f in factors) prod.AddSubtree(f); return prod; } } // Fact = '(' Expr ')' | funcId '(' Expr ')' | varId | number private ISymbolicExpressionTreeNode ParseFact(Queue tokens) { var next = tokens.Peek(); if (next.TokenType == TokenType.LeftPar) { tokens.Dequeue(); var expr = ParseExpr(tokens); var rPar = tokens.Dequeue(); if (rPar.TokenType != TokenType.RightPar) throw new ArgumentException("expected )"); return expr; } else if (next.TokenType == TokenType.Identifier) { var idTok = tokens.Dequeue(); if (tokens.Peek().TokenType == TokenType.LeftPar) { // function identifier var funcId = idTok.strVal.ToUpperInvariant(); var funcNode = GetSymbol(funcId).CreateTreeNode(); var lPar = tokens.Dequeue(); if (lPar.TokenType != TokenType.LeftPar) throw new ArgumentException("expected ("); var expr = ParseExpr(tokens); var rPar = tokens.Dequeue(); if (rPar.TokenType != TokenType.RightPar) throw new ArgumentException("expected )"); funcNode.AddSubtree(expr); return funcNode; } else { // variable var varNode = (VariableTreeNode)variable.CreateTreeNode(); varNode.Weight = 1.0; varNode.VariableName = idTok.strVal; return varNode; } } else if (next.TokenType == TokenType.Number) { var numTok = tokens.Dequeue(); var constNode = (ConstantTreeNode)constant.CreateTreeNode(); constNode.Value = numTok.doubleVal; return constNode; } else { throw new ArgumentException(string.Format("unexpected token in expression {0}", next.strVal)); } } } }