Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Importer/SymbolicExpressionImporter.cs @ 18242

Last change on this file since 18242 was 18220, checked in by gkronber, 3 years ago

#3136: reintegrated structure-template GP branch into trunk

File size: 12.3 KB
RevLine 
[5574]1#region License Information
2/* HeuristicLab
[17180]3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
[5574]4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics;
25using System.Linq;
26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
27
[11457]28namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
29  public class SymbolicExpressionImporter {
// Prefixes of the head token of a parenthesized expression that select a dedicated
// parse routine in ParseSexp (matched there with StartsWith).
private const string VARSTART = "VAR";
private const string LAGGEDVARSTART = "LAGVARIABLE";
private const string INTEGRALSTART = "INTEG";
private const string DEFUNSTART = "DEFUN";
private const string ARGSTART = "ARG";
private const string INVOKESTART = "CALL";
private const string TIMELAGSTART = "LAG";

// Maps the textual name of a function symbol to the symbol instance used to create tree nodes.
// The table is only read after construction, hence readonly.
private readonly Dictionary<string, Symbol> knownSymbols = new Dictionary<string, Symbol>()
  {
    {"+", new Addition()},
    {"/", new Division()},
    {"*", new Multiplication()},
    {"-", new Subtraction()},
    {"ABS", new Absolute()},
    {"EXP", new Exponential()},
    {"LOG", new Logarithm()},
    {"POW", new Power()},
    {"ROOT", new Root()},
    {"SQR", new Square()},
    {"SQRT", new SquareRoot()},
    {"CUBE", new Cube()},
    {"CUBEROOT", new CubeRoot()},
    {"SIN", new Sine()},
    {"COS", new Cosine()},
    {"TAN", new Tangent()},
    {"TANH", new HyperbolicTangent()},
    {"AIRYA", new AiryA()},
    {"AIRYB", new AiryB()},
    {"BESSEL", new Bessel()},
    {"COSINT", new CosineIntegral()},
    {"SININT", new SineIntegral()},
    {"HYPCOSINT", new HyperbolicCosineIntegral()},
    {"HYPSININT", new HyperbolicSineIntegral()},
    {"FRESNELSININT", new FresnelSineIntegral()},
    {"FRESNELCOSINT", new FresnelCosineIntegral()},
    {"NORM", new Norm()},
    {"ERF", new Erf()},
    {"GAMMA", new Gamma()},
    {"PSI", new Psi()},
    {"DAWSON", new Dawson()},
    {"EXPINT", new ExponentialIntegralEi()},
    {"AQ", new AnalyticQuotient()},
    {"MEAN", new Average()},
    {"IF", new IfThenElse()},
    {">", new GreaterThan()},
    {"<", new LessThan()},
    {"AND", new And()},
    {"OR", new Or()},
    {"NOT", new Not()},
    {"XOR", new Xor()},
    {"DIFF", new Derivative()},
    {"PROG", new ProgramRootSymbol()},
    {"MAIN", new StartSymbol()},
    {"FACTOR", new FactorVariable()},
    {"BINFACTOR", new BinaryFactorVariable()}
  };

// Symbol instances shared by all nodes that the dedicated parse routines create.
private readonly Number number = new Number();
private readonly Variable variable = new Variable();
private readonly LaggedVariable laggedVariable = new LaggedVariable();
private readonly Defun defun = new Defun();
private readonly TimeLag timeLag = new TimeLag();
private readonly Integral integral = new Integral();
private readonly BinaryFactorVariable binFactorVar = new BinaryFactorVariable();

// Symbols for the default tree template (root -> start -> parsed branch) built by Import.
private readonly ProgramRootSymbol programRootSymbol = new ProgramRootSymbol();
private readonly StartSymbol startSymbol = new StartSymbol();
97
/// <summary>
/// Parses a symbolic expression given in parenthesized prefix notation and wraps the
/// result in a symbolic expression tree.
/// </summary>
/// <param name="str">The textual expression to parse.</param>
/// <returns>
/// A tree whose root is the parsed node if that node is a program root; otherwise a tree
/// consisting of a new program root, a new start node and the parsed node below it.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
/// <exception cref="FormatException">The token sequence is not a valid expression.</exception>
public ISymbolicExpressionTree Import(string str) {
  if (str == null) throw new ArgumentNullException("str");

  // surround structural characters with blanks so that splitting on blanks isolates them as tokens
  str = str.Replace("(", " ( ").Replace(")", " ) ")
    .Replace("<", " < ").Replace(">", " > ")
    .Replace("=", " = ");

  ISymbolicExpressionTreeNode mainBranch = ParseSexp(new Queue<Token>(GetTokenStream(str)));
  if (mainBranch.Symbol is ProgramRootSymbol) {
    // when a root symbol was parsed => use main branch as root
    return new SymbolicExpressionTree(mainBranch);
  }

  // only a main branch was given => insert the main branch into the default tree template
  ISymbolicExpressionTreeNode root = programRootSymbol.CreateTreeNode();
  ISymbolicExpressionTreeNode start = startSymbol.CreateTreeNode();
  root.AddSubtree(start);
  start.AddSubtree(mainBranch);
  return new SymbolicExpressionTree(root);
}
115
/// <summary>
/// Splits the text at blanks, converts every non-empty piece with Token.Parse and
/// yields the tokens that are not null.
/// </summary>
private IEnumerable<Token> GetTokenStream(string str) {
  string[] pieces = str.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries);
  return pieces
    .Select(piece => Token.Parse(piece))
    .Where(parsed => parsed != null);
}
123
/// <summary>
/// Parses one expression from the front of the token queue and removes its tokens.
/// An expression is either a parenthesized form, a constant token, or a bracketed
/// number form (LBRACKET NUM [EQ constant] RBRACKET).
/// </summary>
/// <param name="tokens">The remaining tokens; consumed from the front.</param>
/// <returns>The tree node for the parsed expression including its subtrees.</returns>
/// <exception cref="FormatException">
/// The next token cannot start an expression, or the input ends where this method
/// still requires a token.
/// </exception>
private ISymbolicExpressionTreeNode ParseSexp(Queue<Token> tokens) {
  EnsureNotAtEnd(tokens, "function or number symbol");
  var kind = tokens.Peek().Symbol;

  if (kind == TokenSymbol.LPAR) {
    return ParseParenthesizedExpression(tokens);
  }
  if (kind == TokenSymbol.CONSTANT) {
    var value = tokens.Dequeue().DoubleValue;
    var constant = new Constant() { Value = value };
    return constant.CreateTreeNode();
  }
  if (kind == TokenSymbol.LBRACKET) {
    return ParseNumberNode(tokens);
  }
  throw new FormatException("Expected function or number symbol");
}

// Parses "( head ... )": the head token selects a dedicated parse routine by prefix,
// anything else is looked up as a function symbol followed by its argument expressions.
private ISymbolicExpressionTreeNode ParseParenthesizedExpression(Queue<Token> tokens) {
  Expect(Token.LPAR, tokens);
  EnsureNotAtEnd(tokens, "function symbol");

  // ordinal comparison: the keywords are fixed identifiers, not natural-language text
  string head = tokens.Peek().StringValue;
  ISymbolicExpressionTreeNode tree;
  if (head.StartsWith(VARSTART, StringComparison.Ordinal)) {
    tree = ParseVariable(tokens);
  } else if (head.StartsWith(LAGGEDVARSTART, StringComparison.Ordinal)) {
    // must be tested before TIMELAGSTART, which is a prefix of LAGGEDVARSTART
    tree = ParseLaggedVariable(tokens);
  } else if (head.StartsWith(TIMELAGSTART, StringComparison.Ordinal)) {
    tree = ParseTimeLag(tokens);
    tree.AddSubtree(ParseSexp(tokens));
  } else if (head.StartsWith(INTEGRALSTART, StringComparison.Ordinal)) {
    tree = ParseIntegral(tokens);
    tree.AddSubtree(ParseSexp(tokens));
  } else if (head.StartsWith(DEFUNSTART, StringComparison.Ordinal)) {
    tree = ParseDefun(tokens);
    ParseSubtreesUntilRpar(tree, tokens);
  } else if (head.StartsWith(ARGSTART, StringComparison.Ordinal)) {
    tree = ParseArgument(tokens);
  } else if (head.StartsWith(INVOKESTART, StringComparison.Ordinal)) {
    tree = ParseInvoke(tokens);
    ParseSubtreesUntilRpar(tree, tokens);
  } else if (head.StartsWith("FACTOR", StringComparison.Ordinal)) {
    tree = ParseFactor(tokens);
  } else if (head.StartsWith("BINFACTOR", StringComparison.Ordinal)) {
    tree = ParseBinaryFactor(tokens);
  } else {
    Token curToken = tokens.Dequeue();
    tree = CreateTree(curToken);
    ParseSubtreesUntilRpar(tree, tokens);
  }

  EnsureNotAtEnd(tokens, Token.RPAR.StringValue);
  Expect(Token.RPAR, tokens);
  return tree;
}

// Parses the bracketed number form; the optional "EQ constant" part sets the node's value.
private ISymbolicExpressionTreeNode ParseNumberNode(Queue<Token> tokens) {
  Expect(Token.LBRACKET, tokens);
  EnsureNotAtEnd(tokens, Token.NUM.StringValue);
  Expect(Token.NUM, tokens);
  var t = (NumberTreeNode)number.CreateTreeNode();

  EnsureNotAtEnd(tokens, Token.RBRACKET.StringValue);
  if (tokens.Peek().Symbol == TokenSymbol.EQ) {
    Expect(Token.EQ, tokens);
    EnsureNotAtEnd(tokens, "a real value");
    var initValToken = tokens.Dequeue();
    if (initValToken.Symbol != TokenSymbol.CONSTANT) {
      throw new FormatException("Expected a real value");
    }
    t.Value = initValToken.DoubleValue;
    EnsureNotAtEnd(tokens, Token.RBRACKET.StringValue);
  }

  Expect(Token.RBRACKET, tokens);
  return t;
}

// Parses expressions and adds them as subtrees of parent until the next token is RPAR
// (the RPAR itself is left in the queue).
private void ParseSubtreesUntilRpar(ISymbolicExpressionTreeNode parent, Queue<Token> tokens) {
  while (true) {
    EnsureNotAtEnd(tokens, Token.RPAR.StringValue);
    if (tokens.Peek().Equals(Token.RPAR)) return;
    parent.AddSubtree(ParseSexp(tokens));
  }
}

// Turns "ran out of tokens" into a FormatException instead of the queue's InvalidOperationException.
private static void EnsureNotAtEnd(Queue<Token> tokens, string expected) {
  if (tokens.Count == 0) {
    throw new FormatException("Expected: " + expected + ", but reached the end of the input");
  }
}
184
/// <summary>
/// Consumes the "CALL" keyword token and the following token, whose string value
/// becomes the argument of a new InvokeFunction symbol, and returns a node of that symbol.
/// </summary>
private ISymbolicExpressionTreeNode ParseInvoke(Queue<Token> tokens) {
  var keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "CALL");
  var functionName = tokens.Dequeue().StringValue;
  return new InvokeFunction(functionName).CreateTreeNode();
}
192
/// <summary>
/// Consumes the "ARG" keyword token and the following token, whose numeric value is
/// truncated to an int and passed to a new Argument symbol; returns a node of that symbol.
/// </summary>
private ISymbolicExpressionTreeNode ParseArgument(Queue<Token> tokens) {
  var keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "ARG");
  int argumentIndex = (int)tokens.Dequeue().DoubleValue;
  return new Argument(argumentIndex).CreateTreeNode();
}
200
/// <summary>
/// Consumes the "DEFUN" keyword token and the following token, and returns a new
/// defun node whose function name is that token's string value.
/// </summary>
private ISymbolicExpressionTreeNode ParseDefun(Queue<Token> tokens) {
  var keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "DEFUN");
  var defunNode = (DefunTreeNode)defun.CreateTreeNode();
  var nameToken = tokens.Dequeue();
  defunNode.FunctionName = nameToken.StringValue;
  return defunNode;
}
208
/// <summary>
/// Consumes the "LAG" keyword token and the following token, and returns a new
/// time-lag node whose lag is that token's numeric value truncated to an int.
/// </summary>
private ISymbolicExpressionTreeNode ParseTimeLag(Queue<Token> tokens) {
  var keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "LAG");
  var lagNode = (LaggedTreeNode)timeLag.CreateTreeNode();
  var lagToken = tokens.Dequeue();
  lagNode.Lag = (int)lagToken.DoubleValue;
  return lagNode;
}
216
/// <summary>
/// Consumes the "INTEGRAL" keyword token and the following token, and returns a new
/// integral node whose lag is that token's numeric value truncated to an int.
/// </summary>
private ISymbolicExpressionTreeNode ParseIntegral(Queue<Token> tokens) {
  var keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "INTEGRAL");
  var integralNode = (LaggedTreeNode)integral.CreateTreeNode();
  var lagToken = tokens.Dequeue();
  integralNode.Lag = (int)lagToken.DoubleValue;
  return integralNode;
}
224
/// <summary>
/// Consumes the "VARIABLE" keyword token followed by a weight token and a name token,
/// and returns a new variable node carrying that weight and variable name.
/// </summary>
private ISymbolicExpressionTreeNode ParseVariable(Queue<Token> tokens) {
  var keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "VARIABLE");
  var variableNode = (VariableTreeNode)variable.CreateTreeNode();

  // token order in the input: weight first, then the variable name
  var weightToken = tokens.Dequeue();
  variableNode.Weight = weightToken.DoubleValue;
  var nameToken = tokens.Dequeue();
  variableNode.VariableName = nameToken.StringValue;
  return variableNode;
}
233
/// <summary>
/// Consumes the "FACTOR" keyword token, the variable-name token and all directly
/// following constant tokens, and returns a factor-variable node with one weight per constant.
/// </summary>
/// <param name="tokens">The remaining tokens; consumed from the front.</param>
/// <returns>
/// A node of a freshly created FactorVariable symbol whose variable values are the
/// placeholder names "X0", "X1", ... mapped to their index, one per parsed weight.
/// </returns>
private ISymbolicExpressionTreeNode ParseFactor(Queue<Token> tokens) {
  Token tok = tokens.Dequeue();
  Debug.Assert(tok.StringValue == "FACTOR");
  FactorVariableTreeNode t = (FactorVariableTreeNode)(new FactorVariable()).CreateTreeNode(); // create a new symbol each time on purpose
  var varNameTok = tokens.Dequeue();
  // the variable name (not the keyword) must be a plain symbol token
  Debug.Assert(varNameTok.Symbol == TokenSymbol.SYMB);
  t.VariableName = varNameTok.StringValue;

  // collect the run of constants; stop at the first other token or at the end of the input
  var weights = new List<double>();
  while (tokens.Count > 0 && tokens.Peek().Symbol == TokenSymbol.CONSTANT) {
    weights.Add(tokens.Dequeue().DoubleValue);
  }

  t.Weights = weights.ToArray();

  // create a set of (virtual) values to match the number of weights
  t.Symbol.VariableNames = new string[] { t.VariableName };
  t.Symbol.VariableValues = new[]
  { new KeyValuePair<string, Dictionary<string,int>>(
    t.VariableName,
    Enumerable.Range(0, weights.Count).ToDictionary(i => "X" + i, i => i)) };
  return t;
}
257
/// <summary>
/// Consumes the "BINFACTOR" keyword token followed by a variable-name token, a
/// variable-value token and a weight token, and returns a binary-factor node built from them.
/// </summary>
private ISymbolicExpressionTreeNode ParseBinaryFactor(Queue<Token> tokens) {
  var keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "BINFACTOR");
  var factorNode = (BinaryFactorVariableTreeNode)binFactorVar.CreateTreeNode();

  // variable name
  var nameToken = tokens.Dequeue();
  Debug.Assert(nameToken.Symbol == TokenSymbol.SYMB);
  factorNode.VariableName = nameToken.StringValue;

  // value of the variable this node refers to
  var valueToken = tokens.Dequeue();
  Debug.Assert(valueToken.Symbol == TokenSymbol.SYMB);
  factorNode.VariableValue = valueToken.StringValue;

  // weight
  var weightToken = tokens.Dequeue();
  Debug.Assert(weightToken.Symbol == TokenSymbol.CONSTANT);
  factorNode.Weight = weightToken.DoubleValue;

  return factorNode;
}
276
277
/// <summary>
/// Consumes the "LAGVARIABLE" keyword token followed by a weight token, a name token
/// and a lag token, and returns a new lagged-variable node built from them.
/// The lag token's numeric value is truncated to an int.
/// </summary>
private ISymbolicExpressionTreeNode ParseLaggedVariable(Queue<Token> tokens) {
  var keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "LAGVARIABLE");
  var laggedNode = (LaggedVariableTreeNode)laggedVariable.CreateTreeNode();

  // token order in the input: weight, variable name, lag
  var weightToken = tokens.Dequeue();
  laggedNode.Weight = weightToken.DoubleValue;
  var nameToken = tokens.Dequeue();
  laggedNode.VariableName = nameToken.StringValue;
  var lagToken = tokens.Dequeue();
  laggedNode.Lag = (int)lagToken.DoubleValue;
  return laggedNode;
}
287
/// <summary>
/// Creates a tree node for the function symbol named by the token's string value.
/// </summary>
/// <param name="token">A SYMB, LBRACKET or RBRACKET token naming an entry of knownSymbols.</param>
/// <returns>A new node created by the matching known symbol.</returns>
/// <exception cref="FormatException">
/// The token is of another kind, or its string value is not a known symbol name.
/// </exception>
private ISymbolicExpressionTreeNode CreateTree(Token token) {
  // LBRACKET and RBRACKET are used for <num=..> and as LT, GT operators
  bool canNameFunction = token.Symbol == TokenSymbol.SYMB ||
                         token.Symbol == TokenSymbol.LBRACKET ||
                         token.Symbol == TokenSymbol.RBRACKET;
  if (!canNameFunction) throw new FormatException("Expected function symbol, but got: " + token.StringValue);

  // report unknown names as a format error that states the offending symbol
  Symbol symbol;
  if (!knownSymbols.TryGetValue(token.StringValue, out symbol)) {
    throw new FormatException("Unknown symbol: " + token.StringValue);
  }
  return symbol.CreateTreeNode();
}
295
/// <summary>
/// Removes the next token from the queue and verifies that it equals the expected token.
/// </summary>
/// <param name="token">The token that must come next.</param>
/// <param name="tokens">The remaining tokens; the front token is consumed.</param>
/// <exception cref="FormatException">
/// The queue is empty, or the next token does not equal <paramref name="token"/>.
/// </exception>
private void Expect(Token token, Queue<Token> tokens) {
  // running out of input is a format error of the text, not a misuse of the queue
  if (tokens.Count == 0) {
    throw new FormatException("Expected: " + token.StringValue + ", but reached the end of the input");
  }
  Token cur = tokens.Dequeue();
  if (!token.Equals(cur)) throw new FormatException("Expected: " + token.StringValue + ", but got: " + cur.StringValue);
}
300  }
301}
Note: See TracBrowser for help on using the repository browser.