Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Importer/SymbolicExpressionImporter.cs @ 17606

Last change on this file since 17606 was 17606, checked in by pfleck, 4 years ago

#3040

  • Extended importer (vectorvariable, vec-aggregations, ...).
  • Started adding unit test for vector simplifications.
File size: 12.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics;
25using System.Linq;
26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
27
28using DoubleVector = MathNet.Numerics.LinearAlgebra.Vector<double>;
29
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Parses a textual s-expression into an <see cref="ISymbolicExpressionTree"/>.
  /// </summary>
  /// <remarks>
  /// Forms recognized by this class (the head token is matched by prefix, see ParseSexp):
  /// <list type="bullet">
  /// <item><description><c>number</c>: constant node</description></item>
  /// <item><description><c>(VARIABLE weight name)</c></description></item>
  /// <item><description><c>(LAGVARIABLE weight name lag)</c></description></item>
  /// <item><description><c>(VECTORVARIABLE weight name)</c></description></item>
  /// <item><description><c>(LAG lag expr)</c> and <c>(INTEGRAL lag expr)</c></description></item>
  /// <item><description><c>(DEFUN name expr*)</c>, <c>(ARG index)</c>, <c>(CALL name expr*)</c></description></item>
  /// <item><description><c>(FACTOR name weight*)</c> and <c>(BINFACTOR name value weight)</c></description></item>
  /// <item><description><c>(symbol expr*)</c> for every key registered in the known-symbols table</description></item>
  /// </list>
  /// </remarks>
  public class SymbolicExpressionImporter {
    // Head-token prefixes that select a dedicated parse method in ParseSexp.
    private const string VARSTART = "VAR";
    private const string LAGGEDVARSTART = "LAGVARIABLE";
    private const string VECVARSTART = "VEC";
    private const string INTEGRALSTART = "INTEG";
    private const string DEFUNSTART = "DEFUN";
    private const string ARGSTART = "ARG";
    private const string INVOKESTART = "CALL";
    private const string TIMELAGSTART = "LAG";
    private const string FACTORSTART = "FACTOR";
    private const string BINFACTORSTART = "BINFACTOR";

    // Exact head token of the variance aggregation; it collides with the VARSTART prefix.
    private const string VARIANCETOKEN = "VAR";

    // Symbols that are parsed generically as "(symbol expr*)".
    // "FACTOR" and "BINFACTOR" are listed here but are always intercepted by their dedicated
    // parse methods in ParseSexp before this table is consulted.
    private readonly Dictionary<string, Symbol> knownSymbols = new Dictionary<string, Symbol>()
      {
        {"+", new Addition()},
        {"/", new Division()},
        {"*", new Multiplication()},
        {"-", new Subtraction()},
        {"ABS", new Absolute() },
        {"EXP", new Exponential()},
        {"LOG", new Logarithm()},
        {"POW", new Power()},
        {"ROOT", new Root()},
        {"SQR", new Square()},
        {"SQRT", new SquareRoot()},
        {"CUBE", new Cube()},
        {"CUBEROOT", new CubeRoot()},
        {"SIN",new Sine()},
        {"COS", new Cosine()},
        {"TAN", new Tangent()},
        {"TANH", new HyperbolicTangent ()},
        {"AIRYA", new AiryA()},
        {"AIRYB", new AiryB()},
        {"BESSEL", new Bessel()},
        {"COSINT", new CosineIntegral()},
        {"SININT", new SineIntegral()},
        {"HYPCOSINT", new HyperbolicCosineIntegral()},
        {"HYPSININT", new HyperbolicSineIntegral()},
        {"FRESNELSININT", new FresnelSineIntegral()},
        {"FRESNELCOSINT", new FresnelCosineIntegral()},
        {"NORM", new Norm()},
        {"ERF", new Erf()},
        {"GAMMA", new Gamma()},
        {"PSI", new Psi()},
        {"DAWSON", new Dawson()},
        {"EXPINT", new ExponentialIntegralEi()},
        {"AQ", new AnalyticQuotient() },
        // NOTE: "MEAN" used to be registered as Average; it is registered as Mean further below.
        {"IF", new IfThenElse()},
        {">", new GreaterThan()},
        {"<", new LessThan()},
        {"AND", new And()},
        {"OR", new Or()},
        {"NOT", new Not()},
        {"XOR", new Xor()},
        {"DIFF", new Derivative()},
        {"PROG", new ProgramRootSymbol()},
        {"MAIN", new StartSymbol()},
        {"FACTOR", new FactorVariable() },
        {"BINFACTOR", new BinaryFactorVariable()},
        {"SUM", new Sum() },
        {"MEAN", new Mean()},
        {"LENGTH", new Length()},
        {"STDEV", new StandardDeviation()},
        {VARIANCETOKEN, new Variance()}
      };

    // Shared symbol instances from which the tree nodes are created.
    private readonly Constant constant = new Constant();
    private readonly Variable variable = new Variable() { VariableDataType = typeof(double) };
    private readonly LaggedVariable laggedVariable = new LaggedVariable();
    private readonly Variable vectorVariable = new Variable() { VariableDataType = typeof(DoubleVector) };
    private readonly Defun defun = new Defun();
    private readonly TimeLag timeLag = new TimeLag();
    private readonly Integral integral = new Integral();
    // Not referenced in this class: ParseFactor deliberately creates a fresh FactorVariable per node.
    private readonly FactorVariable factorVar = new FactorVariable();
    private readonly BinaryFactorVariable binFactorVar = new BinaryFactorVariable();

    private readonly ProgramRootSymbol programRootSymbol = new ProgramRootSymbol();
    private readonly StartSymbol startSymbol = new StartSymbol();

    /// <summary>
    /// Parses <paramref name="str"/> and wraps the result in a <see cref="SymbolicExpressionTree"/>.
    /// </summary>
    /// <param name="str">The s-expression. Tokens may be separated by any white-space (blanks, tabs, line breaks).</param>
    /// <returns>
    /// A tree rooted at the parsed node if that node's symbol is a <see cref="ProgramRootSymbol"/>;
    /// otherwise a tree of the shape program root, start symbol, parsed expression.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
    /// <exception cref="FormatException">An unexpected token was encountered.</exception>
    /// <exception cref="KeyNotFoundException">A function symbol is not registered in the known-symbols table.</exception>
    /// <exception cref="InvalidOperationException">
    /// The input is empty or ends before the expression is complete (raised by the underlying token queue).
    /// </exception>
    /// <remarks>Tokens that follow the first complete expression are not inspected.</remarks>
    public ISymbolicExpressionTree Import(string str) {
      if (str == null) throw new ArgumentNullException("str");

      // Pad parentheses so that the white-space tokenizer yields them as separate tokens.
      str = str.Replace("(", " ( ").Replace(")", " ) ");
      ISymbolicExpressionTreeNode root = programRootSymbol.CreateTreeNode();
      ISymbolicExpressionTreeNode start = startSymbol.CreateTreeNode();
      ISymbolicExpressionTreeNode mainBranch = ParseSexp(new Queue<Token>(GetTokenStream(str)));
      if (mainBranch.Symbol is ProgramRootSymbol) {
        // when a root symbol was parsed => use main branch as root
        root = mainBranch;
      } else {
        // only a main branch was given => insert the main branch into the default tree template
        root.AddSubtree(start);
        start.AddSubtree(mainBranch);
      }
      return new SymbolicExpressionTree(root);
    }

    /// <summary>
    /// Splits <paramref name="str"/> at white-space and converts every piece with Token.Parse,
    /// dropping pieces for which Token.Parse returns null.
    /// </summary>
    private IEnumerable<Token> GetTokenStream(string str) {
      // A null separator array makes Split break on every white-space character, so
      // expressions spanning several lines or indented with tabs tokenize correctly.
      return str.Split((char[])null, StringSplitOptions.RemoveEmptyEntries)
                .Select(piece => Token.Parse(piece))
                .Where(token => token != null);
    }

    /// <summary>
    /// Parses one expression (a number or a parenthesized form) from the front of
    /// <paramref name="tokens"/> and removes the consumed tokens from the queue.
    /// </summary>
    /// <exception cref="FormatException">The next token is neither a number nor an opening parenthesis,
    /// or the form is not terminated by a closing parenthesis where one is expected.</exception>
    private ISymbolicExpressionTreeNode ParseSexp(Queue<Token> tokens) {
      TokenSymbol kind = tokens.Peek().Symbol;
      if (kind == TokenSymbol.NUMBER) {
        ConstantTreeNode constantNode = (ConstantTreeNode)constant.CreateTreeNode();
        constantNode.Value = tokens.Dequeue().DoubleValue;
        return constantNode;
      }
      if (kind != TokenSymbol.LPAR) throw new FormatException("Expected function or constant symbol");

      Expect(Token.LPAR, tokens);
      string head = tokens.Peek().StringValue;
      ISymbolicExpressionTreeNode tree;

      // The order of the prefix tests matters: LAGGEDVARSTART must be tested before TIMELAGSTART
      // because "LAGVARIABLE" also starts with "LAG".
      if (head.StartsWith(VARSTART, StringComparison.Ordinal) && !IsVarianceForm(tokens)) {
        tree = ParseVariable(tokens);
      } else if (head.StartsWith(LAGGEDVARSTART, StringComparison.Ordinal)) {
        tree = ParseLaggedVariable(tokens);
      } else if (head.StartsWith(VECVARSTART, StringComparison.Ordinal)) {
        tree = ParseVectorVariable(tokens);
      } else if (head.StartsWith(TIMELAGSTART, StringComparison.Ordinal)) {
        tree = ParseTimeLag(tokens);
        tree.AddSubtree(ParseSexp(tokens));
      } else if (head.StartsWith(INTEGRALSTART, StringComparison.Ordinal)) {
        tree = ParseIntegral(tokens);
        tree.AddSubtree(ParseSexp(tokens));
      } else if (head.StartsWith(DEFUNSTART, StringComparison.Ordinal)) {
        tree = ParseDefun(tokens);
        ParseSubtrees(tree, tokens);
      } else if (head.StartsWith(ARGSTART, StringComparison.Ordinal)) {
        tree = ParseArgument(tokens);
      } else if (head.StartsWith(INVOKESTART, StringComparison.Ordinal)) {
        tree = ParseInvoke(tokens);
        ParseSubtrees(tree, tokens);
      } else if (head.StartsWith(FACTORSTART, StringComparison.Ordinal)) {
        tree = ParseFactor(tokens);
      } else if (head.StartsWith(BINFACTORSTART, StringComparison.Ordinal)) {
        tree = ParseBinaryFactor(tokens);
      } else {
        // generic form: "(symbol expr*)"; this branch also handles the variance form
        tree = CreateTree(tokens.Dequeue());
        ParseSubtrees(tree, tokens);
      }
      Expect(Token.RPAR, tokens);
      return tree;
    }

    /// <summary>
    /// Tells whether the form at the front of <paramref name="tokens"/> is the variance
    /// aggregation rather than a variable.
    /// </summary>
    /// <remarks>
    /// The known symbol "VAR" (variance) collides with the VARSTART prefix. A variable form
    /// continues with its numeric weight, so only the exact head token "VAR" followed directly
    /// by an opening parenthesis (a sub-expression argument) is treated as variance; every
    /// other input keeps being routed to ParseVariable.
    /// </remarks>
    private static bool IsVarianceForm(Queue<Token> tokens) {
      if (tokens.Peek().StringValue != VARIANCETOKEN) return false;
      Token argument = tokens.Skip(1).FirstOrDefault(); // one-token lookahead; the queue is left unchanged
      return argument != null && argument.Symbol == TokenSymbol.LPAR;
    }

    /// <summary>
    /// Parses expressions and appends them to <paramref name="parent"/> until the next token
    /// is a closing parenthesis. The closing parenthesis itself is not consumed.
    /// </summary>
    private void ParseSubtrees(ISymbolicExpressionTreeNode parent, Queue<Token> tokens) {
      while (!tokens.Peek().Equals(Token.RPAR)) {
        parent.AddSubtree(ParseSexp(tokens));
      }
    }

    /// <summary>Consumes "CALL name" and creates a node of an InvokeFunction symbol for that name.</summary>
    private ISymbolicExpressionTreeNode ParseInvoke(Queue<Token> tokens) {
      Token invokeTok = tokens.Dequeue();
      Debug.Assert(invokeTok.StringValue == "CALL");
      InvokeFunction invokeSym = new InvokeFunction(tokens.Dequeue().StringValue);
      return invokeSym.CreateTreeNode();
    }

    /// <summary>Consumes "ARG index" and creates a node of an Argument symbol; the index is truncated to int.</summary>
    private ISymbolicExpressionTreeNode ParseArgument(Queue<Token> tokens) {
      Token argTok = tokens.Dequeue();
      Debug.Assert(argTok.StringValue == "ARG");
      Argument argument = new Argument((int)tokens.Dequeue().DoubleValue);
      return argument.CreateTreeNode();
    }

    /// <summary>Consumes "DEFUN name" and creates a defun node with that function name.</summary>
    private ISymbolicExpressionTreeNode ParseDefun(Queue<Token> tokens) {
      Token defTok = tokens.Dequeue();
      Debug.Assert(defTok.StringValue == "DEFUN");
      DefunTreeNode defunNode = (DefunTreeNode)defun.CreateTreeNode();
      defunNode.FunctionName = tokens.Dequeue().StringValue;
      return defunNode;
    }

    /// <summary>Consumes "LAG lag" and creates a time-lag node; the lag is truncated to int.</summary>
    private ISymbolicExpressionTreeNode ParseTimeLag(Queue<Token> tokens) {
      Token lagTok = tokens.Dequeue();
      Debug.Assert(lagTok.StringValue == "LAG");
      LaggedTreeNode lagNode = (LaggedTreeNode)timeLag.CreateTreeNode();
      lagNode.Lag = (int)tokens.Dequeue().DoubleValue;
      return lagNode;
    }

    /// <summary>Consumes "INTEGRAL lag" and creates an integral node; the lag is truncated to int.</summary>
    private ISymbolicExpressionTreeNode ParseIntegral(Queue<Token> tokens) {
      Token integralTok = tokens.Dequeue();
      Debug.Assert(integralTok.StringValue == "INTEGRAL");
      LaggedTreeNode integralNode = (LaggedTreeNode)integral.CreateTreeNode();
      integralNode.Lag = (int)tokens.Dequeue().DoubleValue;
      return integralNode;
    }

    /// <summary>Consumes "VARIABLE weight name" and creates a variable node of data type double.</summary>
    private ISymbolicExpressionTreeNode ParseVariable(Queue<Token> tokens) {
      Token varTok = tokens.Dequeue();
      Debug.Assert(varTok.StringValue == "VARIABLE");
      VariableTreeNode varNode = (VariableTreeNode)variable.CreateTreeNode();
      varNode.Weight = tokens.Dequeue().DoubleValue;
      varNode.VariableName = tokens.Dequeue().StringValue;
      return varNode;
    }

    /// <summary>Consumes "VECTORVARIABLE weight name" and creates a variable node of data type DoubleVector.</summary>
    private ISymbolicExpressionTreeNode ParseVectorVariable(Queue<Token> tokens) {
      Token varTok = tokens.Dequeue();
      Debug.Assert(varTok.StringValue == "VECTORVARIABLE");
      VariableTreeNode varNode = (VariableTreeNode)vectorVariable.CreateTreeNode();
      varNode.Weight = tokens.Dequeue().DoubleValue;
      varNode.VariableName = tokens.Dequeue().StringValue;
      return varNode;
    }

    /// <summary>
    /// Consumes "FACTOR name weight*" and creates a factor-variable node carrying all weights.
    /// </summary>
    private ISymbolicExpressionTreeNode ParseFactor(Queue<Token> tokens) {
      Token tok = tokens.Dequeue();
      Debug.Assert(tok.StringValue == "FACTOR");
      FactorVariableTreeNode factorNode = (FactorVariableTreeNode)(new FactorVariable()).CreateTreeNode(); // create a new symbol each time on purpose
      var varNameTok = tokens.Dequeue();
      Debug.Assert(varNameTok.Symbol == TokenSymbol.SYMB); // check the name token, not the already verified head token
      factorNode.VariableName = varNameTok.StringValue;

      var weights = new List<double>();
      while (tokens.Peek().Symbol == TokenSymbol.NUMBER) {
        weights.Add(tokens.Dequeue().DoubleValue);
      }

      factorNode.Weights = weights.ToArray();

      // create a set of (virtual) values "X0", "X1", ... to match the number of weights
      factorNode.Symbol.VariableNames = new string[] { factorNode.VariableName };
      factorNode.Symbol.VariableValues = new[]
      { new KeyValuePair<string, Dictionary<string,int>>(
        factorNode.VariableName,
        Enumerable.Range(0, weights.Count).ToDictionary(i => "X" + i, i => i)) };
      return factorNode;
    }

    /// <summary>Consumes "BINFACTOR name value weight" and creates a binary-factor-variable node.</summary>
    private ISymbolicExpressionTreeNode ParseBinaryFactor(Queue<Token> tokens) {
      Token tok = tokens.Dequeue();
      Debug.Assert(tok.StringValue == "BINFACTOR");
      var binFactorNode = (BinaryFactorVariableTreeNode)binFactorVar.CreateTreeNode();

      var varNameTok = tokens.Dequeue();
      Debug.Assert(varNameTok.Symbol == TokenSymbol.SYMB);
      binFactorNode.VariableName = varNameTok.StringValue;

      var varValTok = tokens.Dequeue();
      Debug.Assert(varValTok.Symbol == TokenSymbol.SYMB);
      binFactorNode.VariableValue = varValTok.StringValue;

      var weightTok = tokens.Dequeue();
      Debug.Assert(weightTok.Symbol == TokenSymbol.NUMBER);
      binFactorNode.Weight = weightTok.DoubleValue;

      return binFactorNode;
    }

    /// <summary>Consumes "LAGVARIABLE weight name lag" and creates a lagged-variable node; the lag is truncated to int.</summary>
    private ISymbolicExpressionTreeNode ParseLaggedVariable(Queue<Token> tokens) {
      Token varTok = tokens.Dequeue();
      Debug.Assert(varTok.StringValue == "LAGVARIABLE");
      LaggedVariableTreeNode laggedNode = (LaggedVariableTreeNode)laggedVariable.CreateTreeNode();
      laggedNode.Weight = tokens.Dequeue().DoubleValue;
      laggedNode.VariableName = tokens.Dequeue().StringValue;
      laggedNode.Lag = (int)tokens.Dequeue().DoubleValue;
      return laggedNode;
    }

    /// <summary>
    /// Creates a node for the known symbol named by <paramref name="token"/>.
    /// </summary>
    /// <exception cref="FormatException"><paramref name="token"/> is not a symbol token.</exception>
    /// <exception cref="KeyNotFoundException">The symbol is not registered in the known-symbols table.</exception>
    private ISymbolicExpressionTreeNode CreateTree(Token token) {
      if (token.Symbol != TokenSymbol.SYMB) throw new FormatException("Expected function symbol, but got: " + token.StringValue);
      Symbol symbol;
      if (!knownSymbols.TryGetValue(token.StringValue, out symbol)) {
        // same exception type as the dictionary indexer, but the message names the offending symbol
        throw new KeyNotFoundException("Unknown symbol: " + token.StringValue);
      }
      return symbol.CreateTreeNode();
    }

    /// <summary>
    /// Removes the next token from <paramref name="tokens"/> and verifies that it equals <paramref name="token"/>.
    /// </summary>
    /// <exception cref="FormatException">The removed token differs from the expected one.</exception>
    private void Expect(Token token, Queue<Token> tokens) {
      Token cur = tokens.Dequeue();
      if (!token.Equals(cur)) throw new FormatException("Expected: " + token.StringValue + ", but got: " + cur.StringValue);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.