Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2990_VariableImpactBasedFeatureSelection/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Importer/SymbolicExpressionImporter.cs @ 17607

Last change on this file since 17607 was 16565, checked in by gkronber, 6 years ago

#2520: merged changes from PersistenceOverhaul branch (r16451:16564) into trunk

File size: 11.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Diagnostics;
25using System.Linq;
26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
27
28namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
/// <summary>
/// Parses a textual prefix expression (s-expression) into an <see cref="ISymbolicExpressionTree"/>.
/// </summary>
/// <remarks>
/// Recognized list forms (dispatch is by prefix of the first token after the opening parenthesis):
/// <list type="bullet">
/// <item><description><c>(VAR... weight name)</c> - variable</description></item>
/// <item><description><c>(LAGVARIABLE weight name lag)</c> - lagged variable</description></item>
/// <item><description><c>(LAG lag expr)</c> - time lag applied to one sub-expression</description></item>
/// <item><description><c>(INTEG... lag expr)</c> - integral applied to one sub-expression</description></item>
/// <item><description><c>(DEFUN name expr...)</c> - function definition</description></item>
/// <item><description><c>(ARG index)</c> - argument reference</description></item>
/// <item><description><c>(CALL name expr...)</c> - function invocation</description></item>
/// <item><description><c>(FACTOR name weight...)</c> - factor variable</description></item>
/// <item><description><c>(BINFACTOR name value weight)</c> - binary factor variable</description></item>
/// <item><description><c>(symbol expr...)</c> - any symbol registered in the known-symbol table</description></item>
/// </list>
/// A bare number token becomes a constant node. Malformed input is reported as <see cref="FormatException"/>.
/// </remarks>
public class SymbolicExpressionImporter {
  private const string VARSTART = "VAR";
  private const string LAGGEDVARSTART = "LAGVARIABLE";
  private const string INTEGRALSTART = "INTEG";
  private const string DEFUNSTART = "DEFUN";
  private const string ARGSTART = "ARG";
  private const string INVOKESTART = "CALL";
  private const string TIMELAGSTART = "LAG";
  private const string FACTORSTART = "FACTOR";
  private const string BINFACTORSTART = "BINFACTOR";

  // Symbols that take only sub-expressions as arguments, keyed by their textual name.
  private readonly Dictionary<string, Symbol> knownSymbols = new Dictionary<string, Symbol>()
    {
      {"+", new Addition()},
      {"/", new Division()},
      {"*", new Multiplication()},
      {"-", new Subtraction()},
      {"ABS", new Absolute() },
      {"EXP", new Exponential()},
      {"LOG", new Logarithm()},
      {"POW", new Power()},
      {"ROOT", new Root()},
      {"SQR", new Square()},
      {"SQRT", new SquareRoot()},
      {"CUBE", new Cube()},
      {"CUBEROOT", new CubeRoot()},
      {"SIN",new Sine()},
      {"COS", new Cosine()},
      {"TAN", new Tangent()},
      {"AIRYA", new AiryA()},
      {"AIRYB", new AiryB()},
      {"BESSEL", new Bessel()},
      {"COSINT", new CosineIntegral()},
      {"SININT", new SineIntegral()},
      {"HYPCOSINT", new HyperbolicCosineIntegral()},
      {"HYPSININT", new HyperbolicSineIntegral()},
      {"FRESNELSININT", new FresnelSineIntegral()},
      {"FRESNELCOSINT", new FresnelCosineIntegral()},
      {"NORM", new Norm()},
      {"ERF", new Erf()},
      {"GAMMA", new Gamma()},
      {"PSI", new Psi()},
      {"DAWSON", new Dawson()},
      {"EXPINT", new ExponentialIntegralEi()},
      {"AQ", new AnalyticQuotient() },
      {"MEAN", new Average()},
      {"IF", new IfThenElse()},
      {">", new GreaterThan()},
      {"<", new LessThan()},
      {"AND", new And()},
      {"OR", new Or()},
      {"NOT", new Not()},
      {"XOR", new Xor()},
      {"DIFF", new Derivative()},
      {"PROG", new ProgramRootSymbol()},
      {"MAIN", new StartSymbol()},
      {"FACTOR", new FactorVariable() },
      {"BINFACTOR", new BinaryFactorVariable()}
    };

  // Shared symbol instances used to create the nodes of the special forms.
  private readonly Constant constant = new Constant();
  private readonly Variable variable = new Variable();
  private readonly LaggedVariable laggedVariable = new LaggedVariable();
  private readonly Defun defun = new Defun();
  private readonly TimeLag timeLag = new TimeLag();
  private readonly Integral integral = new Integral();
  private readonly BinaryFactorVariable binFactorVar = new BinaryFactorVariable();

  private readonly ProgramRootSymbol programRootSymbol = new ProgramRootSymbol();
  private readonly StartSymbol startSymbol = new StartSymbol();

  /// <summary>
  /// Parses <paramref name="str"/> and returns the resulting tree.
  /// </summary>
  /// <param name="str">The expression text. Parentheses do not need surrounding spaces.</param>
  /// <returns>
  /// A tree rooted at the parsed node when that node is a program root; otherwise a tree in which the
  /// parsed expression is wrapped in a newly created program-root / start-symbol pair.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
  /// <exception cref="FormatException">The text is empty, truncated or otherwise malformed.</exception>
  public ISymbolicExpressionTree Import(string str) {
    if (str == null) throw new ArgumentNullException("str");

    // pad parentheses so that splitting on spaces yields them as separate tokens
    string padded = str.Replace("(", " ( ").Replace(")", " ) ");
    var tokens = new Queue<Token>(GetTokenStream(padded));
    ISymbolicExpressionTreeNode mainBranch = ParseSexp(tokens);

    if (mainBranch.Symbol is ProgramRootSymbol) {
      // when a root symbol was parsed => use main branch as root
      return new SymbolicExpressionTree(mainBranch);
    }

    // only a main branch was given => insert the main branch into the default tree template
    ISymbolicExpressionTreeNode root = programRootSymbol.CreateTreeNode();
    ISymbolicExpressionTreeNode start = startSymbol.CreateTreeNode();
    root.AddSubtree(start);
    start.AddSubtree(mainBranch);
    return new SymbolicExpressionTree(root);
  }

  /// <summary>
  /// Splits <paramref name="str"/> on single spaces and converts every non-empty piece with
  /// <c>Token.Parse</c>, dropping pieces for which it returns null.
  /// </summary>
  private IEnumerable<Token> GetTokenStream(string str) {
    return str.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries)
              .Select(piece => Token.Parse(piece))
              .Where(token => token != null);
  }

  /// <summary>
  /// Parses one expression (a parenthesized form or a number) from the front of <paramref name="tokens"/>.
  /// </summary>
  /// <exception cref="FormatException">
  /// The next token is neither an opening parenthesis nor a number, or the input ends prematurely.
  /// </exception>
  private ISymbolicExpressionTreeNode ParseSexp(Queue<Token> tokens) {
    Token first = PeekToken(tokens);

    if (first.Symbol == TokenSymbol.NUMBER) {
      ConstantTreeNode constNode = (ConstantTreeNode)constant.CreateTreeNode();
      constNode.Value = NextToken(tokens).DoubleValue;
      return constNode;
    }
    if (first.Symbol != TokenSymbol.LPAR) throw new FormatException("Expected function or constant symbol");

    Expect(Token.LPAR, tokens);
    string head = PeekToken(tokens).StringValue;
    ISymbolicExpressionTreeNode tree;

    // Keywords are matched by prefix with ordinal comparison (they are identifiers, not linguistic text).
    // The order matters: LAGVARIABLE must be tested before its prefix LAG.
    if (head.StartsWith(VARSTART, StringComparison.Ordinal)) {
      tree = ParseVariable(tokens);
    } else if (head.StartsWith(LAGGEDVARSTART, StringComparison.Ordinal)) {
      tree = ParseLaggedVariable(tokens);
    } else if (head.StartsWith(TIMELAGSTART, StringComparison.Ordinal)) {
      tree = ParseTimeLag(tokens);
      tree.AddSubtree(ParseSexp(tokens));
    } else if (head.StartsWith(INTEGRALSTART, StringComparison.Ordinal)) {
      tree = ParseIntegral(tokens);
      tree.AddSubtree(ParseSexp(tokens));
    } else if (head.StartsWith(DEFUNSTART, StringComparison.Ordinal)) {
      tree = ParseDefun(tokens);
      ParseSubtreesUntilRpar(tree, tokens);
    } else if (head.StartsWith(ARGSTART, StringComparison.Ordinal)) {
      tree = ParseArgument(tokens);
    } else if (head.StartsWith(INVOKESTART, StringComparison.Ordinal)) {
      tree = ParseInvoke(tokens);
      ParseSubtreesUntilRpar(tree, tokens);
    } else if (head.StartsWith(FACTORSTART, StringComparison.Ordinal)) {
      tree = ParseFactor(tokens);
    } else if (head.StartsWith(BINFACTORSTART, StringComparison.Ordinal)) {
      tree = ParseBinaryFactor(tokens);
    } else {
      tree = CreateTree(NextToken(tokens));
      ParseSubtreesUntilRpar(tree, tokens);
    }

    Expect(Token.RPAR, tokens);
    return tree;
  }

  /// <summary>
  /// Parses expressions and appends them to <paramref name="tree"/> until the next token is a closing
  /// parenthesis. The closing parenthesis itself is left in the queue.
  /// </summary>
  private void ParseSubtreesUntilRpar(ISymbolicExpressionTreeNode tree, Queue<Token> tokens) {
    while (!PeekToken(tokens).Equals(Token.RPAR)) {
      tree.AddSubtree(ParseSexp(tokens));
    }
  }

  /// <summary>Parses <c>CALL name</c>; the argument expressions are parsed by the caller.</summary>
  private ISymbolicExpressionTreeNode ParseInvoke(Queue<Token> tokens) {
    Token invokeTok = NextToken(tokens);
    Debug.Assert(invokeTok.StringValue == "CALL");
    InvokeFunction invokeSym = new InvokeFunction(NextToken(tokens).StringValue);
    return invokeSym.CreateTreeNode();
  }

  /// <summary>Parses <c>ARG index</c>; the index token's numeric value is truncated to an int.</summary>
  private ISymbolicExpressionTreeNode ParseArgument(Queue<Token> tokens) {
    Token argTok = NextToken(tokens);
    Debug.Assert(argTok.StringValue == "ARG");
    Argument argument = new Argument((int)NextToken(tokens).DoubleValue);
    return argument.CreateTreeNode();
  }

  /// <summary>Parses <c>DEFUN name</c>; the body expressions are parsed by the caller.</summary>
  private ISymbolicExpressionTreeNode ParseDefun(Queue<Token> tokens) {
    Token defTok = NextToken(tokens);
    Debug.Assert(defTok.StringValue == "DEFUN");
    DefunTreeNode node = (DefunTreeNode)defun.CreateTreeNode();
    node.FunctionName = NextToken(tokens).StringValue;
    return node;
  }

  /// <summary>Parses <c>LAG lag</c>; the lagged sub-expression is parsed by the caller.</summary>
  private ISymbolicExpressionTreeNode ParseTimeLag(Queue<Token> tokens) {
    Token lagTok = NextToken(tokens);
    Debug.Assert(lagTok.StringValue == "LAG");
    LaggedTreeNode node = (LaggedTreeNode)timeLag.CreateTreeNode();
    node.Lag = (int)NextToken(tokens).DoubleValue;
    return node;
  }

  /// <summary>Parses <c>INTEGRAL lag</c>; the integrated sub-expression is parsed by the caller.</summary>
  private ISymbolicExpressionTreeNode ParseIntegral(Queue<Token> tokens) {
    Token integralTok = NextToken(tokens);
    Debug.Assert(integralTok.StringValue == "INTEGRAL");
    LaggedTreeNode node = (LaggedTreeNode)integral.CreateTreeNode();
    node.Lag = (int)NextToken(tokens).DoubleValue;
    return node;
  }

  /// <summary>Parses <c>VARIABLE weight name</c>.</summary>
  private ISymbolicExpressionTreeNode ParseVariable(Queue<Token> tokens) {
    Token varTok = NextToken(tokens);
    Debug.Assert(varTok.StringValue == "VARIABLE");
    VariableTreeNode node = (VariableTreeNode)variable.CreateTreeNode();
    node.Weight = NextToken(tokens).DoubleValue;
    node.VariableName = NextToken(tokens).StringValue;
    return node;
  }

  /// <summary>
  /// Parses <c>FACTOR name weight...</c>, consuming number tokens as weights until a non-number token
  /// is encountered.
  /// </summary>
  private ISymbolicExpressionTreeNode ParseFactor(Queue<Token> tokens) {
    Token tok = NextToken(tokens);
    Debug.Assert(tok.StringValue == "FACTOR");
    FactorVariableTreeNode node = (FactorVariableTreeNode)(new FactorVariable()).CreateTreeNode(); // create a new symbol each time on purpose
    Token varNameTok = NextToken(tokens);
    // check the variable-name token (not the FACTOR keyword token)
    Debug.Assert(varNameTok.Symbol == TokenSymbol.SYMB);
    node.VariableName = varNameTok.StringValue;

    var weights = new List<double>();
    while (PeekToken(tokens).Symbol == TokenSymbol.NUMBER) {
      weights.Add(NextToken(tokens).DoubleValue);
    }
    node.Weights = weights.ToArray();

    // create a set of (virtual) values "X0", "X1", ... to match the number of weights
    Dictionary<string, int> virtualValues = weights
      .Select((weight, index) => index)
      .ToDictionary(index => "X" + index, index => index);
    node.Symbol.VariableNames = new string[] { node.VariableName };
    node.Symbol.VariableValues = new[]
    { new KeyValuePair<string, Dictionary<string,int>>(node.VariableName, virtualValues) };
    return node;
  }

  /// <summary>Parses <c>BINFACTOR name value weight</c>.</summary>
  private ISymbolicExpressionTreeNode ParseBinaryFactor(Queue<Token> tokens) {
    Token tok = NextToken(tokens);
    Debug.Assert(tok.StringValue == "BINFACTOR");
    var node = (BinaryFactorVariableTreeNode)binFactorVar.CreateTreeNode();

    Token varNameTok = NextToken(tokens);
    Debug.Assert(varNameTok.Symbol == TokenSymbol.SYMB);
    node.VariableName = varNameTok.StringValue;

    Token varValTok = NextToken(tokens);
    Debug.Assert(varValTok.Symbol == TokenSymbol.SYMB);
    node.VariableValue = varValTok.StringValue;

    Token weightTok = NextToken(tokens);
    Debug.Assert(weightTok.Symbol == TokenSymbol.NUMBER);
    node.Weight = weightTok.DoubleValue;

    return node;
  }

  /// <summary>Parses <c>LAGVARIABLE weight name lag</c>.</summary>
  private ISymbolicExpressionTreeNode ParseLaggedVariable(Queue<Token> tokens) {
    Token varTok = NextToken(tokens);
    Debug.Assert(varTok.StringValue == "LAGVARIABLE");
    LaggedVariableTreeNode node = (LaggedVariableTreeNode)laggedVariable.CreateTreeNode();
    node.Weight = NextToken(tokens).DoubleValue;
    node.VariableName = NextToken(tokens).StringValue;
    node.Lag = (int)NextToken(tokens).DoubleValue;
    return node;
  }

  /// <summary>
  /// Creates a node for the symbol registered under the token's text.
  /// </summary>
  /// <exception cref="FormatException">
  /// The token is not a symbol token, or its text is not a registered symbol name.
  /// </exception>
  private ISymbolicExpressionTreeNode CreateTree(Token token) {
    if (token.Symbol != TokenSymbol.SYMB) throw new FormatException("Expected function symbol, but got: " + token.StringValue);
    Symbol symbol;
    if (!knownSymbols.TryGetValue(token.StringValue, out symbol)) {
      // report unknown names like every other syntax error instead of leaking KeyNotFoundException
      throw new FormatException("Unknown function symbol: " + token.StringValue);
    }
    return symbol.CreateTreeNode();
  }

  /// <summary>
  /// Removes the next token and verifies that it equals <paramref name="token"/>.
  /// </summary>
  /// <exception cref="FormatException">The next token differs, or the input ends prematurely.</exception>
  private void Expect(Token token, Queue<Token> tokens) {
    Token cur = NextToken(tokens);
    if (!token.Equals(cur)) throw new FormatException("Expected: " + token.StringValue + ", but got: " + cur.StringValue);
  }

  /// <summary>Returns the next token without removing it; throws <see cref="FormatException"/> at end of input.</summary>
  private static Token PeekToken(Queue<Token> tokens) {
    if (tokens.Count == 0) throw new FormatException("Unexpected end of expression");
    return tokens.Peek();
  }

  /// <summary>Removes and returns the next token; throws <see cref="FormatException"/> at end of input.</summary>
  private static Token NextToken(Queue<Token> tokens) {
    if (tokens.Count == 0) throw new FormatException("Unexpected end of expression");
    return tokens.Dequeue();
  }
}
281}
Note: See TracBrowser for help on using the repository browser.