1 | #region License Information
|
---|
2 | /* HeuristicLab
|
---|
3 | * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
4 | *
|
---|
5 | * This file is part of HeuristicLab.
|
---|
6 | *
|
---|
7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
8 | * it under the terms of the GNU General Public License as published by
|
---|
9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
10 | * (at your option) any later version.
|
---|
11 | *
|
---|
12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
15 | * GNU General Public License for more details.
|
---|
16 | *
|
---|
17 | * You should have received a copy of the GNU General Public License
|
---|
18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
19 | */
|
---|
20 | #endregion
|
---|
21 |
|
---|
22 | using System;
|
---|
23 | using System.Collections.Generic;
|
---|
24 | using System.Diagnostics;
|
---|
25 | using System.Linq;
|
---|
26 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
27 |
|
---|
28 | using DoubleVector = MathNet.Numerics.LinearAlgebra.Vector<double>;
|
---|
29 |
|
---|
30 | namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
|
---|
/// <summary>
/// Builds symbolic expression trees from their s-expression (parenthesized prefix) text form,
/// i.e. strings of the shape "(+ (VARIABLE 1.0 x) 2.5)".
/// </summary>
/// <remarks>
/// Malformed input is reported with <see cref="FormatException"/>. Input that ends prematurely
/// (e.g. a missing closing parenthesis) surfaces as the <see cref="InvalidOperationException"/>
/// thrown by the underlying token queue when it runs empty.
/// </remarks>
public class SymbolicExpressionImporter {
  // Head-token prefixes that require dedicated parsing; matched with StartsWith in ParseSexp.
  private const string VARSTART = "VAR";
  private const string LAGGEDVARSTART = "LAGVARIABLE";
  private const string VECVARSTART = "VEC";
  private const string INTEGRALSTART = "INTEG";
  private const string DEFUNSTART = "DEFUN";
  private const string ARGSTART = "ARG";
  private const string INVOKESTART = "CALL";
  private const string TIMELAGSTART = "LAG";
  private const string FACTORSTART = "FACTOR";
  private const string BINFACTORSTART = "BINFACTOR";

  // Name of the variance function. It collides with VARSTART, see IsVarianceCall.
  private const string VARIANCESYMBOL = "VAR";

  // Function symbols that are created generically by CreateTree (head token -> symbol).
  private readonly Dictionary<string, Symbol> knownSymbols = new Dictionary<string, Symbol>()
  {
    {"+", new Addition()},
    {"/", new Division()},
    {"*", new Multiplication()},
    {"-", new Subtraction()},
    {"ABS", new Absolute()},
    {"EXP", new Exponential()},
    {"LOG", new Logarithm()},
    {"POW", new Power()},
    {"ROOT", new Root()},
    {"SQR", new Square()},
    {"SQRT", new SquareRoot()},
    {"CUBE", new Cube()},
    {"CUBEROOT", new CubeRoot()},
    {"SIN", new Sine()},
    {"COS", new Cosine()},
    {"TAN", new Tangent()},
    {"TANH", new HyperbolicTangent()},
    {"AIRYA", new AiryA()},
    {"AIRYB", new AiryB()},
    {"BESSEL", new Bessel()},
    {"COSINT", new CosineIntegral()},
    {"SININT", new SineIntegral()},
    {"HYPCOSINT", new HyperbolicCosineIntegral()},
    {"HYPSININT", new HyperbolicSineIntegral()},
    {"FRESNELSININT", new FresnelSineIntegral()},
    {"FRESNELCOSINT", new FresnelCosineIntegral()},
    {"NORM", new Norm()},
    {"ERF", new Erf()},
    {"GAMMA", new Gamma()},
    {"PSI", new Psi()},
    {"DAWSON", new Dawson()},
    {"EXPINT", new ExponentialIntegralEi()},
    {"AQ", new AnalyticQuotient()},
    // NOTE: "MEAN" is registered for Mean further down (the former Average mapping is disabled).
    {"IF", new IfThenElse()},
    {">", new GreaterThan()},
    {"<", new LessThan()},
    {"AND", new And()},
    {"OR", new Or()},
    {"NOT", new Not()},
    {"XOR", new Xor()},
    {"DIFF", new Derivative()},
    {"PROG", new ProgramRootSymbol()},
    {"MAIN", new StartSymbol()},
    {"FACTOR", new FactorVariable()},
    {"BINFACTOR", new BinaryFactorVariable()},
    {"SUM", new Sum()},
    {"MEAN", new Mean()},
    {"LENGTH", new Length()},
    {"STDEV", new StandardDeviation()},
    {VARIANCESYMBOL, new Variance()}
  };

  // Symbol instances used as factories for the specially parsed node kinds.
  private readonly Constant constant = new Constant();
  private readonly Variable variable = new Variable() { VariableDataType = typeof(double) };
  private readonly LaggedVariable laggedVariable = new LaggedVariable();
  private readonly Variable vectorVariable = new Variable() { VariableDataType = typeof(DoubleVector) };
  private readonly Defun defun = new Defun();
  private readonly TimeLag timeLag = new TimeLag();
  private readonly Integral integral = new Integral();
  // Not referenced in this class: ParseFactor deliberately creates a fresh FactorVariable per node.
  private readonly FactorVariable factorVar = new FactorVariable();
  private readonly BinaryFactorVariable binFactorVar = new BinaryFactorVariable();

  private readonly ProgramRootSymbol programRootSymbol = new ProgramRootSymbol();
  private readonly StartSymbol startSymbol = new StartSymbol();

  /// <summary>
  /// Parses <paramref name="str"/> and returns the resulting tree.
  /// </summary>
  /// <param name="str">The s-expression to parse.</param>
  /// <returns>
  /// A tree whose root is the parsed expression if that expression is itself a program root;
  /// otherwise the parsed expression wrapped into a new program-root / start-symbol pair.
  /// </returns>
  /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
  /// <exception cref="FormatException">The input is not a well-formed expression.</exception>
  public ISymbolicExpressionTree Import(string str) {
    if (str == null) throw new ArgumentNullException("str");

    // Pad parentheses with blanks so that splitting on blanks yields them as separate tokens.
    str = str.Replace("(", " ( ").Replace(")", " ) ");
    ISymbolicExpressionTreeNode mainBranch = ParseSexp(new Queue<Token>(GetTokenStream(str)));

    if (mainBranch.Symbol is ProgramRootSymbol) {
      // when a root symbol was parsed => use main branch as root
      return new SymbolicExpressionTree(mainBranch);
    }

    // only a main branch was given => insert the main branch into the default tree template
    ISymbolicExpressionTreeNode root = programRootSymbol.CreateTreeNode();
    ISymbolicExpressionTreeNode start = startSymbol.CreateTreeNode();
    root.AddSubtree(start);
    start.AddSubtree(mainBranch);
    return new SymbolicExpressionTree(root);
  }

  /// <summary>
  /// Lazily splits <paramref name="str"/> on blanks and converts every non-empty piece into a
  /// token, skipping pieces for which <c>Token.Parse</c> returns null.
  /// </summary>
  private IEnumerable<Token> GetTokenStream(string str) {
    return str.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries)
      .Select(strToken => Token.Parse(strToken))
      .Where(t => t != null);
  }

  /// <summary>
  /// Parses one expression from the front of <paramref name="tokens"/>: either a number
  /// (becomes a constant node) or a parenthesized form whose head token selects the node kind.
  /// </summary>
  /// <exception cref="FormatException">The next token starts neither a form nor a number.</exception>
  private ISymbolicExpressionTreeNode ParseSexp(Queue<Token> tokens) {
    if (tokens.Peek().Symbol == TokenSymbol.LPAR) {
      ISymbolicExpressionTreeNode tree;
      Expect(Token.LPAR, tokens);
      string head = tokens.Peek().StringValue;

      if (IsVarianceCall(tokens)) {
        // "(VAR (" can only be the variance function; must be tested before the VAR prefix below.
        tree = CreateTree(tokens.Dequeue());
        ParseSubtrees(tree, tokens);
      } else if (head.StartsWith(VARSTART, StringComparison.Ordinal)) {
        tree = ParseVariable(tokens);
      } else if (head.StartsWith(LAGGEDVARSTART, StringComparison.Ordinal)) {
        // must precede the TIMELAGSTART test because "LAGVARIABLE" also starts with "LAG"
        tree = ParseLaggedVariable(tokens);
      } else if (head.StartsWith(VECVARSTART, StringComparison.Ordinal)) {
        tree = ParseVectorVariable(tokens);
      } else if (head.StartsWith(TIMELAGSTART, StringComparison.Ordinal)) {
        tree = ParseTimeLag(tokens);
        tree.AddSubtree(ParseSexp(tokens));
      } else if (head.StartsWith(INTEGRALSTART, StringComparison.Ordinal)) {
        tree = ParseIntegral(tokens);
        tree.AddSubtree(ParseSexp(tokens));
      } else if (head.StartsWith(DEFUNSTART, StringComparison.Ordinal)) {
        tree = ParseDefun(tokens);
        ParseSubtrees(tree, tokens);
      } else if (head.StartsWith(ARGSTART, StringComparison.Ordinal)) {
        tree = ParseArgument(tokens);
      } else if (head.StartsWith(INVOKESTART, StringComparison.Ordinal)) {
        tree = ParseInvoke(tokens);
        ParseSubtrees(tree, tokens);
      } else if (head.StartsWith(FACTORSTART, StringComparison.Ordinal)) {
        tree = ParseFactor(tokens);
      } else if (head.StartsWith(BINFACTORSTART, StringComparison.Ordinal)) {
        tree = ParseBinaryFactor(tokens);
      } else {
        // generic function symbol looked up in knownSymbols
        tree = CreateTree(tokens.Dequeue());
        ParseSubtrees(tree, tokens);
      }

      Expect(Token.RPAR, tokens);
      return tree;
    }

    if (tokens.Peek().Symbol == TokenSymbol.NUMBER) {
      ConstantTreeNode constNode = (ConstantTreeNode)constant.CreateTreeNode();
      constNode.Value = tokens.Dequeue().DoubleValue;
      return constNode;
    }

    throw new FormatException("Expected function or constant symbol");
  }

  /// <summary>
  /// Tells whether the form at the front of <paramref name="tokens"/> is a call of the variance
  /// function: its head is exactly "VAR" and the token after it opens a sub-expression.
  /// A variable form always continues with its numeric weight instead, so inputs that parsed
  /// as a variable before are unaffected.
  /// </summary>
  private static bool IsVarianceCall(Queue<Token> tokens) {
    if (tokens.Peek().StringValue != VARIANCESYMBOL) return false;
    Token next = tokens.Skip(1).FirstOrDefault();
    return next != null && next.Symbol == TokenSymbol.LPAR;
  }

  /// <summary>
  /// Parses expressions and appends them as subtrees of <paramref name="parent"/> until the
  /// next token is a closing parenthesis (which is left in the queue).
  /// </summary>
  private void ParseSubtrees(ISymbolicExpressionTreeNode parent, Queue<Token> tokens) {
    while (!tokens.Peek().Equals(Token.RPAR)) {
      parent.AddSubtree(ParseSexp(tokens));
    }
  }

  /// <summary>Parses "CALL name"; the arguments are parsed by the caller.</summary>
  private ISymbolicExpressionTreeNode ParseInvoke(Queue<Token> tokens) {
    Token invokeTok = tokens.Dequeue();
    Debug.Assert(invokeTok.StringValue == "CALL");
    InvokeFunction invokeSym = new InvokeFunction(tokens.Dequeue().StringValue);
    return invokeSym.CreateTreeNode();
  }

  /// <summary>Parses "ARG index"; the numeric token is truncated to an int.</summary>
  private ISymbolicExpressionTreeNode ParseArgument(Queue<Token> tokens) {
    Token argTok = tokens.Dequeue();
    Debug.Assert(argTok.StringValue == "ARG");
    Argument argument = new Argument((int)tokens.Dequeue().DoubleValue);
    return argument.CreateTreeNode();
  }

  /// <summary>Parses "DEFUN name"; the body is parsed by the caller.</summary>
  private ISymbolicExpressionTreeNode ParseDefun(Queue<Token> tokens) {
    Token defTok = tokens.Dequeue();
    Debug.Assert(defTok.StringValue == "DEFUN");
    DefunTreeNode t = (DefunTreeNode)defun.CreateTreeNode();
    t.FunctionName = tokens.Dequeue().StringValue;
    return t;
  }

  /// <summary>Parses "LAG lag"; the lagged sub-expression is parsed by the caller.</summary>
  private ISymbolicExpressionTreeNode ParseTimeLag(Queue<Token> tokens) {
    Token varTok = tokens.Dequeue();
    Debug.Assert(varTok.StringValue == "LAG");
    LaggedTreeNode t = (LaggedTreeNode)timeLag.CreateTreeNode();
    t.Lag = (int)tokens.Dequeue().DoubleValue;
    return t;
  }

  /// <summary>Parses "INTEGRAL lag"; the integrated sub-expression is parsed by the caller.</summary>
  private ISymbolicExpressionTreeNode ParseIntegral(Queue<Token> tokens) {
    Token varTok = tokens.Dequeue();
    Debug.Assert(varTok.StringValue == "INTEGRAL");
    LaggedTreeNode t = (LaggedTreeNode)integral.CreateTreeNode();
    t.Lag = (int)tokens.Dequeue().DoubleValue;
    return t;
  }

  /// <summary>Parses "VARIABLE weight name" into a double-typed variable node.</summary>
  private ISymbolicExpressionTreeNode ParseVariable(Queue<Token> tokens) {
    Token varTok = tokens.Dequeue();
    Debug.Assert(varTok.StringValue == "VARIABLE");
    VariableTreeNode t = (VariableTreeNode)variable.CreateTreeNode();
    t.Weight = tokens.Dequeue().DoubleValue;
    t.VariableName = tokens.Dequeue().StringValue;
    return t;
  }

  /// <summary>Parses "VECTORVARIABLE weight name" into a vector-typed variable node.</summary>
  private ISymbolicExpressionTreeNode ParseVectorVariable(Queue<Token> tokens) {
    Token varTok = tokens.Dequeue();
    Debug.Assert(varTok.StringValue == "VECTORVARIABLE");
    VariableTreeNode t = (VariableTreeNode)vectorVariable.CreateTreeNode();
    t.Weight = tokens.Dequeue().DoubleValue;
    t.VariableName = tokens.Dequeue().StringValue;
    return t;
  }

  /// <summary>
  /// Parses "FACTOR name w0 w1 ..." where the weights are all directly following number tokens.
  /// </summary>
  private ISymbolicExpressionTreeNode ParseFactor(Queue<Token> tokens) {
    Token tok = tokens.Dequeue();
    Debug.Assert(tok.StringValue == "FACTOR");
    FactorVariableTreeNode t = (FactorVariableTreeNode)(new FactorVariable()).CreateTreeNode(); // create a new symbol each time on purpose
    var varNameTok = tokens.Dequeue();
    Debug.Assert(varNameTok.Symbol == TokenSymbol.SYMB);
    t.VariableName = varNameTok.StringValue;

    var weights = new List<double>();
    while (tokens.Peek().Symbol == TokenSymbol.NUMBER) {
      weights.Add(tokens.Dequeue().DoubleValue);
    }

    t.Weights = weights.ToArray();

    // create a set of (virtual) values "X0", "X1", ... to match the number of weights
    t.Symbol.VariableNames = new string[] { t.VariableName };
    t.Symbol.VariableValues = new[]
    { new KeyValuePair<string, Dictionary<string,int>>(
        t.VariableName,
        Enumerable.Range(0, weights.Count).ToDictionary(i => "X" + i, i => i)) };
    return t;
  }

  /// <summary>Parses "BINFACTOR name value weight".</summary>
  private ISymbolicExpressionTreeNode ParseBinaryFactor(Queue<Token> tokens) {
    Token tok = tokens.Dequeue();
    Debug.Assert(tok.StringValue == "BINFACTOR");
    var t = (BinaryFactorVariableTreeNode)binFactorVar.CreateTreeNode();
    var varNameTok = tokens.Dequeue();
    Debug.Assert(varNameTok.Symbol == TokenSymbol.SYMB);
    t.VariableName = varNameTok.StringValue;

    var varValTok = tokens.Dequeue();
    Debug.Assert(varValTok.Symbol == TokenSymbol.SYMB);
    t.VariableValue = varValTok.StringValue;

    var weightTok = tokens.Dequeue();
    Debug.Assert(weightTok.Symbol == TokenSymbol.NUMBER);
    t.Weight = weightTok.DoubleValue;

    return t;
  }

  /// <summary>Parses "LAGVARIABLE weight name lag".</summary>
  private ISymbolicExpressionTreeNode ParseLaggedVariable(Queue<Token> tokens) {
    Token varTok = tokens.Dequeue();
    Debug.Assert(varTok.StringValue == "LAGVARIABLE");
    LaggedVariableTreeNode t = (LaggedVariableTreeNode)laggedVariable.CreateTreeNode();
    t.Weight = tokens.Dequeue().DoubleValue;
    t.VariableName = tokens.Dequeue().StringValue;
    t.Lag = (int)tokens.Dequeue().DoubleValue;
    return t;
  }

  /// <summary>
  /// Creates a node for the function symbol named by <paramref name="token"/>.
  /// </summary>
  /// <exception cref="FormatException">
  /// The token is not a symbol token, or its name is not registered in knownSymbols.
  /// </exception>
  private ISymbolicExpressionTreeNode CreateTree(Token token) {
    if (token.Symbol != TokenSymbol.SYMB) throw new FormatException("Expected function symbol, but got: " + token.StringValue);
    Symbol symbol;
    if (!knownSymbols.TryGetValue(token.StringValue, out symbol)) {
      // report the offending name instead of an anonymous KeyNotFoundException
      throw new FormatException("Unknown function symbol: " + token.StringValue);
    }
    return symbol.CreateTreeNode();
  }

  /// <summary>
  /// Removes the next token and verifies that it equals <paramref name="token"/>.
  /// </summary>
  /// <exception cref="FormatException">A different token was found.</exception>
  private void Expect(Token token, Queue<Token> tokens) {
    Token cur = tokens.Dequeue();
    if (!token.Equals(cur)) throw new FormatException("Expected: " + token.StringValue + ", but got: " + cur.StringValue);
  }
}
302 | }
|
---|