[5574] | 1 | #region License Information
|
---|
| 2 | /* HeuristicLab
|
---|
[17181] | 3 | * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
[5574] | 4 | *
|
---|
| 5 | * This file is part of HeuristicLab.
|
---|
| 6 | *
|
---|
| 7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
| 8 | * it under the terms of the GNU General Public License as published by
|
---|
| 9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 10 | * (at your option) any later version.
|
---|
| 11 | *
|
---|
| 12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 15 | * GNU General Public License for more details.
|
---|
| 16 | *
|
---|
| 17 | * You should have received a copy of the GNU General Public License
|
---|
| 18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | */
|
---|
| 20 | #endregion
|
---|
| 21 |
|
---|
| 22 | using System;
|
---|
| 23 | using System.Collections.Generic;
|
---|
| 24 | using System.Diagnostics;
|
---|
| 25 | using System.Linq;
|
---|
| 26 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
| 27 |
|
---|
[11457] | 28 | namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
|
---|
| 29 | public class SymbolicExpressionImporter {
|
---|
// Keyword prefixes; ParseSexp compares them (via StartsWith) against the first token of a list
// to decide which specialised parse method handles that list.
private const string VARSTART = "VAR";
private const string LAGGEDVARSTART = "LAGVARIABLE";
private const string INTEGRALSTART = "INTEG";
private const string DEFUNSTART = "DEFUN";
private const string ARGSTART = "ARG";
private const string INVOKESTART = "CALL";
private const string TIMELAGSTART = "LAG";

// Maps the textual name of a function symbol to the symbol instance that CreateTree
// uses to create the corresponding tree node.
private Dictionary<string, Symbol> knownSymbols = new Dictionary<string, Symbol> {
  // arithmetic
  { "+", new Addition() },
  { "/", new Division() },
  { "*", new Multiplication() },
  { "-", new Subtraction() },
  { "ABS", new Absolute() },
  // exponential, power and root functions
  { "EXP", new Exponential() },
  { "LOG", new Logarithm() },
  { "POW", new Power() },
  { "ROOT", new Root() },
  { "SQR", new Square() },
  { "SQRT", new SquareRoot() },
  { "CUBE", new Cube() },
  { "CUBEROOT", new CubeRoot() },
  // trigonometric and hyperbolic functions
  { "SIN", new Sine() },
  { "COS", new Cosine() },
  { "TAN", new Tangent() },
  { "TANH", new HyperbolicTangent() },
  // special functions
  { "AIRYA", new AiryA() },
  { "AIRYB", new AiryB() },
  { "BESSEL", new Bessel() },
  { "COSINT", new CosineIntegral() },
  { "SININT", new SineIntegral() },
  { "HYPCOSINT", new HyperbolicCosineIntegral() },
  { "HYPSININT", new HyperbolicSineIntegral() },
  { "FRESNELSININT", new FresnelSineIntegral() },
  { "FRESNELCOSINT", new FresnelCosineIntegral() },
  { "NORM", new Norm() },
  { "ERF", new Erf() },
  { "GAMMA", new Gamma() },
  { "PSI", new Psi() },
  { "DAWSON", new Dawson() },
  { "EXPINT", new ExponentialIntegralEi() },
  { "AQ", new AnalyticQuotient() },
  { "MEAN", new Average() },
  // conditionals, comparisons and boolean operators
  { "IF", new IfThenElse() },
  { ">", new GreaterThan() },
  { "<", new LessThan() },
  { "AND", new And() },
  { "OR", new Or() },
  { "NOT", new Not() },
  { "XOR", new Xor() },
  { "DIFF", new Derivative() },
  // structural symbols
  { "PROG", new ProgramRootSymbol() },
  { "MAIN", new StartSymbol() },
  // factor variables
  { "FACTOR", new FactorVariable() },
  { "BINFACTOR", new BinaryFactorVariable() }
};

// Symbol instances that the parse methods use to create tree nodes.
private Constant constant = new Constant();
private Variable variable = new Variable();
private LaggedVariable laggedVariable = new LaggedVariable();
private Defun defun = new Defun();
private TimeLag timeLag = new TimeLag();
private Integral integral = new Integral();
// NOTE(review): factorVar is not referenced in the visible part of this class
// (ParseFactor creates its own FactorVariable) - confirm before removing.
private FactorVariable factorVar = new FactorVariable();
private BinaryFactorVariable binFactorVar = new BinaryFactorVariable();

// Symbols for the default tree template (program root -> start -> main branch) built by Import.
private ProgramRootSymbol programRootSymbol = new ProgramRootSymbol();
private StartSymbol startSymbol = new StartSymbol();
| 98 |
|
---|
/// <summary>
/// Parses the s-expression in <paramref name="str"/> and returns it as a symbolic expression tree.
/// </summary>
/// <param name="str">Textual s-expression, e.g. a list such as "(+ 1 2)".</param>
/// <returns>
/// A tree whose root is the parsed expression when that expression is a program root symbol;
/// otherwise a tree of the form program root -> start -> parsed expression.
/// </returns>
/// <exception cref="FormatException">Thrown by the parse methods for unexpected tokens.</exception>
public ISymbolicExpressionTree Import(string str) {
  // surround parentheses with blanks so that they end up as separate tokens
  string padded = str.Replace("(", " ( ").Replace(")", " ) ");

  ISymbolicExpressionTreeNode templateRoot = programRootSymbol.CreateTreeNode();
  ISymbolicExpressionTreeNode templateStart = startSymbol.CreateTreeNode();

  Queue<Token> tokenQueue = new Queue<Token>(GetTokenStream(padded));
  ISymbolicExpressionTreeNode parsed = ParseSexp(tokenQueue);

  // a complete program was given => the parsed node already is the root
  if (parsed.Symbol is ProgramRootSymbol) return new SymbolicExpressionTree(parsed);

  // only a main branch was given => hang it into the default tree template
  templateRoot.AddSubtree(templateStart);
  templateStart.AddSubtree(parsed);
  return new SymbolicExpressionTree(templateRoot);
}
| 114 |
|
---|
/// <summary>
/// Splits <paramref name="str"/> into whitespace-separated pieces and converts each piece into a token.
/// </summary>
/// <param name="str">Expression text in which parentheses are already separated from their neighbours.</param>
/// <returns>A lazily evaluated sequence of tokens; pieces for which Token.Parse returns null are skipped.</returns>
private IEnumerable<Token> GetTokenStream(string str) {
  // Split on tabs and line breaks as well as blanks, so that expressions formatted over
  // several lines do not leave line-break characters attached to (or standing in for) tokens.
  char[] separators = { ' ', '\t', '\r', '\n' };
  return str.Split(separators, StringSplitOptions.RemoveEmptyEntries)
            .Select(piece => Token.Parse(piece))
            .Where(token => token != null);
}
| 122 |
|
---|
/// <summary>
/// Parses one s-expression from the front of <paramref name="tokens"/> and removes the consumed tokens.
/// An expression is either a number (becomes a constant node) or a parenthesised list whose first
/// token selects the kind of node that is created.
/// </summary>
/// <param name="tokens">Remaining tokens; must not be empty (Queue.Peek throws InvalidOperationException otherwise).</param>
/// <returns>The root node of the parsed (sub-)expression.</returns>
/// <exception cref="FormatException">
/// The next token is neither "(" nor a number, or a list is not terminated by ")".
/// </exception>
private ISymbolicExpressionTreeNode ParseSexp(Queue<Token> tokens) {
  if (tokens.Peek().Symbol == TokenSymbol.LPAR) {
    ISymbolicExpressionTreeNode tree;
    Expect(Token.LPAR, tokens);

    // The first token of the list decides how the list is parsed. Keywords are matched by prefix
    // with an ordinal (culture-independent) comparison, because they are fixed identifiers.
    string head = tokens.Peek().StringValue;
    if (head.StartsWith(VARSTART, StringComparison.Ordinal)) {
      tree = ParseVariable(tokens);
    } else if (head.StartsWith(LAGGEDVARSTART, StringComparison.Ordinal)) {
      // must be tested before TIMELAGSTART because "LAGVARIABLE" also starts with "LAG"
      tree = ParseLaggedVariable(tokens);
    } else if (head.StartsWith(TIMELAGSTART, StringComparison.Ordinal)) {
      // a time lag has exactly one argument expression
      tree = ParseTimeLag(tokens);
      tree.AddSubtree(ParseSexp(tokens));
    } else if (head.StartsWith(INTEGRALSTART, StringComparison.Ordinal)) {
      // an integral has exactly one argument expression
      tree = ParseIntegral(tokens);
      tree.AddSubtree(ParseSexp(tokens));
    } else if (head.StartsWith(DEFUNSTART, StringComparison.Ordinal)) {
      tree = ParseDefun(tokens);
      while (!tokens.Peek().Equals(Token.RPAR)) {
        tree.AddSubtree(ParseSexp(tokens));
      }
    } else if (head.StartsWith(ARGSTART, StringComparison.Ordinal)) {
      tree = ParseArgument(tokens);
    } else if (head.StartsWith(INVOKESTART, StringComparison.Ordinal)) {
      tree = ParseInvoke(tokens);
      while (!tokens.Peek().Equals(Token.RPAR)) {
        tree.AddSubtree(ParseSexp(tokens));
      }
    } else if (head.StartsWith("FACTOR", StringComparison.Ordinal)) {
      tree = ParseFactor(tokens);
    } else if (head.StartsWith("BINFACTOR", StringComparison.Ordinal)) {
      tree = ParseBinaryFactor(tokens);
    } else {
      // any other symbol: look it up and parse all following expressions as its arguments
      Token curToken = tokens.Dequeue();
      tree = CreateTree(curToken);
      while (!tokens.Peek().Equals(Token.RPAR)) {
        tree.AddSubtree(ParseSexp(tokens));
      }
    }
    Expect(Token.RPAR, tokens);
    return tree;
  } else if (tokens.Peek().Symbol == TokenSymbol.NUMBER) {
    // a bare number becomes a constant node
    ConstantTreeNode t = (ConstantTreeNode)constant.CreateTreeNode();
    t.Value = tokens.Dequeue().DoubleValue;
    return t;
  } else throw new FormatException("Expected function or constant symbol");
}
| 168 |
|
---|
/// <summary>
/// Consumes the "CALL" keyword and the following function-name token and creates the node
/// of an InvokeFunction symbol for that name. The call arguments are not consumed here.
/// </summary>
/// <param name="tokens">Remaining tokens, positioned at the keyword.</param>
/// <returns>The newly created invoke node (without subtrees).</returns>
private ISymbolicExpressionTreeNode ParseInvoke(Queue<Token> tokens) {
  Token keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "CALL");
  Token functionName = tokens.Dequeue();
  return new InvokeFunction(functionName.StringValue).CreateTreeNode();
}
| 176 |
|
---|
/// <summary>
/// Consumes the "ARG" keyword and the following numeric token and creates the node of an
/// Argument symbol; the number is truncated to an int and passed to the Argument constructor.
/// </summary>
/// <param name="tokens">Remaining tokens, positioned at the keyword.</param>
/// <returns>The newly created argument node.</returns>
private ISymbolicExpressionTreeNode ParseArgument(Queue<Token> tokens) {
  Token keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "ARG");
  Token indexToken = tokens.Dequeue();
  int argumentIndex = (int)indexToken.DoubleValue;
  return new Argument(argumentIndex).CreateTreeNode();
}
| 184 |
|
---|
/// <summary>
/// Consumes the "DEFUN" keyword and the following function-name token and creates a defun node
/// carrying that name. The function body is not consumed here.
/// </summary>
/// <param name="tokens">Remaining tokens, positioned at the keyword.</param>
/// <returns>The newly created defun node (without subtrees).</returns>
private ISymbolicExpressionTreeNode ParseDefun(Queue<Token> tokens) {
  Token keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "DEFUN");
  DefunTreeNode defunNode = (DefunTreeNode)defun.CreateTreeNode();
  Token nameToken = tokens.Dequeue();
  defunNode.FunctionName = nameToken.StringValue;
  return defunNode;
}
| 192 |
|
---|
/// <summary>
/// Consumes the "LAG" keyword and the following numeric token and creates a time-lag node
/// whose Lag is that number truncated to an int. The lagged expression is not consumed here.
/// </summary>
/// <param name="tokens">Remaining tokens, positioned at the keyword.</param>
/// <returns>The newly created time-lag node (without subtrees).</returns>
private ISymbolicExpressionTreeNode ParseTimeLag(Queue<Token> tokens) {
  Token keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "LAG");
  LaggedTreeNode lagNode = (LaggedTreeNode)timeLag.CreateTreeNode();
  Token lagToken = tokens.Dequeue();
  lagNode.Lag = (int)lagToken.DoubleValue;
  return lagNode;
}
| 200 |
|
---|
/// <summary>
/// Consumes the integral keyword and the following numeric token and creates an integral node
/// whose Lag is that number truncated to an int. The integrated expression is not consumed here.
/// </summary>
/// <param name="tokens">Remaining tokens, positioned at the keyword.</param>
/// <returns>The newly created integral node (without subtrees).</returns>
private ISymbolicExpressionTreeNode ParseIntegral(Queue<Token> tokens) {
  Token keyword = tokens.Dequeue();
  // NOTE(review): ParseSexp dispatches here for any token starting with "INTEG",
  // but this (debug-only) check accepts only the full keyword - confirm which is intended.
  Debug.Assert(keyword.StringValue == "INTEGRAL");
  LaggedTreeNode integralNode = (LaggedTreeNode)integral.CreateTreeNode();
  Token lagToken = tokens.Dequeue();
  integralNode.Lag = (int)lagToken.DoubleValue;
  return integralNode;
}
| 208 |
|
---|
/// <summary>
/// Consumes the variable keyword, a weight token and a name token (in this order) and creates
/// a variable node with that weight and variable name.
/// </summary>
/// <param name="tokens">Remaining tokens, positioned at the keyword.</param>
/// <returns>The newly created variable node.</returns>
private ISymbolicExpressionTreeNode ParseVariable(Queue<Token> tokens) {
  Token keyword = tokens.Dequeue();
  // NOTE(review): ParseSexp dispatches here for any token starting with "VAR",
  // but this (debug-only) check accepts only the full keyword - confirm which is intended.
  Debug.Assert(keyword.StringValue == "VARIABLE");
  VariableTreeNode variableNode = (VariableTreeNode)variable.CreateTreeNode();

  Token weightToken = tokens.Dequeue();
  variableNode.Weight = weightToken.DoubleValue;

  Token nameToken = tokens.Dequeue();
  variableNode.VariableName = nameToken.StringValue;

  return variableNode;
}
| 217 |
|
---|
/// <summary>
/// Consumes the "FACTOR" keyword, a variable-name token and all directly following numeric tokens
/// and creates a factor-variable node with that name and those numbers as weights.
/// </summary>
/// <param name="tokens">Remaining tokens, positioned at the keyword.</param>
/// <returns>The newly created factor-variable node.</returns>
private ISymbolicExpressionTreeNode ParseFactor(Queue<Token> tokens) {
  Token tok = tokens.Dequeue();
  Debug.Assert(tok.StringValue == "FACTOR");
  // create a new symbol each time on purpose: the symbol's VariableNames and VariableValues
  // are overwritten below with data that is specific to this node
  FactorVariableTreeNode t = (FactorVariableTreeNode)(new FactorVariable()).CreateTreeNode();

  var varNameTok = tokens.Dequeue();
  // check the variable-name token (not the keyword token, which is trivially a symbol)
  Debug.Assert(varNameTok.Symbol == TokenSymbol.SYMB);
  t.VariableName = varNameTok.StringValue;

  // every number up to the first non-number token is a weight
  var weights = new List<double>();
  while (tokens.Peek().Symbol == TokenSymbol.NUMBER) {
    weights.Add(tokens.Dequeue().DoubleValue);
  }

  t.Weights = weights.ToArray();

  // create a set of (virtual) values "X0", "X1", ... to match the number of weights;
  // each virtual value maps to its own index
  Dictionary<string, int> virtualValues = Enumerable.Range(0, weights.Count)
                                                    .ToDictionary(i => "X" + i, i => i);
  t.Symbol.VariableNames = new string[] { t.VariableName };
  t.Symbol.VariableValues = new[] {
    new KeyValuePair<string, Dictionary<string, int>>(t.VariableName, virtualValues)
  };
  return t;
}
| 241 |
|
---|
/// <summary>
/// Consumes the "BINFACTOR" keyword followed by a variable-name token, a variable-value token and
/// a weight token (in this order) and creates a binary-factor-variable node from them.
/// </summary>
/// <param name="tokens">Remaining tokens, positioned at the keyword.</param>
/// <returns>The newly created binary-factor-variable node.</returns>
private ISymbolicExpressionTreeNode ParseBinaryFactor(Queue<Token> tokens) {
  Token keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "BINFACTOR");
  BinaryFactorVariableTreeNode node = (BinaryFactorVariableTreeNode)binFactorVar.CreateTreeNode();

  // variable name
  Token nameToken = tokens.Dequeue();
  Debug.Assert(nameToken.Symbol == TokenSymbol.SYMB);
  node.VariableName = nameToken.StringValue;

  // variable value
  Token valueToken = tokens.Dequeue();
  Debug.Assert(valueToken.Symbol == TokenSymbol.SYMB);
  node.VariableValue = valueToken.StringValue;

  // weight
  Token weightToken = tokens.Dequeue();
  Debug.Assert(weightToken.Symbol == TokenSymbol.NUMBER);
  node.Weight = weightToken.DoubleValue;

  return node;
}
| 260 |
|
---|
| 261 |
|
---|
/// <summary>
/// Consumes the "LAGVARIABLE" keyword followed by a weight token, a name token and a lag token
/// (in this order) and creates a lagged-variable node from them; the lag is truncated to an int.
/// </summary>
/// <param name="tokens">Remaining tokens, positioned at the keyword.</param>
/// <returns>The newly created lagged-variable node.</returns>
private ISymbolicExpressionTreeNode ParseLaggedVariable(Queue<Token> tokens) {
  Token keyword = tokens.Dequeue();
  Debug.Assert(keyword.StringValue == "LAGVARIABLE");
  LaggedVariableTreeNode node = (LaggedVariableTreeNode)laggedVariable.CreateTreeNode();

  Token weightToken = tokens.Dequeue();
  node.Weight = weightToken.DoubleValue;

  Token nameToken = tokens.Dequeue();
  node.VariableName = nameToken.StringValue;

  Token lagToken = tokens.Dequeue();
  node.Lag = (int)lagToken.DoubleValue;

  return node;
}
| 271 |
|
---|
/// <summary>
/// Creates a tree node for the function symbol named by <paramref name="token"/>.
/// </summary>
/// <param name="token">Token holding the symbol name; must be of kind SYMB.</param>
/// <returns>A new tree node created by the symbol registered under that name.</returns>
/// <exception cref="FormatException">The token is not a symbol token.</exception>
/// <exception cref="KeyNotFoundException">No symbol is registered under the token's name.</exception>
private ISymbolicExpressionTreeNode CreateTree(Token token) {
  if (token.Symbol != TokenSymbol.SYMB) throw new FormatException("Expected function symbol, but got: " + token.StringValue);

  // single lookup; on failure keep the exception type of the dictionary indexer but
  // name the offending symbol so that the error can be traced back to the input
  Symbol symbol;
  if (!knownSymbols.TryGetValue(token.StringValue, out symbol))
    throw new KeyNotFoundException("Unknown symbol: " + token.StringValue);
  return symbol.CreateTreeNode();
}
| 276 |
|
---|
/// <summary>
/// Removes the next token from <paramref name="tokens"/> and verifies that it equals the expected one.
/// </summary>
/// <param name="token">The token that must come next.</param>
/// <param name="tokens">Remaining tokens; the first one is always consumed.</param>
/// <exception cref="FormatException">The consumed token does not equal <paramref name="token"/>.</exception>
private void Expect(Token token, Queue<Token> tokens) {
  Token actual = tokens.Dequeue();
  if (token.Equals(actual)) return;
  throw new FormatException("Expected: " + token.StringValue + ", but got: " + actual.StringValue);
}
| 281 | }
|
---|
| 282 | }
|
---|