#region License Information
/* HeuristicLab
* Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
/// <summary>
/// Imports a symbolic expression tree from its textual S-expression form.
/// </summary>
/// <remarks>
/// Forms recognized by the parser (keywords are matched by prefix, see the constants below):
/// <list type="bullet">
/// <item><description><c>( VAR... weight name )</c></description></item>
/// <item><description><c>( LAGVARIABLE weight name lag )</c></description></item>
/// <item><description><c>( LAG lag subtree )</c> and <c>( INTEG... lag subtree )</c></description></item>
/// <item><description><c>( DEFUN name subtree* )</c> and <c>( CALL name subtree* )</c></description></item>
/// <item><description><c>( ARG index )</c></description></item>
/// <item><description><c>( FACTOR name weight* )</c> and <c>( BINFACTOR name value weight )</c></description></item>
/// <item><description><c>( symbol subtree* )</c> for every key of the known-symbol table</description></item>
/// <item><description>a bare constant token</description></item>
/// <item><description>the token sequence LBRACKET NUM [EQ constant] RBRACKET for a number node</description></item>
/// </list>
/// </remarks>
public class SymbolicExpressionImporter {
  private const string VARSTART = "VAR";
  private const string LAGGEDVARSTART = "LAGVARIABLE";
  private const string INTEGRALSTART = "INTEG";
  private const string DEFUNSTART = "DEFUN";
  private const string ARGSTART = "ARG";
  private const string INVOKESTART = "CALL";
  private const string TIMELAGSTART = "LAG";

  // Maps the textual name of a function symbol to the symbol instance used to create its tree nodes.
  private readonly Dictionary<string, Symbol> knownSymbols = new Dictionary<string, Symbol>()
  {
    {"+", new Addition()},
    {"/", new Division()},
    {"*", new Multiplication()},
    {"-", new Subtraction()},
    {"ABS", new Absolute() },
    {"EXP", new Exponential()},
    {"LOG", new Logarithm()},
    {"POW", new Power()},
    {"ROOT", new Root()},
    {"SQR", new Square()},
    {"SQRT", new SquareRoot()},
    {"CUBE", new Cube()},
    {"CUBEROOT", new CubeRoot()},
    {"SIN",new Sine()},
    {"COS", new Cosine()},
    {"TAN", new Tangent()},
    {"TANH", new HyperbolicTangent ()},
    {"AIRYA", new AiryA()},
    {"AIRYB", new AiryB()},
    {"BESSEL", new Bessel()},
    {"COSINT", new CosineIntegral()},
    {"SININT", new SineIntegral()},
    {"HYPCOSINT", new HyperbolicCosineIntegral()},
    {"HYPSININT", new HyperbolicSineIntegral()},
    {"FRESNELSININT", new FresnelSineIntegral()},
    {"FRESNELCOSINT", new FresnelCosineIntegral()},
    {"NORM", new Norm()},
    {"ERF", new Erf()},
    {"GAMMA", new Gamma()},
    {"PSI", new Psi()},
    {"DAWSON", new Dawson()},
    {"EXPINT", new ExponentialIntegralEi()},
    {"AQ", new AnalyticQuotient() },
    {"MEAN", new Average()},
    {"IF", new IfThenElse()},
    {">", new GreaterThan()},
    {"<", new LessThan()},
    {"AND", new And()},
    {"OR", new Or()},
    {"NOT", new Not()},
    {"XOR", new Xor()},
    {"DIFF", new Derivative()},
    {"PROG", new ProgramRootSymbol()},
    {"MAIN", new StartSymbol()},
    {"FACTOR", new FactorVariable() },
    {"BINFACTOR", new BinaryFactorVariable()}
  };

  // Symbol instances shared by all nodes of the respective kind created by this importer.
  private readonly Number number = new Number();
  private readonly Variable variable = new Variable();
  private readonly LaggedVariable laggedVariable = new LaggedVariable();
  private readonly Defun defun = new Defun();
  private readonly TimeLag timeLag = new TimeLag();
  private readonly Integral integral = new Integral();
  private readonly BinaryFactorVariable binFactorVar = new BinaryFactorVariable();
  private readonly ProgramRootSymbol programRootSymbol = new ProgramRootSymbol();
  private readonly StartSymbol startSymbol = new StartSymbol();

  /// <summary>
  /// Parses <paramref name="str"/> and returns the resulting tree. If the parsed expression is
  /// not itself rooted in a <see cref="ProgramRootSymbol"/>, it is wrapped into a
  /// program-root / start-symbol template.
  /// </summary>
  /// <param name="str">The S-expression text; tokens are separated by blanks.</param>
  /// <returns>A new <see cref="SymbolicExpressionTree"/> holding the parsed expression.</returns>
  /// <exception cref="FormatException">An unexpected token was encountered.</exception>
  /// <exception cref="KeyNotFoundException">A function symbol is not in the known-symbol table.</exception>
  /// <exception cref="InvalidOperationException">The token stream ended prematurely (empty queue).</exception>
  public ISymbolicExpressionTree Import(string str) {
    // pad the single-character tokens with blanks so that splitting on blanks isolates them
    str = str.Replace("(", " ( ").Replace(")", " ) ")
      .Replace("<", " < ").Replace(">", " > ")
      .Replace("=", " = ");

    ISymbolicExpressionTreeNode mainBranch = ParseSexp(new Queue<Token>(GetTokenStream(str)));
    if (mainBranch.Symbol is ProgramRootSymbol) {
      // when a root symbol was parsed => use main branch as root
      return new SymbolicExpressionTree(mainBranch);
    }

    // only a main branch was given => insert the main branch into the default tree template
    ISymbolicExpressionTreeNode root = programRootSymbol.CreateTreeNode();
    ISymbolicExpressionTreeNode start = startSymbol.CreateTreeNode();
    root.AddSubtree(start);
    start.AddSubtree(mainBranch);
    return new SymbolicExpressionTree(root);
  }

  /// <summary>
  /// Splits <paramref name="str"/> at blanks and lazily converts every non-empty piece into a
  /// token, dropping pieces for which <c>Token.Parse</c> returns null.
  /// </summary>
  private IEnumerable<Token> GetTokenStream(string str) {
    return str.Split(new string[] { " " }, StringSplitOptions.RemoveEmptyEntries)
      .Select(strToken => Token.Parse(strToken))
      .Where(t => t != null);
  }

  /// <summary>
  /// Parses one expression from the front of <paramref name="tokens"/>: a parenthesized list,
  /// a bare constant, or a bracketed number.
  /// </summary>
  /// <exception cref="FormatException">The next token cannot start an expression.</exception>
  private ISymbolicExpressionTreeNode ParseSexp(Queue<Token> tokens) {
    Token next = tokens.Peek();
    if (next.Symbol == TokenSymbol.LPAR) {
      return ParseList(tokens);
    }
    if (next.Symbol == TokenSymbol.CONSTANT) {
      var value = tokens.Dequeue().DoubleValue;
      // every constant gets its own symbol instance carrying the parsed value
      var constant = new Constant() { Value = value };
      return constant.CreateTreeNode();
    }
    if (next.Symbol == TokenSymbol.LBRACKET) {
      return ParseNumber(tokens);
    }
    throw new FormatException("Expected function or number symbol");
  }

  // Parses "( keyword ... )"; the token following the opening parenthesis selects the node type.
  private ISymbolicExpressionTreeNode ParseList(Queue<Token> tokens) {
    ISymbolicExpressionTreeNode tree;
    Expect(Token.LPAR, tokens);
    string keyword = tokens.Peek().StringValue;
    // Keywords are matched by prefix, so the order of the checks matters:
    // LAGVARIABLE has to be tested before LAG because it also starts with "LAG".
    if (keyword.StartsWith(VARSTART, StringComparison.Ordinal)) {
      tree = ParseVariable(tokens);
    } else if (keyword.StartsWith(LAGGEDVARSTART, StringComparison.Ordinal)) {
      tree = ParseLaggedVariable(tokens);
    } else if (keyword.StartsWith(TIMELAGSTART, StringComparison.Ordinal)) {
      tree = ParseTimeLag(tokens);
      tree.AddSubtree(ParseSexp(tokens));
    } else if (keyword.StartsWith(INTEGRALSTART, StringComparison.Ordinal)) {
      tree = ParseIntegral(tokens);
      tree.AddSubtree(ParseSexp(tokens));
    } else if (keyword.StartsWith(DEFUNSTART, StringComparison.Ordinal)) {
      tree = ParseDefun(tokens);
      ParseSubtreesUntilRpar(tree, tokens);
    } else if (keyword.StartsWith(ARGSTART, StringComparison.Ordinal)) {
      tree = ParseArgument(tokens);
    } else if (keyword.StartsWith(INVOKESTART, StringComparison.Ordinal)) {
      tree = ParseInvoke(tokens);
      ParseSubtreesUntilRpar(tree, tokens);
    } else if (keyword.StartsWith("FACTOR", StringComparison.Ordinal)) {
      tree = ParseFactor(tokens);
    } else if (keyword.StartsWith("BINFACTOR", StringComparison.Ordinal)) {
      tree = ParseBinaryFactor(tokens);
    } else {
      // generic function symbol followed by an arbitrary number of argument expressions
      Token curToken = tokens.Dequeue();
      tree = CreateTree(curToken);
      ParseSubtreesUntilRpar(tree, tokens);
    }
    Expect(Token.RPAR, tokens);
    return tree;
  }

  // Parses the token sequence LBRACKET NUM [EQ constant] RBRACKET into a number node.
  private ISymbolicExpressionTreeNode ParseNumber(Queue<Token> tokens) {
    Expect(Token.LBRACKET, tokens);
    Expect(Token.NUM, tokens);
    var t = (NumberTreeNode)number.CreateTreeNode();
    // the initial value is optional; without it the node keeps the value it was created with
    if (tokens.Peek().Symbol == TokenSymbol.EQ) {
      Expect(Token.EQ, tokens);
      Token initValToken = tokens.Dequeue();
      if (initValToken.Symbol != TokenSymbol.CONSTANT) {
        throw new FormatException("Expected a real value");
      }
      t.Value = initValToken.DoubleValue;
    }
    Expect(Token.RBRACKET, tokens);
    return t;
  }

  // Parses expressions and appends them to parent until the next token is the closing parenthesis
  // (which is left in the queue).
  private void ParseSubtreesUntilRpar(ISymbolicExpressionTreeNode parent, Queue<Token> tokens) {
    while (!tokens.Peek().Equals(Token.RPAR)) {
      parent.AddSubtree(ParseSexp(tokens));
    }
  }

  /// <summary>Consumes <c>CALL name</c> and creates an invoke node for the named function.</summary>
  private ISymbolicExpressionTreeNode ParseInvoke(Queue<Token> tokens) {
    Token invokeTok = tokens.Dequeue();
    Debug.Assert(invokeTok.StringValue == "CALL");
    InvokeFunction invokeSym = new InvokeFunction(tokens.Dequeue().StringValue);
    return invokeSym.CreateTreeNode();
  }

  /// <summary>Consumes <c>ARG index</c> and creates an argument node; the index is truncated to int.</summary>
  private ISymbolicExpressionTreeNode ParseArgument(Queue<Token> tokens) {
    Token argTok = tokens.Dequeue();
    Debug.Assert(argTok.StringValue == "ARG");
    Argument argument = new Argument((int)tokens.Dequeue().DoubleValue);
    return argument.CreateTreeNode();
  }

  /// <summary>Consumes <c>DEFUN name</c> and creates a defun node with that function name.</summary>
  private ISymbolicExpressionTreeNode ParseDefun(Queue<Token> tokens) {
    Token defTok = tokens.Dequeue();
    Debug.Assert(defTok.StringValue == "DEFUN");
    DefunTreeNode t = (DefunTreeNode)defun.CreateTreeNode();
    t.FunctionName = tokens.Dequeue().StringValue;
    return t;
  }

  /// <summary>Consumes <c>LAG lag</c> and creates a time-lag node; the lag is truncated to int.</summary>
  private ISymbolicExpressionTreeNode ParseTimeLag(Queue<Token> tokens) {
    Token varTok = tokens.Dequeue();
    Debug.Assert(varTok.StringValue == "LAG");
    LaggedTreeNode t = (LaggedTreeNode)timeLag.CreateTreeNode();
    t.Lag = (int)tokens.Dequeue().DoubleValue;
    return t;
  }

  /// <summary>Consumes <c>INTEGRAL lag</c> and creates an integral node; the lag is truncated to int.</summary>
  private ISymbolicExpressionTreeNode ParseIntegral(Queue<Token> tokens) {
    Token varTok = tokens.Dequeue();
    Debug.Assert(varTok.StringValue == "INTEGRAL");
    LaggedTreeNode t = (LaggedTreeNode)integral.CreateTreeNode();
    t.Lag = (int)tokens.Dequeue().DoubleValue;
    return t;
  }

  /// <summary>Consumes <c>VARIABLE weight name</c> and creates a variable node.</summary>
  private ISymbolicExpressionTreeNode ParseVariable(Queue<Token> tokens) {
    Token varTok = tokens.Dequeue();
    Debug.Assert(varTok.StringValue == "VARIABLE");
    VariableTreeNode t = (VariableTreeNode)variable.CreateTreeNode();
    t.Weight = tokens.Dequeue().DoubleValue;
    t.VariableName = tokens.Dequeue().StringValue;
    return t;
  }

  /// <summary>
  /// Consumes <c>FACTOR name weight*</c> and creates a factor-variable node whose symbol is
  /// configured with one placeholder value ("X0", "X1", ...) per parsed weight.
  /// </summary>
  private ISymbolicExpressionTreeNode ParseFactor(Queue<Token> tokens) {
    Token tok = tokens.Dequeue();
    Debug.Assert(tok.StringValue == "FACTOR");
    // create a new symbol each time on purpose: the variable names/values assigned below live on the symbol
    FactorVariableTreeNode t = (FactorVariableTreeNode)(new FactorVariable()).CreateTreeNode();
    Token varNameTok = tokens.Dequeue();
    // check the variable-name token (the keyword token was already verified above)
    Debug.Assert(varNameTok.Symbol == TokenSymbol.SYMB);
    t.VariableName = varNameTok.StringValue;

    var weights = new List<double>();
    while (tokens.Peek().Symbol == TokenSymbol.CONSTANT) {
      weights.Add(tokens.Dequeue().DoubleValue);
    }
    t.Weights = weights.ToArray();

    // create a set of (virtual) values to match the number of weights
    t.Symbol.VariableNames = new string[] { t.VariableName };
    t.Symbol.VariableValues = new[] {
      new KeyValuePair<string, Dictionary<string, int>>(
        t.VariableName,
        Enumerable.Range(0, weights.Count).ToDictionary(i => "X" + i, i => i))
    };
    return t;
  }

  /// <summary>Consumes <c>BINFACTOR name value weight</c> and creates a binary-factor-variable node.</summary>
  private ISymbolicExpressionTreeNode ParseBinaryFactor(Queue<Token> tokens) {
    Token tok = tokens.Dequeue();
    Debug.Assert(tok.StringValue == "BINFACTOR");
    var t = (BinaryFactorVariableTreeNode)binFactorVar.CreateTreeNode();

    Token varNameTok = tokens.Dequeue();
    Debug.Assert(varNameTok.Symbol == TokenSymbol.SYMB);
    t.VariableName = varNameTok.StringValue;

    Token varValTok = tokens.Dequeue();
    Debug.Assert(varValTok.Symbol == TokenSymbol.SYMB);
    t.VariableValue = varValTok.StringValue;

    Token weightTok = tokens.Dequeue();
    Debug.Assert(weightTok.Symbol == TokenSymbol.CONSTANT);
    t.Weight = weightTok.DoubleValue;
    return t;
  }

  /// <summary>Consumes <c>LAGVARIABLE weight name lag</c> and creates a lagged-variable node.</summary>
  private ISymbolicExpressionTreeNode ParseLaggedVariable(Queue<Token> tokens) {
    Token varTok = tokens.Dequeue();
    Debug.Assert(varTok.StringValue == "LAGVARIABLE");
    LaggedVariableTreeNode t = (LaggedVariableTreeNode)laggedVariable.CreateTreeNode();
    t.Weight = tokens.Dequeue().DoubleValue;
    t.VariableName = tokens.Dequeue().StringValue;
    t.Lag = (int)tokens.Dequeue().DoubleValue;
    return t;
  }

  /// <summary>
  /// Creates a tree node for the function symbol named by <paramref name="token"/>.
  /// </summary>
  /// <exception cref="FormatException">The token cannot denote a function symbol.</exception>
  /// <exception cref="KeyNotFoundException">The token text is not a key of the known-symbol table.</exception>
  private ISymbolicExpressionTreeNode CreateTree(Token token) {
    // LBRACKET and RBRACKET delimit the bracketed number syntax, but in function position
    // they double as the LT / GT operators ("<" and ">" in the known-symbol table)
    bool canBeFunction = token.Symbol == TokenSymbol.SYMB
      || token.Symbol == TokenSymbol.LBRACKET
      || token.Symbol == TokenSymbol.RBRACKET;
    if (!canBeFunction) throw new FormatException("Expected function symbol, but got: " + token.StringValue);

    Symbol symbol;
    if (!knownSymbols.TryGetValue(token.StringValue, out symbol)) {
      // same exception type as the plain indexer lookup, but the message names the offending symbol
      throw new KeyNotFoundException("Unknown symbol: " + token.StringValue);
    }
    return symbol.CreateTreeNode();
  }

  /// <summary>
  /// Removes the next token from <paramref name="tokens"/> and verifies that it equals
  /// <paramref name="token"/>.
  /// </summary>
  /// <exception cref="FormatException">The dequeued token differs from the expected one.</exception>
  private void Expect(Token token, Queue<Token> tokens) {
    Token cur = tokens.Dequeue();
    if (!token.Equals(cur)) throw new FormatException("Expected: " + token.StringValue + ", but got: " + cur.StringValue);
  }
}
}