using System; using System.Collections.Generic; using System.Linq; using System.Text; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.Parameters; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.Problems.DataAnalysis.Symbolic; using HeuristicLab.Problems.DataAnalysis; using System.Threading; using HeuristicLab.Problems.TradeRules.Symbols; namespace HeuristicLab.Problems.TradeRules { [StorableClass] [Item("Interpreter", "Represents a grammar for Trading Problems")] public sealed class Interpreter : ParameterizedNamedItem, ITradeRulesExpresionTree { private const string CheckExpressionsWithIntervalArithmeticParameterName = "CheckExpressionsWithIntervalArithmetic"; private const string EvaluatedSolutionsParameterName = "EvaluatedSolutions"; private int initialTraining; private int initialTest; [ThreadStatic] private static Dictionary signalCache; [ThreadStatic] private static Dictionary firstEMACache; [ThreadStatic] private static Dictionary secondEMACache; [ThreadStatic] private static Dictionary RSIPositiveCache; [ThreadStatic] private static Dictionary RSINegativeCache; [ThreadStatic] private static Dictionary RSICache; [ThreadStatic] private static Dictionary RSIOutputCache; #region private classes //This class manipulate the instructions of the stack private class InterpreterState { private double[] argumentStack; private int argumentStackPointer; private Instruction[] code; private int pc; public int ProgramCounter { get { return pc; } set { pc = value; } } internal InterpreterState(Instruction[] code, int argumentStackSize) { this.code = code; this.pc = 0; if (argumentStackSize > 0) { this.argumentStack = new double[argumentStackSize]; } this.argumentStackPointer = 0; } internal void Reset() { this.pc = 0; this.argumentStackPointer = 0; } internal Instruction NextInstruction() { return code[pc++]; } private void Push(double val) { argumentStack[argumentStackPointer++] = val; } private double Pop() { return argumentStack[--argumentStackPointer]; } internal void CreateStackFrame(double[] argValues) { // push in reverse order to make indexing easier for (int i = argValues.Length - 1; i >= 0; i--) { argumentStack[argumentStackPointer++] = argValues[i]; } Push(argValues.Length); } internal void RemoveStackFrame() { int size = (int)Pop(); argumentStackPointer -= size; } internal double GetStackFrameValue(ushort index) { // layout of stack: // [0] <- argumentStackPointer // [StackFrameSize = N + 1] // [Arg0] <- argumentStackPointer - 2 - 0 // [Arg1] <- argumentStackPointer - 2 - 1 // [...] // [ArgN] <- argumentStackPointer - 2 - N // return argumentStack[argumentStackPointer - index - 2]; } } //Operation codes private class OpCodes { public const byte Add = 1; public const byte Sub = 2; public const byte Mul = 3; public const byte GT = 5; public const byte LT = 6; public const byte AND = 7; public const byte OR = 8; public const byte NOT = 9; public const byte BOOLEAN = 10; public const byte Average = 11; public const byte MACD = 12; public const byte Variable = 13; public const byte Constant = 14; public const byte ConstantInt = 16; public const byte BoolConstant = 15; public const byte Max = 17; public const byte Min = 18; public const byte Lag = 19; public const byte RSI = 20; } #endregion #region IStatefulItem public void InitializeState() { EvaluatedSolutions.Value = 0; } public void ClearState() { } #endregion private Dictionary symbolToOpcode = new Dictionary() { { typeof(Addition), OpCodes.Add }, { typeof(Subtraction), OpCodes.Sub }, { typeof(Multiplication), OpCodes.Mul }, { typeof(Constant), OpCodes.Constant }, { typeof(BoolConstant), OpCodes.BoolConstant }, { typeof(ConstantInt), OpCodes.ConstantInt }, { typeof(GreaterThan), OpCodes.GT }, { typeof(LessThan), OpCodes.LT }, { typeof(And), OpCodes.AND }, { typeof(Or), OpCodes.OR }, { typeof(Not), OpCodes.NOT}, { typeof(AverageTrade), OpCodes.Average}, { typeof(MACD), OpCodes.MACD}, { typeof(RSI), OpCodes.RSI}, { typeof(Max), OpCodes.Max}, { typeof(Min), OpCodes.Min}, { typeof(Lag), OpCodes.Lag}, { typeof(HeuristicLab.Problems.DataAnalysis.Symbolic.Variable), OpCodes.Variable }, }; public override bool CanChangeName { get { return false; } } public override bool CanChangeDescription { get { return false; } } #region parameter properties public IValueParameter CheckExpressionsWithIntervalArithmeticParameter { get { return (IValueParameter)Parameters[CheckExpressionsWithIntervalArithmeticParameterName]; } } public IValueParameter EvaluatedSolutionsParameter { get { return (IValueParameter)Parameters[EvaluatedSolutionsParameterName]; } } #endregion #region properties public BoolValue CheckExpressionsWithIntervalArithmetic { get { return CheckExpressionsWithIntervalArithmeticParameter.Value; } set { CheckExpressionsWithIntervalArithmeticParameter.Value = value; } } public IntValue EvaluatedSolutions { get { return EvaluatedSolutionsParameter.Value; } set { EvaluatedSolutionsParameter.Value = value; } } #endregion private double Evaluate(Dataset dataset, ref int row, InterpreterState state) { Instruction currentInstr = state.NextInstruction(); switch (currentInstr.opCode) { case OpCodes.Add: { double s = Evaluate(dataset, ref row, state); for (int i = 1; i < currentInstr.nArguments; i++) { s += Evaluate(dataset, ref row, state); } return s; } case OpCodes.Sub: { double s = Evaluate(dataset, ref row, state); for (int i = 1; i < currentInstr.nArguments; i++) { s -= Evaluate(dataset, ref row, state); } if (currentInstr.nArguments == 1) s = -s; return s; } case OpCodes.Mul: { double p = Evaluate(dataset, ref row, state); for (int i = 1; i < currentInstr.nArguments; i++) { p *= Evaluate(dataset, ref row, state); } return p; } case OpCodes.Average: { double sum = Evaluate(dataset, ref row, state); int integerValue = (int) Math.Floor(sum); if (integerValue > 100) integerValue = 100; if (row < integerValue) { string variableName = dataset.GetValue(row, 3); double inferiorValue = Convert.ToDouble(variableName); return inferiorValue/(row+1); } else { string variableName = dataset.GetValue(row, 3); double meanValue1 = Convert.ToDouble(variableName); string variableName2 = dataset.GetValue((row - integerValue), 3); double meanValue2 = Convert.ToDouble(variableName2); return (meanValue1 - meanValue2) / integerValue; } } case OpCodes.AND: { double result = Evaluate(dataset, ref row, state); for (int i = 1; i < currentInstr.nArguments; i++) { if (result > 0.0) result = Evaluate(dataset, ref row, state); else { SkipInstructions(state); } } return result > 0.0 ? 1.0 : -1.0; } case OpCodes.OR: { double result = Evaluate(dataset, ref row, state); for (int i = 1; i < currentInstr.nArguments; i++) { if (result <= 0.0) result = Evaluate(dataset, ref row, state); else { SkipInstructions(state); } } return result > 0.0 ? 1.0 : -1.0; } case OpCodes.NOT: { return Evaluate(dataset, ref row, state) > 0.0 ? -1.0 : 1.0; } case OpCodes.BOOLEAN: { var booleanTreeNode = currentInstr.dynamicNode as BoolConstantTreeNode; return booleanTreeNode.Value; } case OpCodes.GT: { double x = Evaluate(dataset, ref row, state); double y = Evaluate(dataset, ref row, state); if (x > y) return 1.0; else return -1.0; } case OpCodes.LT: { double x = Evaluate(dataset, ref row, state); double y = Evaluate(dataset, ref row, state); if (x < y) return 1.0; else return -1.0; } case OpCodes.Variable: { if (row < 0 || row >= dataset.Rows) return double.NaN; var variableTreeNode = (VariableTreeNode)currentInstr.dynamicNode; return ((IList)currentInstr.iArg0)[row] * variableTreeNode.Weight; } case OpCodes.Constant: { var constTreeNode = currentInstr.dynamicNode as ConstantTreeNode; return constTreeNode.Value; } case OpCodes.BoolConstant: { var boolConstTreeNode = currentInstr.dynamicNode as BoolConstantTreeNode; return boolConstTreeNode.Value; } case OpCodes.ConstantInt: { var constIntTreeNode = currentInstr.dynamicNode as ConstantIntTreeNode; return constIntTreeNode.Value; } case OpCodes.Max: { int n = (int) Evaluate(dataset, ref row, state); double max = Double.NegativeInfinity; int i = Math.Min(n,row); while(i>=0) { int position = row - i; string variableName = dataset.GetValue(position, 2); double intValue = Convert.ToDouble(variableName); if (intValue>max) max = intValue; i--; } return max; } case OpCodes.Min: { int n = (int)Evaluate(dataset, ref row, state); double min = Double.NegativeInfinity; int i = Math.Min(n, row); while (i >= 0) { int position = row - i; string variableName = dataset.GetValue(position, 2); double intValue = Convert.ToDouble(variableName); if (intValue < min) min = intValue; i--; } return min; } case OpCodes.Lag: { int n = (int) Evaluate(dataset, ref row, state); if (n>row) return 0; int position = row - n; string variableName = dataset.GetValue(position, 2); double intValue = Convert.ToDouble(variableName); return intValue; } case OpCodes.MACD: { var MACDNode = currentInstr.dynamicNode; //Taking the number of the days for each EMA double firstEMA = Evaluate(dataset, ref row, state); double secondEMA = Evaluate(dataset, ref row, state); double signal = Evaluate(dataset, ref row, state); //Calculating the factor for each EMA double factor = 2.0 / (firstEMA + 1.0); double factor2 = 2.0 / (secondEMA + 1.0); double factor3 = 2.0 / (signal + 1.0); //Initiation of the variables double firstElementEMA = -1000000; double secondElementEMA = -100000; double signalValue = -100000; double macd = 0; //Check if this MACD has previous values and retrieve them if (firstEMACache.ContainsKey(MACDNode)) firstElementEMA = firstEMACache[MACDNode]; if (secondEMACache.ContainsKey(MACDNode)) secondElementEMA = secondEMACache[MACDNode]; if (signalCache.ContainsKey(MACDNode)) signalValue = signalCache[MACDNode]; //Calculate the first value in the training for the two EMAs and the signal. if (row <= initialTraining || row == initialTest) { double[] meanValues = dataset.GetDoubleValues("\"Close\"", Enumerable.Range(0, (row + 1))).ToArray(); firstElementEMA = meanValues[0]; secondElementEMA = meanValues[0]; double max = (Math.Max(firstEMA, secondEMA) - 1);//The first macd happens when the longest EMA has its first value. We need -1 because row begin in 0. for (int i = 1; i <= row; i++) { firstElementEMA = meanValues[i] * factor + (1 - factor) * firstElementEMA; secondElementEMA = meanValues[i] * factor2 + (1 - factor2) * secondElementEMA; if (i == max) signalValue = firstElementEMA - secondElementEMA;//First signal equals to macd. else if (i > max)//Calculation for the next signals { macd = firstElementEMA - secondElementEMA; signalValue = macd * factor3 + (1 - factor3) * signalValue; } } } else //The rest of the rows are calculating with the standard EMA formula { //Retrieve the dataset values string variableName = dataset.GetValue(row, 2); double meanValue1 = Convert.ToDouble(variableName); //Calculating EMA firstElementEMA = meanValue1 * factor + (1 - factor) * firstElementEMA; secondElementEMA = meanValue1 * factor2 + (1 - factor2) * secondElementEMA; //Calculating signal macd = firstElementEMA - secondElementEMA; signalValue = macd * factor3 + (1 - factor3) * signalValue; } //Save the values for the next iteration firstEMACache[MACDNode] = firstElementEMA; secondEMACache[MACDNode] = secondElementEMA; signalCache[MACDNode] = signalValue; macd = firstElementEMA - secondElementEMA; return macd > signalValue ? 1.0 : -1.0; } case OpCodes.RSI: { //Taking the number of the days for EMA double numberOfDays = Evaluate(dataset, ref row, state); //Calculate the factor for the EMA double factor = 1.0 / numberOfDays; double positiveEMA = 0; double negativeEMA = 0; double yesterdayRSI = double.NegativeInfinity; double todayRSI = double.NegativeInfinity; double outputRSI = double.NegativeInfinity; //Retrieve EMA values if (RSIPositiveCache.ContainsKey(currentInstr.dynamicNode)) positiveEMA = RSIPositiveCache[currentInstr.dynamicNode]; if (RSINegativeCache.ContainsKey(currentInstr.dynamicNode)) negativeEMA = RSINegativeCache[currentInstr.dynamicNode]; if (RSICache.ContainsKey(currentInstr.dynamicNode)) yesterdayRSI = RSICache[currentInstr.dynamicNode]; if (RSIOutputCache.ContainsKey(currentInstr.dynamicNode)) outputRSI = RSIOutputCache[currentInstr.dynamicNode]; if (row == initialTraining || row == initialTest) { double[] closeValues = dataset.GetDoubleValues("\"Close\"", Enumerable.Range(0, (row + 1))).ToArray(); outputRSI = -1.0; for (int i = 1; i <= row; i++) { if (numberOfDays>=i) { if ((closeValues[i] - closeValues[i - 1]) > 0) positiveEMA = ((closeValues[i] - closeValues[i - 1])+positiveEMA); else negativeEMA = Math.Abs(closeValues[i] - closeValues[i - 1]) + negativeEMA; if (numberOfDays == i) { positiveEMA = positiveEMA/numberOfDays; negativeEMA = negativeEMA / numberOfDays; yesterdayRSI = 100 - (100 / (1 + (positiveEMA / negativeEMA))); } }else{ if ((closeValues[i] - closeValues[i - 1]) > 0) { positiveEMA = (closeValues[i]-closeValues[i-1]) * factor + (1 - factor) * positiveEMA; negativeEMA = 0 * factor + (1 - factor) * negativeEMA; } else { positiveEMA = 0 * factor + (1 - factor) * positiveEMA; negativeEMA = Math.Abs(closeValues[i] - closeValues[i - 1]) * factor + (1 - factor) * negativeEMA; } todayRSI = 100 - (100 / (1 + (positiveEMA / negativeEMA))); if ((yesterdayRSI < 30) && (todayRSI > 30)) outputRSI = 1.0; else if ((yesterdayRSI > 70) && (todayRSI < 70)) outputRSI = -1.0; yesterdayRSI = todayRSI; } } } else { string todayCloseString = dataset.GetValue(row, 2); string yesterdayCloseString = dataset.GetValue((row - 1), 2); double todayClose = Convert.ToDouble(todayCloseString); double yesterdayClose = Convert.ToDouble(yesterdayCloseString); //Calculating EMA if ((todayClose - yesterdayClose) > 0) { positiveEMA = (todayClose-yesterdayClose) * factor + (1 - factor) * positiveEMA; negativeEMA = 0 * factor + (1 - factor) * negativeEMA; } else { positiveEMA = 0 * factor + (1 - factor) * positiveEMA; negativeEMA = Math.Abs(todayClose - yesterdayClose) * factor + (1 - factor) * negativeEMA; } todayRSI = 100 - (100 / (1 + (positiveEMA / negativeEMA))); if ((yesterdayRSI < 30) && (todayRSI > 30)) outputRSI = 1.0; else if ((yesterdayRSI > 70) && (todayRSI < 70)) outputRSI = -1.0; } //Save positive and negative EMA for the next iteration RSIPositiveCache[currentInstr.dynamicNode] = positiveEMA; RSINegativeCache[currentInstr.dynamicNode] = negativeEMA; RSICache[currentInstr.dynamicNode] = todayRSI; RSIOutputCache[currentInstr.dynamicNode] = outputRSI; return outputRSI; } default: throw new NotSupportedException(); } } private byte MapSymbolToOpCode(ISymbolicExpressionTreeNode treeNode) { if (symbolToOpcode.ContainsKey(treeNode.Symbol.GetType())) return symbolToOpcode[treeNode.Symbol.GetType()]; else throw new NotSupportedException("Symbol: " + treeNode.Symbol); } // skips a whole branch private void SkipInstructions(InterpreterState state) { int i = 1; while (i > 0) { i += state.NextInstruction().nArguments; i--; } } [StorableConstructor] private Interpreter(bool deserializing) : base(deserializing) { } private Interpreter(Interpreter original, Cloner cloner) : base(original, cloner) { } public override IDeepCloneable Clone(Cloner cloner) { return new Interpreter(this, cloner); } public Interpreter() : base("SymbolicDataAnalysisExpressionTreeInterpreter", "Interpreter for symbolic expression trees including automatically defined functions.") { Parameters.Add(new ValueParameter(CheckExpressionsWithIntervalArithmeticParameterName, "Switch that determines if the interpreter checks the validity of expressions with interval arithmetic before evaluating the expression.", new BoolValue(false))); Parameters.Add(new ValueParameter(EvaluatedSolutionsParameterName, "A counter for the total number of solutions the interpreter has evaluated", new IntValue(0))); } [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { if (!Parameters.ContainsKey(EvaluatedSolutionsParameterName)) Parameters.Add(new ValueParameter(EvaluatedSolutionsParameterName, "A counter for the total number of solutions the interpreter has evaluated", new IntValue(0))); } public void setInitialTraining(int initialTraining) { this.initialTraining = initialTraining; } public void setInitialTest(int initialTest) { this.initialTest = initialTest; } public void clearVariables() { signalCache.Clear(); firstEMACache.Clear(); secondEMACache.Clear(); RSIPositiveCache.Clear(); RSINegativeCache.Clear(); RSICache.Clear(); RSIOutputCache.Clear(); } //Take the symbolic expression values of the tree public IEnumerable GetSymbolicExpressionTreeValues(ISymbolicExpressionTree tree, Dataset dataset, IEnumerable rows) { if (CheckExpressionsWithIntervalArithmetic.Value) throw new NotSupportedException("Interval arithmetic is not yet supported in the symbolic data analysis interpreter."); EvaluatedSolutions.Value++; // increment the evaluated solutions counter var compiler = new SymbolicExpressionTreeCompiler(); Instruction[] code = compiler.Compile(tree, MapSymbolToOpCode);//Take the type of symbol int necessaryArgStackSize = 0; for (int i = 0; i < code.Length; i++) { Instruction instr = code[i]; if (instr.opCode == OpCodes.Variable) { var variableTreeNode = instr.dynamicNode as VariableTreeNode; instr.iArg0 = dataset.GetReadOnlyDoubleValues(variableTreeNode.VariableName); code[i] = instr; } } if (signalCache == null) signalCache = new Dictionary(); if (firstEMACache == null) firstEMACache = new Dictionary(); if (secondEMACache == null) secondEMACache = new Dictionary(); if (RSIPositiveCache == null) RSIPositiveCache = new Dictionary(); if (RSINegativeCache == null) RSINegativeCache = new Dictionary(); if (RSICache == null) RSICache = new Dictionary(); if (RSIOutputCache == null) RSIOutputCache = new Dictionary(); var state = new InterpreterState(code, necessaryArgStackSize); //Evaluate each row of the datase foreach (var rowEnum in rows) { int row = rowEnum; state.Reset(); if (row < initialTraining) yield return -1; else yield return Evaluate(dataset, ref row, state); } } } }