#region License Information
/* HeuristicLab
 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Parameters;
using HEAL.Attic;
using MathNet.Numerics;
using MathNet.Numerics.Statistics;
// NOTE(review): the generic type argument was stripped in the extracted text;
// restored to Vector<double> per MathNet.Numerics.LinearAlgebra.
using DoubleVector = MathNet.Numerics.LinearAlgebra.Vector<double>;

namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Interpreter for symbolic expression trees that supports vector-valued variables
  /// and element-wise vector arithmetic in addition to plain scalar evaluation.
  /// </summary>
  [StorableType("DE68A1D9-5AFC-4DDD-AB62-29F3B8FC28E0")]
  [Item("SymbolicDataAnalysisExpressionTreeVectorInterpreter", "Interpreter for symbolic expression trees including vector arithmetic.")]
  public class SymbolicDataAnalysisExpressionTreeVectorInterpreter : ParameterizedNamedItem, ISymbolicDataAnalysisExpressionTreeInterpreter {

    /// <summary>How a vector result at the tree root is reduced to a single scalar.</summary>
    [StorableType("2612504E-AD5F-4AE2-B60E-98A5AB59E164")]
    public enum Aggregation {
      Mean,
      Median,
      Sum,
      First,
      L1Norm,
      L2Norm,
      NaN,
      Exception
    }

    /// <summary>Reduces <paramref name="vector"/> to a scalar according to <paramref name="aggregation"/>.</summary>
    /// <exception cref="InvalidOperationException">Thrown when <see cref="Aggregation.Exception"/> is selected.</exception>
    public static double Aggregate(Aggregation aggregation, DoubleVector vector) {
      switch (aggregation) {
        case Aggregation.Mean: return Statistics.Mean(vector);
        case Aggregation.Median: return Statistics.Median(vector);
        case Aggregation.Sum: return vector.Sum();
        case Aggregation.First: return vector.First();
        case Aggregation.L1Norm: return vector.L1Norm();
        case Aggregation.L2Norm: return vector.L2Norm();
        case Aggregation.NaN: return double.NaN;
        case Aggregation.Exception: throw new InvalidOperationException("Result of the tree is not a scalar.");
        default: throw new ArgumentOutOfRangeException(nameof(aggregation), aggregation, null);
      }
    }

    /// <summary>How two vector operands of different lengths are reconciled before a binary operation.</summary>
    [StorableType("73DCBB45-916F-4139-8ADC-57BA610A1B66")]
    public enum VectorLengthStrategy {
      ExceptionIfDifferent,
      FillShorterWithNaN,
      FillShorterWithNeutralElement,
      CutLonger,
      ResampleToLonger,
      ResampleToShorter,
      CycleShorter
    }

    #region Implementation VectorLengthStrategy
    /// <summary>Throws if the operand lengths differ; otherwise returns them unchanged.</summary>
    public static (DoubleVector, DoubleVector) ExceptionIfDifferent(DoubleVector lhs, DoubleVector rhs) {
      if (lhs.Count != rhs.Count)
        // fixed: the closing parenthesis was missing from the message
        throw new InvalidOperationException($"Vector Lengths incompatible ({lhs.Count} vs. {rhs.Count})");
      return (lhs, rhs);
    }

    /// <summary>Pads the shorter operand with <paramref name="fillElement"/> up to the longer length.</summary>
    public static (DoubleVector, DoubleVector) FillShorter(DoubleVector lhs, DoubleVector rhs, double fillElement) {
      var targetLength = Math.Max(lhs.Count, rhs.Count);
      DoubleVector PadVector(DoubleVector v) {
        if (v.Count == targetLength) return v;
        var p = DoubleVector.Build.Dense(targetLength, fillElement);
        v.CopySubVectorTo(p, 0, 0, v.Count);
        return p;
      }
      return (PadVector(lhs), PadVector(rhs));
    }

    /// <summary>Truncates the longer operand down to the shorter length.</summary>
    public static (DoubleVector, DoubleVector) CutLonger(DoubleVector lhs, DoubleVector rhs) {
      var targetLength = Math.Min(lhs.Count, rhs.Count);
      DoubleVector CutVector(DoubleVector v) {
        if (v.Count == targetLength) return v;
        return v.SubVector(0, targetLength);
      }
      return (CutVector(lhs), CutVector(rhs));
    }

    // Linearly interpolates v onto targetLength equally spaced sample positions in [0, v.Count).
    private static DoubleVector ResampleToLength(DoubleVector v, int targetLength) {
      if (v.Count == targetLength) return v;
      var indices = Enumerable.Range(0, v.Count).Select(x => (double)x);
      var interpolation = Interpolate.Linear(indices, v);
      var resampledIndices = Enumerable.Range(0, targetLength).Select(i => (double)i / targetLength * v.Count);
      var interpolatedValues = resampledIndices.Select(interpolation.Interpolate);
      return DoubleVector.Build.DenseOfEnumerable(interpolatedValues);
    }

    /// <summary>Resamples the shorter operand up to the longer length.</summary>
    public static (DoubleVector, DoubleVector) ResampleToLonger(DoubleVector lhs, DoubleVector rhs) {
      var maxLength = Math.Max(lhs.Count, rhs.Count);
      return (ResampleToLength(lhs, maxLength), ResampleToLength(rhs, maxLength));
    }

    /// <summary>Resamples the longer operand down to the shorter length.</summary>
    public static (DoubleVector, DoubleVector) ResampleToShorter(DoubleVector lhs, DoubleVector rhs) {
      var minLength = Math.Min(lhs.Count, rhs.Count);
      return (ResampleToLength(lhs, minLength), ResampleToLength(rhs, minLength));
    }

    /// <summary>Repeats the shorter operand cyclically up to the longer length.</summary>
    public static (DoubleVector, DoubleVector) CycleShorter(DoubleVector lhs, DoubleVector rhs) {
      var targetLength = Math.Max(lhs.Count, rhs.Count);
      DoubleVector CycleVector(DoubleVector v) {
        if (v.Count == targetLength) return v;
        var cycledValues = Enumerable.Range(0, targetLength).Select(i => v[i % v.Count]);
        return DoubleVector.Build.DenseOfEnumerable(cycledValues);
      }
      return (CycleVector(lhs), CycleVector(rhs));
    }
    #endregion

    /// <summary>Dispatches to the length-reconciliation implementation selected by <paramref name="strategy"/>.</summary>
    /// <param name="neutralElement">Fill value used by <see cref="VectorLengthStrategy.FillShorterWithNeutralElement"/>.</param>
    public static (DoubleVector lhs, DoubleVector rhs) ApplyVectorLengthStrategy(VectorLengthStrategy strategy,
      DoubleVector lhs, DoubleVector rhs, double neutralElement = double.NaN) {
      switch (strategy) {
        case VectorLengthStrategy.ExceptionIfDifferent: return ExceptionIfDifferent(lhs, rhs);
        case VectorLengthStrategy.FillShorterWithNaN: return FillShorter(lhs, rhs, double.NaN);
        case VectorLengthStrategy.FillShorterWithNeutralElement: return FillShorter(lhs, rhs, neutralElement);
        case VectorLengthStrategy.CutLonger: return CutLonger(lhs, rhs);
        case VectorLengthStrategy.ResampleToLonger: return ResampleToLonger(lhs, rhs);
        case VectorLengthStrategy.ResampleToShorter: return ResampleToShorter(lhs, rhs);
        case VectorLengthStrategy.CycleShorter: return CycleShorter(lhs, rhs);
        default: throw new ArgumentOutOfRangeException(nameof(strategy), strategy, null);
      }
    }

    #region Aggregation Symbols
    // Symbols whose result is always scalar regardless of argument types (used by GetNodeType).
    private static Type[] AggregationSymbols = new[] {
      typeof(Sum), typeof(Mean), typeof(Length), typeof(StandardDeviation), typeof(Variance), typeof(EuclideanDistance), 
typeof(Covariance)
    };
    #endregion

    private const string EvaluatedSolutionsParameterName = "EvaluatedSolutions";
    private const string FinalAggregationParameterName = "FinalAggregation";
    private const string DifferentVectorLengthStrategyParameterName = "DifferentVectorLengthStrategy";

    public override bool CanChangeName {
      get { return false; }
    }
    public override bool CanChangeDescription {
      get { return false; }
    }

    #region parameter properties
    // NOTE(review): the generic arguments below were stripped in the extracted text; they are
    // restored from the value types registered in the constructor (IntValue / EnumValue<T>).
    public IFixedValueParameter<IntValue> EvaluatedSolutionsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[EvaluatedSolutionsParameterName]; }
    }
    public IFixedValueParameter<EnumValue<Aggregation>> FinalAggregationParameter {
      get { return (IFixedValueParameter<EnumValue<Aggregation>>)Parameters[FinalAggregationParameterName]; }
    }
    public IFixedValueParameter<EnumValue<VectorLengthStrategy>> DifferentVectorLengthStrategyParameter {
      get { return (IFixedValueParameter<EnumValue<VectorLengthStrategy>>)Parameters[DifferentVectorLengthStrategyParameterName]; }
    }
    #endregion

    #region properties
    // Total number of solutions this interpreter has evaluated (incremented under syncRoot).
    public int EvaluatedSolutions {
      get { return EvaluatedSolutionsParameter.Value.Value; }
      set { EvaluatedSolutionsParameter.Value.Value = value; }
    }
    // Aggregation applied when the root of the tree evaluates to a vector.
    public Aggregation FinalAggregation {
      get { return FinalAggregationParameter.Value.Value; }
      set { FinalAggregationParameter.Value.Value = value; }
    }
    // Strategy for reconciling two vector operands of different lengths.
    public VectorLengthStrategy DifferentVectorLengthStrategy {
      get { return DifferentVectorLengthStrategyParameter.Value.Value; }
      set { DifferentVectorLengthStrategyParameter.Value.Value = value; }
    }
    #endregion

    [StorableConstructor]
    protected SymbolicDataAnalysisExpressionTreeVectorInterpreter(StorableConstructorFlag _) : base(_) { }

    protected SymbolicDataAnalysisExpressionTreeVectorInterpreter(SymbolicDataAnalysisExpressionTreeVectorInterpreter original, Cloner cloner)
      : base(original, cloner) { }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicDataAnalysisExpressionTreeVectorInterpreter(this, cloner);
    }

    public SymbolicDataAnalysisExpressionTreeVectorInterpreter()
      : this("SymbolicDataAnalysisExpressionTreeVectorInterpreter", "Interpreter for symbolic expression trees including vector arithmetic.") { }

    protected SymbolicDataAnalysisExpressionTreeVectorInterpreter(string name, string description)
      : base(name, description) {
      Parameters.Add(new FixedValueParameter<IntValue>(EvaluatedSolutionsParameterName, "A counter for the total number of solutions the interpreter has evaluated", new IntValue(0)));
      Parameters.Add(new FixedValueParameter<EnumValue<Aggregation>>(FinalAggregationParameterName, "If root node of the expression tree results in a Vector it is aggregated according to this parameter", new EnumValue<Aggregation>(Aggregation.Mean)));
      Parameters.Add(new FixedValueParameter<EnumValue<VectorLengthStrategy>>(DifferentVectorLengthStrategyParameterName, "", new EnumValue<VectorLengthStrategy>(VectorLengthStrategy.ExceptionIfDifferent)));
    }

    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
      // Backwards compatibility: older persisted instances lack these parameters.
      if (!Parameters.ContainsKey(FinalAggregationParameterName)) {
        Parameters.Add(new FixedValueParameter<EnumValue<Aggregation>>(FinalAggregationParameterName, "If root node of the expression tree results in a Vector it is aggregated according to this parameter", new EnumValue<Aggregation>(Aggregation.Mean)));
      }
      if (!Parameters.ContainsKey(DifferentVectorLengthStrategyParameterName)) {
        Parameters.Add(new FixedValueParameter<EnumValue<VectorLengthStrategy>>(DifferentVectorLengthStrategyParameterName, "", new EnumValue<VectorLengthStrategy>(VectorLengthStrategy.ExceptionIfDifferent)));
      }
    }

    #region IStatefulItem
    public void InitializeState() {
      EvaluatedSolutions = 0;
    }

    public void ClearState() { }
    #endregion

    private readonly object syncRoot = new object();

    /// <summary>
    /// Evaluates the tree for every row; a vector result at the root is reduced
    /// to a scalar via <see cref="FinalAggregation"/>, anything else yields NaN.
    /// </summary>
    public IEnumerable<double> GetSymbolicExpressionTreeValues(ISymbolicExpressionTree tree, IDataset dataset, IEnumerable<int> rows) {
      lock (syncRoot) {
        EvaluatedSolutions++; // increment the evaluated solutions counter
      }
      var state = PrepareInterpreterState(tree, dataset);

      foreach (var rowEnum in rows) {
        int row = rowEnum;
        var result = Evaluate(dataset, ref row, state);
        if (result.IsScalar)
          yield return result.Scalar;
        else if (result.IsVector) {
          yield return Aggregate(FinalAggregation, result.Vector);
        } else
          yield return double.NaN;
        state.Reset();
      }
    }
public IEnumerable> GetIntermediateNodeValues(ISymbolicExpressionTree tree, IDataset dataset, IEnumerable rows) { var state = PrepareInterpreterState(tree, dataset); foreach (var rowEnum in rows) { int row = rowEnum; var traceDict = new Dictionary(); var result = Evaluate(dataset, ref row, state, traceDict); traceDict.Add(tree.Root.GetSubtree(0), result); // Add StartSymbol yield return traceDict; state.Reset(); } } private static InterpreterState PrepareInterpreterState(ISymbolicExpressionTree tree, IDataset dataset) { Instruction[] code = SymbolicExpressionTreeCompiler.Compile(tree, OpCodes.MapSymbolToOpCode); int necessaryArgStackSize = 0; foreach (Instruction instr in code) { if (instr.opCode == OpCodes.Variable) { var variableTreeNode = (VariableTreeNode)instr.dynamicNode; if (dataset.VariableHasType(variableTreeNode.VariableName)) instr.data = dataset.GetReadOnlyDoubleValues(variableTreeNode.VariableName); else if (dataset.VariableHasType(variableTreeNode.VariableName)) instr.data = dataset.GetReadOnlyDoubleVectorValues(variableTreeNode.VariableName); else throw new NotSupportedException($"Type of variable {variableTreeNode.VariableName} is not supported."); } else if (instr.opCode == OpCodes.FactorVariable) { var factorTreeNode = instr.dynamicNode as FactorVariableTreeNode; instr.data = dataset.GetReadOnlyStringValues(factorTreeNode.VariableName); } else if (instr.opCode == OpCodes.BinaryFactorVariable) { var factorTreeNode = instr.dynamicNode as BinaryFactorVariableTreeNode; instr.data = dataset.GetReadOnlyStringValues(factorTreeNode.VariableName); } else if (instr.opCode == OpCodes.LagVariable) { var laggedVariableTreeNode = (LaggedVariableTreeNode)instr.dynamicNode; instr.data = dataset.GetReadOnlyDoubleValues(laggedVariableTreeNode.VariableName); } else if (instr.opCode == OpCodes.VariableCondition) { var variableConditionTreeNode = (VariableConditionTreeNode)instr.dynamicNode; instr.data = 
dataset.GetReadOnlyDoubleValues(variableConditionTreeNode.VariableName); } else if (instr.opCode == OpCodes.Call) { necessaryArgStackSize += instr.nArguments + 1; } } return new InterpreterState(code, necessaryArgStackSize); } public struct EvaluationResult { public double Scalar { get; } public bool IsScalar => !double.IsNaN(Scalar); public DoubleVector Vector { get; } public bool IsVector => !(Vector.Count == 1 && double.IsNaN(Vector[0])); public bool IsNaN => !IsScalar && !IsVector; public EvaluationResult(double scalar) { Scalar = scalar; Vector = NaNVector; } public EvaluationResult(DoubleVector vector) { if (vector == null) throw new ArgumentNullException(nameof(vector)); Vector = vector; Scalar = double.NaN; } public override string ToString() { if (IsScalar) return Scalar.ToString(); if (IsVector) return Vector.ToVectorString(); return "NaN"; } private static readonly DoubleVector NaNVector = DoubleVector.Build.Dense(1, double.NaN); public static readonly EvaluationResult NaN = new EvaluationResult(double.NaN); } private static EvaluationResult ArithmeticApply(EvaluationResult lhs, EvaluationResult rhs, Func lengthStrategy, Func ssFunc = null, Func svFunc = null, Func vsFunc = null, Func vvFunc = null) { if (lhs.IsScalar && rhs.IsScalar && ssFunc != null) return new EvaluationResult(ssFunc(lhs.Scalar, rhs.Scalar)); if (lhs.IsScalar && rhs.IsVector && svFunc != null) return new EvaluationResult(svFunc(lhs.Scalar, rhs.Vector)); if (lhs.IsVector && rhs.IsScalar && vsFunc != null) return new EvaluationResult(vsFunc(lhs.Vector, rhs.Scalar)); if (lhs.IsVector && rhs.IsVector && vvFunc != null) { if (lhs.Vector.Count == rhs.Vector.Count) { return new EvaluationResult(vvFunc(lhs.Vector, rhs.Vector)); } else { var (lhsVector, rhsVector) = lengthStrategy(lhs.Vector, rhs.Vector); return new EvaluationResult(vvFunc(lhsVector, rhsVector)); } } return EvaluationResult.NaN; } private static EvaluationResult FunctionApply(EvaluationResult val, Func sFunc = null, Func vFunc 
= null) {
      if (val.IsScalar && sFunc != null) return new EvaluationResult(sFunc(val.Scalar));
      if (val.IsVector && vFunc != null) return new EvaluationResult(vFunc(val.Vector));
      return EvaluationResult.NaN;
    }

    // Applies an aggregating function: scalars map via sFunc, vectors reduce to a scalar via vFunc.
    private static EvaluationResult AggregateApply(EvaluationResult val, Func<double, double> sFunc = null, Func<DoubleVector, double> vFunc = null) {
      if (val.IsScalar && sFunc != null) return new EvaluationResult(sFunc(val.Scalar));
      if (val.IsVector && vFunc != null) return new EvaluationResult(vFunc(val.Vector));
      return EvaluationResult.NaN;
    }

    // Aggregates over a sub-window of the vector; the node's Offset/Length are interpreted
    // as relative start/end positions, and the window wraps around the end of the vector.
    private static EvaluationResult WindowedAggregateApply(EvaluationResult val, WindowedSymbolTreeNode node, Func<double, double> sFunc = null, Func<DoubleVector, double> vFunc = null) {
      // Parameters are interpreted as start and end with wrapping
      var start = node.Offset;
      var end = node.Length;

      DoubleVector SubVector(DoubleVector v) {
        int startIdx = (int)Math.Round(start * v.Count);
        int endIdx = (int)Math.Round(end * v.Count);
        int size = v.Count;
        if (startIdx < endIdx) {
          return v.SubVector(startIdx, count: endIdx - startIdx);
        } else { // wrap around
          var resultVector = DoubleVector.Build.Dense(size: size - (startIdx - endIdx));
          v.CopySubVectorTo(resultVector, startIdx, 0, size - startIdx); // copy [startIdx:size] to [0:size-startIdx]
          v.CopySubVectorTo(resultVector, 0, size - startIdx, endIdx);   // copy [0:endIdx] to [size-startIdx:size]
          return resultVector;
        }
      }

      if (val.IsScalar && sFunc != null) return new EvaluationResult(sFunc(val.Scalar));
      if (val.IsVector && vFunc != null) return new EvaluationResult(vFunc(SubVector(val.Vector)));
      return EvaluationResult.NaN;
    }

    // Same windowing as WindowedAggregateApply, but the vector function returns a vector.
    private static EvaluationResult WindowedFunctionApply(EvaluationResult val, IWindowedSymbolTreeNode node, Func<double, double> sFunc = null, Func<DoubleVector, DoubleVector> vFunc = null) {
      // Parameters are interpreted as start and end with wrapping
      var start = node.Offset;
      var end = node.Length;

      DoubleVector SubVector(DoubleVector v) {
        int startIdx = (int)Math.Round(start * v.Count);
        int endIdx = (int)Math.Round(end * v.Count);
        int size = v.Count;
        if (startIdx < endIdx) {
          return v.SubVector(startIdx, count: endIdx - startIdx);
        } else { // wrap around
          var resultVector = DoubleVector.Build.Dense(size: size - (startIdx - endIdx));
          v.CopySubVectorTo(resultVector, startIdx, 0, size - startIdx); // copy [startIdx:size] to [0:size-startIdx]
          v.CopySubVectorTo(resultVector, 0, size - startIdx, endIdx);   // copy [0:endIdx] to [size-startIdx:size]
          return resultVector;
        }
      }

      if (val.IsScalar && sFunc != null) return new EvaluationResult(sFunc(val.Scalar));
      if (val.IsVector && vFunc != null) return new EvaluationResult(vFunc(SubVector(val.Vector)));
      return EvaluationResult.NaN;
    }

    // Binary aggregation (e.g. distance, covariance): every operand combination reduces
    // to a scalar; vectors of different lengths are first reconciled via lengthStrategy.
    private static EvaluationResult AggregateMultipleApply(EvaluationResult lhs, EvaluationResult rhs,
      Func<DoubleVector, DoubleVector, (DoubleVector, DoubleVector)> lengthStrategy,
      Func<double, double, double> ssFunc = null,
      Func<double, DoubleVector, double> svFunc = null,
      Func<DoubleVector, double, double> vsFunc = null,
      Func<DoubleVector, DoubleVector, double> vvFunc = null) {
      if (lhs.IsScalar && rhs.IsScalar && ssFunc != null) return new EvaluationResult(ssFunc(lhs.Scalar, rhs.Scalar));
      if (lhs.IsScalar && rhs.IsVector && svFunc != null) return new EvaluationResult(svFunc(lhs.Scalar, rhs.Vector));
      if (lhs.IsVector && rhs.IsScalar && vsFunc != null) return new EvaluationResult(vsFunc(lhs.Vector, rhs.Scalar));
      if (lhs.IsVector && rhs.IsVector && vvFunc != null) {
        if (lhs.Vector.Count == rhs.Vector.Count) {
          return new EvaluationResult(vvFunc(lhs.Vector, rhs.Vector));
        } else {
          var (lhsVector, rhsVector) = lengthStrategy(lhs.Vector, rhs.Vector);
          return new EvaluationResult(vvFunc(lhsVector, rhsVector));
        }
      }
      return EvaluationResult.NaN;
    }

    /// <summary>
    /// Static result-type inference for a node: explicit DataType wins; aggregation symbols
    /// are always scalar; otherwise the node is a vector iff any argument is a vector.
    /// </summary>
    public virtual Type GetNodeType(ISymbolicExpressionTreeNode node) {
      if (node.DataType != null)
        return node.DataType;
      if (AggregationSymbols.Contains(node.Symbol.GetType()))
        return typeof(double);
      var argumentTypes = node.Subtrees.Select(GetNodeType);
      if (argumentTypes.Any(t => t == typeof(DoubleVector)))
        return typeof(DoubleVector);
      return typeof(double);
    }

    /// <summary>
    /// Recursively evaluates the next instruction of <paramref name="state"/> for the given row.
    /// When <paramref name="traceDict"/> is supplied, every node's intermediate result is recorded.
    /// </summary>
    public virtual EvaluationResult Evaluate(IDataset dataset, ref int row, InterpreterState state, IDictionary<ISymbolicExpressionTreeNode, EvaluationResult> traceDict = null) {
      void TraceEvaluation(Instruction instr, 
EvaluationResult result) { traceDict?.Add(instr.dynamicNode, result); } Instruction currentInstr = state.NextInstruction(); switch (currentInstr.opCode) { case OpCodes.Add: { var cur = Evaluate(dataset, ref row, state, traceDict); for (int i = 1; i < currentInstr.nArguments; i++) { var op = Evaluate(dataset, ref row, state, traceDict); cur = ArithmeticApply(cur, op, (lhs, rhs) => ApplyVectorLengthStrategy(DifferentVectorLengthStrategy, lhs, rhs, 0.0), (s1, s2) => s1 + s2, (s1, v2) => s1 + v2, (v1, s2) => v1 + s2, (v1, v2) => v1 + v2); } TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Sub: { var cur = Evaluate(dataset, ref row, state, traceDict); for (int i = 1; i < currentInstr.nArguments; i++) { var op = Evaluate(dataset, ref row, state, traceDict); cur = ArithmeticApply(cur, op, (lhs, rhs) => ApplyVectorLengthStrategy(DifferentVectorLengthStrategy, lhs, rhs, 0.0), (s1, s2) => s1 - s2, (s1, v2) => s1 - v2, (v1, s2) => v1 - s2, (v1, v2) => v1 - v2); } if (currentInstr.nArguments == 1) cur = FunctionApply(cur, s => -s, v => -v); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Mul: { var cur = Evaluate(dataset, ref row, state, traceDict); for (int i = 1; i < currentInstr.nArguments; i++) { var op = Evaluate(dataset, ref row, state, traceDict); cur = ArithmeticApply(cur, op, (lhs, rhs) => ApplyVectorLengthStrategy(DifferentVectorLengthStrategy, lhs, rhs, 1.0), (s1, s2) => s1 * s2, (s1, v2) => s1 * v2, (v1, s2) => v1 * s2, (v1, v2) => v1.PointwiseMultiply(v2)); } TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Div: { var cur = Evaluate(dataset, ref row, state, traceDict); for (int i = 1; i < currentInstr.nArguments; i++) { var op = Evaluate(dataset, ref row, state, traceDict); cur = ArithmeticApply(cur, op, (lhs, rhs) => ApplyVectorLengthStrategy(DifferentVectorLengthStrategy, lhs, rhs, 1.0), (s1, s2) => s1 / s2, (s1, v2) => s1 / v2, (v1, s2) => v1 / s2, (v1, v2) => v1 / v2); } if (currentInstr.nArguments == 1) cur = 
FunctionApply(cur, s => 1 / s, v => 1 / v); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Absolute: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = FunctionApply(cur, Math.Abs, DoubleVector.Abs); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Tanh: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = FunctionApply(cur, Math.Tanh, DoubleVector.Tanh); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Cos: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = FunctionApply(cur, Math.Cos, DoubleVector.Cos); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Sin: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = FunctionApply(cur, Math.Sin, DoubleVector.Sin); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Tan: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = FunctionApply(cur, Math.Tan, DoubleVector.Tan); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Square: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = FunctionApply(cur, s => Math.Pow(s, 2), v => v.PointwisePower(2)); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Cube: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = FunctionApply(cur, s => Math.Pow(s, 3), v => v.PointwisePower(3)); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Power: { var x = Evaluate(dataset, ref row, state, traceDict); var y = Evaluate(dataset, ref row, state, traceDict); var cur = ArithmeticApply(x, y, (lhs, rhs) => lhs.Count < rhs.Count ? 
CutLonger(lhs, rhs) : ApplyVectorLengthStrategy(DifferentVectorLengthStrategy, lhs, rhs, 1.0), (s1, s2) => Math.Pow(s1, Math.Round(s2)), (s1, v2) => DoubleVector.Build.Dense(v2.Count, s1).PointwisePower(DoubleVector.Round(v2)), (v1, s2) => v1.PointwisePower(Math.Round(s2)), (v1, v2) => v1.PointwisePower(DoubleVector.Round(v2))); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.SquareRoot: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = FunctionApply(cur, s => Math.Sqrt(s), v => DoubleVector.Sqrt(v)); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.CubeRoot: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = FunctionApply(cur, s => s < 0 ? -Math.Pow(-s, 1.0 / 3.0) : Math.Pow(s, 1.0 / 3.0), v => v.Map(s => s < 0 ? -Math.Pow(-s, 1.0 / 3.0) : Math.Pow(s, 1.0 / 3.0))); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Root: { var x = Evaluate(dataset, ref row, state, traceDict); var y = Evaluate(dataset, ref row, state, traceDict); var cur = ArithmeticApply(x, y, (lhs, rhs) => lhs.Count < rhs.Count ? 
CutLonger(lhs, rhs) : ApplyVectorLengthStrategy(DifferentVectorLengthStrategy, lhs, rhs, 1.0), (s1, s2) => Math.Pow(s1, 1.0 / Math.Round(s2)), (s1, v2) => DoubleVector.Build.Dense(v2.Count, s1).PointwisePower(1.0 / DoubleVector.Round(v2)), (v1, s2) => v1.PointwisePower(1.0 / Math.Round(s2)), (v1, v2) => v1.PointwisePower(1.0 / DoubleVector.Round(v2))); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Exp: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = FunctionApply(cur, s => Math.Exp(s), v => DoubleVector.Exp(v)); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Log: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = FunctionApply(cur, s => Math.Log(s), v => DoubleVector.Log(v)); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Sum: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => s, v => v.Sum()); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Mean: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => s, v => Statistics.Mean(v)); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.StandardDeviation: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => 0, v => Statistics.PopulationStandardDeviation(v)); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Length: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => 1, v => v.Count); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Min: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => s, v => Statistics.Minimum(v)); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Max: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => s, v => Statistics.Maximum(v)); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Variance: { var cur = Evaluate(dataset, ref row, 
state, traceDict); cur = AggregateApply(cur, s => 0, v => Statistics.PopulationVariance(v)); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Skewness: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => double.NaN, v => Statistics.PopulationSkewness(v)); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Kurtosis: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => double.NaN, v => Statistics.PopulationKurtosis(v)); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.EuclideanDistance: { var x1 = Evaluate(dataset, ref row, state, traceDict); var x2 = Evaluate(dataset, ref row, state, traceDict); var cur = AggregateMultipleApply(x1, x2, (lhs, rhs) => ApplyVectorLengthStrategy(DifferentVectorLengthStrategy, lhs, rhs, 0.0), (s1, s2) => s1 - s2, (s1, v2) => Math.Sqrt((s1 - v2).PointwisePower(2).Sum()), (v1, s2) => Math.Sqrt((v1 - s2).PointwisePower(2).Sum()), (v1, v2) => Math.Sqrt((v1 - v2).PointwisePower(2).Sum())); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Covariance: { var x1 = Evaluate(dataset, ref row, state, traceDict); var x2 = Evaluate(dataset, ref row, state, traceDict); var cur = AggregateMultipleApply(x1, x2, (lhs, rhs) => ApplyVectorLengthStrategy(DifferentVectorLengthStrategy, lhs, rhs, 0.0), (s1, s2) => 0, (s1, v2) => 0, (v1, s2) => 0, (v1, v2) => Statistics.PopulationCovariance(v1, v2)); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.SubVector: { var cur = Evaluate(dataset, ref row, state, traceDict); return WindowedFunctionApply(cur, (WindowedSymbolTreeNode)currentInstr.dynamicNode, s => s, v => v); } case OpCodes.Variable: { if (row < 0 || row >= dataset.Rows) return EvaluationResult.NaN; var variableTreeNode = (VariableTreeNode)currentInstr.dynamicNode; if (currentInstr.data is IList doubleList) { var cur = new EvaluationResult(doubleList[row] * variableTreeNode.Weight); TraceEvaluation(currentInstr, 
cur); return cur; } if (currentInstr.data is IList doubleVectorList) { var cur = new EvaluationResult(doubleVectorList[row] * variableTreeNode.Weight); TraceEvaluation(currentInstr, cur); return cur; } throw new NotSupportedException($"Unsupported type of variable: {currentInstr.data.GetType().GetPrettyName()}"); } case OpCodes.BinaryFactorVariable: { if (row < 0 || row >= dataset.Rows) return EvaluationResult.NaN; var factorVarTreeNode = currentInstr.dynamicNode as BinaryFactorVariableTreeNode; var cur = new EvaluationResult(((IList)currentInstr.data)[row] == factorVarTreeNode.VariableValue ? factorVarTreeNode.Weight : 0); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.FactorVariable: { if (row < 0 || row >= dataset.Rows) return EvaluationResult.NaN; var factorVarTreeNode = currentInstr.dynamicNode as FactorVariableTreeNode; var cur = new EvaluationResult(factorVarTreeNode.GetValue(((IList)currentInstr.data)[row])); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Constant: { var constTreeNode = (ConstantTreeNode)currentInstr.dynamicNode; var cur = new EvaluationResult(constTreeNode.Value); TraceEvaluation(currentInstr, cur); return cur; } #region Time Series Symbols case OpCodes.Median: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => s, v => Statistics.Median(v)); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.Quantile: { var cur = Evaluate(dataset, ref row, state, traceDict); var q = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => s, v => Statistics.Quantile(v, q.Scalar)); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.AbsoluteEnergy: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => s * s, v => v.PointwisePower(2.0).Sum()); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.BinnedEntropy: { var cur = Evaluate(dataset, ref row, state, traceDict); var m = Evaluate(dataset, ref row, 
state, traceDict); cur = AggregateApply(cur, s => 0, v => { int bins = Math.Max((int)Math.Round(m.Scalar), 1); double minValue = v.Minimum(); double maxValue = v.Maximum(); double intervalWidth = (maxValue - minValue) / bins; int totalValues = v.Count; double sum = 0; for (int i = 0; i < Math.Max(bins, v.Count); i++) { double binMin = minValue * i; double binMax = binMin + intervalWidth; double countBin = v.Map(e => (e > binMin && e < binMax) ? 1.0 : 0.0).Sum(); double percBin = countBin / totalValues; sum += percBin * Math.Log(percBin); } return sum; }); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.HasLargeStandardDeviation: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => 0, v => Statistics.PopulationStandardDeviation(v) > (Statistics.Maximum(v) - Statistics.Minimum(v)) / 2 ? 1.0 : 0.0); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.HasVarianceLargerThanStd: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => 0, v => Statistics.PopulationVariance(v) > Statistics.StandardDeviation(v) ? 1.0 : 0.0); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.IsSymmetricLooking: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => 0, v => Math.Abs(Statistics.Mean(v) - Statistics.Median(v)) < (Statistics.Maximum(v) - Statistics.Minimum(v)) / 2 ? 1.0 : 0.0); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.NumberDataPointsAboveMean: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => 0, v => { double mean = Statistics.Mean(v); return v.Map(e => e > mean ? 1.0 : 0.0).Sum(); }); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.NumberDataPointsAboveMedian: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => 0, v => { double median = Statistics.Median(v); return v.Map(e => e > median ? 
1.0 : 0.0).Sum(); }); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.NumberDataPointsBelowMean: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => 0, v => { double mean = Statistics.Mean(v); return v.Map(e => e < mean ? 1.0 : 0.0).Sum(); }); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.NumberDataPointsBelowMedian: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => 0, v => { double median = Statistics.Median(v); return v.Map(e => e < median ? 1.0 : 0.0).Sum(); }); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.ArimaModelCoefficients: { var cur = Evaluate(dataset, ref row, state, traceDict); var i = Evaluate(dataset, ref row, state, traceDict); var k = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => 0, v => throw new NotImplementedException("")); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.ContinuousWaveletTransformationCoefficients: { var cur = Evaluate(dataset, ref row, state, traceDict); var a = Evaluate(dataset, ref row, state, traceDict); var b = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => 0, v => throw new NotImplementedException("")); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.FastFourierTransformationCoefficient: { var cur = Evaluate(dataset, ref row, state, traceDict); var k = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => 0, v => throw new NotImplementedException("")); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.FirstIndexMax: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => 0, v => (double)v.MaximumIndex() / v.Count); TraceEvaluation(currentInstr, cur); return cur; } case OpCodes.FirstIndexMin: { var cur = Evaluate(dataset, ref row, state, traceDict); cur = AggregateApply(cur, s => 0, v => (double)v.MinimumIndex() / v.Count); 
TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.LastIndexMax: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          // Relative position of the last occurrence of the maximum:
          // (Count - argmax(reversed)) / Count == 1 - argmax(reversed) / Count.
          cur = AggregateApply(cur,
            s => 0,
            v => (double)(v.Count - DoubleVector.Build.DenseOfEnumerable(v.Reverse()).MaximumIndex()) / v.Count);
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.LastIndexMin: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          // Relative position of the last occurrence of the minimum (see LastIndexMax).
          cur = AggregateApply(cur,
            s => 0,
            v => (double)(v.Count - DoubleVector.Build.DenseOfEnumerable(v.Reverse()).MinimumIndex()) / v.Count);
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.LongestStrikeAboveMean: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          cur = AggregateApply(cur, s => 0, v => LongestStrikeAbove(v, Statistics.Mean(v)));
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.LongestStrikeAboveMedian: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          cur = AggregateApply(cur, s => 0, v => LongestStrikeAbove(v, Statistics.Median(v)));
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.LongestStrikeBelowMean: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          cur = AggregateApply(cur, s => 0, v => LongestStrikeBelow(v, Statistics.Mean(v)));
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.LongestStrikeBelowMedian: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          cur = AggregateApply(cur, s => 0, v => LongestStrikeBelow(v, Statistics.Median(v)));
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.LongestStrikePositive: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          cur = AggregateApply(cur, s => 0, v => LongestStrikeAbove(v, 0));
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.LongestStrikeNegative: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          // BUGFIX: was LongestStrikeAbove(v, 0) (copy-paste from LongestStrikePositive),
          // which returned the longest *positive* run instead of the longest negative one.
          cur = AggregateApply(cur, s => 0, v => LongestStrikeBelow(v, 0));
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.LongestStrikeZero: {
          var cur = Evaluate(dataset, ref
          row, state, traceDict);
          cur = AggregateApply(cur, s => 0, v => LongestStrikeEqual(v, 0));
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.MeanAbsoluteChange: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          cur = AggregateApply(cur,
            s => 0,
            v => {
              double sum = 0.0;
              for (int i = 0; i < v.Count - 1; i++) {
                sum += Math.Abs(v[i + 1] - v[i]);
              }
              // NOTE(review): divides by Count although only Count-1 differences exist -- confirm intended.
              return sum / v.Count;
            });
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.MeanAbsoluteChangeQuantiles: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          var ql = Evaluate(dataset, ref row, state, traceDict);
          var qu = Evaluate(dataset, ref row, state, traceDict);
          // Mean absolute change over consecutive pairs that both lie strictly inside
          // the [ql, qu] quantile corridor.
          cur = AggregateApply(cur,
            s => 0,
            v => {
              var lowerBound = Statistics.Quantile(v, ql.Scalar);
              var upperBound = Statistics.Quantile(v, qu.Scalar);
              var inBounds = v.Select(e => e > lowerBound && e < upperBound).ToList();
              double sum = 0.0;
              int count = 0;
              for (int i = 0; i < v.Count - 1; i++) {
                if (inBounds[i] && inBounds[i + 1]) {
                  sum += Math.Abs(v[i + 1] - v[i]);
                  count++;
                }
              }
              // NOTE(review): yields NaN when no consecutive pair is in bounds (count == 0) -- confirm intended.
              return sum / count;
            });
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.MeanAutocorrelation: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          // NOTE(review): the outer loop includes lag 0 in the aggregate -- confirm intended.
          cur = AggregateApply(cur,
            s => 0,
            v => {
              double sum = 0.0;
              double mean = Statistics.Mean(v);
              for (int l = 0; l < v.Count; l++) {
                for (int i = 0; i < v.Count - l; i++) {
                  sum += (v[i] - mean) * (v[i + l] - mean);
                }
              }
              return sum / (v.Count - 1) / Statistics.PopulationVariance(v);
            });
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.LaggedAutocorrelation: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          var lVal = Evaluate(dataset, ref row, state, traceDict);
          cur = AggregateApply(cur,
            s => 0,
            v => {
              double sum = 0.0;
              int l = Math.Max((int)Math.Round(lVal.Scalar), 0);
              double mean = Statistics.Mean(v);
              for (int i = 0; i < v.Count - l; i++) {
                sum += (v[i] - mean) * (v[i + l] - mean);
              }
              // NOTE(review): not normalized by the number of summed terms -- confirm intended.
              return sum / Statistics.PopulationVariance(v);
            });
          TraceEvaluation(currentInstr, cur);
          return
cur;
        }
        case OpCodes.MeanSecondDerivateCentral: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          // Mean of the central second differences (x[i-1] - 2*x[i] + x[i+1]) / 2.
          cur = AggregateApply(cur,
            s => 0,
            v => {
              double sum = 0.0;
              for (int i = 1; i < v.Count - 1; i++) {
                sum += (v[i - 1] - 2 * v[i] + v[i + 1]) / 2;
              }
              return sum / (v.Count - 2);
            });
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.NumberPeaksOfSize: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          var l = Evaluate(dataset, ref row, state, traceDict);
          cur = AggregateApply(cur, s => 0, v => CountNumberOfPeaks(v, l.Scalar));
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.LargeNumberOfPeaks: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          var l = Evaluate(dataset, ref row, state, traceDict);
          var m = Evaluate(dataset, ref row, state, traceDict);
          cur = AggregateApply(cur, s => 0, v => CountNumberOfPeaks(v, l.Scalar) > m.Scalar ? 1.0 : 0.0);
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        case OpCodes.TimeReversalAsymmetryStatistic: {
          var cur = Evaluate(dataset, ref row, state, traceDict);
          var l = Evaluate(dataset, ref row, state, traceDict);
          // Mean of x[i+2l]^2 * x[i+l] - x[i+l] * x[i]^2 over all valid i.
          cur = AggregateApply(cur,
            s => 0,
            v => {
              int lag = Math.Max((int)Math.Round(l.Scalar), 0);
              double sum = 0.0;
              for (int i = 0; i < v.Count - 2 * lag; i++) {
                sum += Math.Pow(v[i + 2 * lag], 2) * v[i + lag] - v[i + lag] * Math.Pow(v[i], 2);
              }
              return sum / (v.Count - 2 * lag);
            });
          TraceEvaluation(currentInstr, cur);
          return cur;
        }
        #endregion

        default:
          throw new NotSupportedException($"Unsupported OpCode: {currentInstr.opCode}");
      }
    }

    // Length of the longest run of consecutive elements strictly greater than threshold.
    private static int LongestStrikeAbove(DoubleVector v, double threshold) {
      int longestStrike = 0, currentStrike = 0;
      for (int i = 0; i < v.Count; i++) {
        if (v[i] > threshold) {
          currentStrike++;
          longestStrike = Math.Max(longestStrike, currentStrike);
        } else currentStrike = 0;
      }
      return longestStrike;
    }

    // Length of the longest run of consecutive elements strictly smaller than threshold.
    private static int LongestStrikeBelow(DoubleVector v, double threshold) {
      int longestStrike = 0, currentStrike = 0;
      for (int i = 0; i < v.Count; i++) {
        if (v[i] < threshold) {
          currentStrike++;
          longestStrike = Math.Max(longestStrike, currentStrike);
        } else currentStrike = 0;
      }
      return longestStrike;
    }

    // Length of the longest run of consecutive elements within epsilon of value.
    private static int LongestStrikeEqual(DoubleVector v, double value, double epsilon = double.Epsilon) {
      int longestStrike = 0, currentStrike = 0;
      for (int i = 0; i < v.Count; i++) {
        // BUGFIX: was v[i].IsAlmost(epsilon), which ignored `value` entirely and compared
        // each element against the tolerance itself (only accidentally correct for value == 0).
        if (Math.Abs(v[i] - value) <= epsilon) {
          currentStrike++;
          longestStrike = Math.Max(longestStrike, currentStrike);
        } else currentStrike = 0;
      }
      return longestStrike;
    }

    // Counts elements exceeding both neighbors by more than heightDifference.
    // Missing neighbors at the vector boundaries count as satisfied, so boundary
    // elements can qualify as peaks.
    private static int CountNumberOfPeaks(DoubleVector v, double heightDifference) {
      int count = 0;
      for (int i = 0; i < v.Count; i++) {
        bool largerThanPrev = i == 0 || v[i] > v[i - 1] + heightDifference;
        bool largerThanNext = i == v.Count - 1 || v[i] > v[i + 1] + heightDifference;
        if (largerThanPrev && largerThanNext)
          count++;
      }
      return count;
    }
  }
}