Changeset 17830


Ignore:
Timestamp:
02/01/21 18:33:07 (12 months ago)
Author:
pfleck
Message:

#3040 First draft of additional vector aggregation symbols (distribution characteristics & time series dynamics)

Location:
branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4
Files:
1 added
3 edited
1 copied

Legend:

Unmodified
Added
Removed
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Grammars/TypeCoherentVectorTimeSeriesExpressionGrammar.cs

    r17824 r17830  
    2929
    3030namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
    31   [StorableType("7EC7B4A7-0E27-4011-B983-B0E15A6944EC")]
    32   [Item("TypeCoherentVectorExpressionGrammar", "Represents a grammar for functional expressions in which special syntactic constraints are enforced so that vector and scalar expressions are not mixed.")]
    33   public class TypeCoherentVectorExpressionGrammar : DataAnalysisGrammar, ISymbolicDataAnalysisGrammar {
     31  [StorableType("88895D71-3D3F-44A7-A531-D5D59963AABE")]
     32  [Item("TypeCoherentVectorTimeSeriesExpressionGrammar", "Represents a grammar for functional expressions in which special syntactic constraints are enforced so that vector and scalar expressions are not mixed.")]
     33  public class TypeCoherentVectorTimeSeriesExpressionGrammar : DataAnalysisGrammar, ISymbolicDataAnalysisGrammar {
    3434    private const string ArithmeticFunctionsName = "Arithmetic Functions";
    3535    private const string TrigonometricFunctionsName = "Trigonometric Functions";
     
    4040    private const string VectorStatisticsName = "Vector Statistics";
    4141    private const string VectorDistancesName = "Vector Distances";
     42    private const string VectorDistributionCharacteristicsName = "Distribution Characteristics";
     43    private const string VectorTimeSeriesDynamicsName = "Time Series Dynamics";
    4244    private const string ScalarSymbolsName = "Scalar Symbols";
    4345
     
    5456
    5557    [StorableConstructor]
    56     protected TypeCoherentVectorExpressionGrammar(StorableConstructorFlag _) : base(_) { }
    57     protected TypeCoherentVectorExpressionGrammar(TypeCoherentVectorExpressionGrammar original, Cloner cloner) : base(original, cloner) { }
    58     public TypeCoherentVectorExpressionGrammar()
    59       : base(ItemAttribute.GetName(typeof(TypeCoherentVectorExpressionGrammar)), ItemAttribute.GetDescription(typeof(TypeCoherentVectorExpressionGrammar))) {
     58    protected TypeCoherentVectorTimeSeriesExpressionGrammar(StorableConstructorFlag _) : base(_) { }
     59    protected TypeCoherentVectorTimeSeriesExpressionGrammar(TypeCoherentVectorTimeSeriesExpressionGrammar original, Cloner cloner) : base(original, cloner) { }
     60    public TypeCoherentVectorTimeSeriesExpressionGrammar()
     61      : base(ItemAttribute.GetName(typeof(TypeCoherentVectorTimeSeriesExpressionGrammar)), ItemAttribute.GetDescription(typeof(TypeCoherentVectorTimeSeriesExpressionGrammar))) {
    6062      Initialize();
    6163    }
    6264    public override IDeepCloneable Clone(Cloner cloner) {
    63       return new TypeCoherentVectorExpressionGrammar(this, cloner);
     65      return new TypeCoherentVectorTimeSeriesExpressionGrammar(this, cloner);
    6466    }
    6567
     
    8688
    8789      var constant = new Constant { MinValue = -20, MaxValue = 20 };
     90      var constantZeroToOne = new Constant { Name = "Constant [0-1]", MinValue = 0, MaxValue = 1 };
    8891      var variable = new Variable();
    8992      var binFactorVariable = new BinaryFactorVariable();
     
    9194
    9295      var mean = new Mean();
     96      var median = new Median() { Enabled = false };
    9397      var sd = new StandardDeviation();
    9498      var sum = new Sum();
     
    96100      var min = new Min() { Enabled = false };
    97101      var max = new Max() { Enabled = false };
     102      var quantile = new Quantile() { Enabled = false };
    98103      var variance = new Variance() { Enabled = false };
    99104      var skewness = new Skewness() { Enabled = false };
     
    124129
    125130      var vectorvariable = new Variable() { Name = "Vector Variable" };
     131
     132      #region TimeSeries Symbols
     133      var absoluteEnergy = new AbsoluteEnergy() { Enabled = false };
     134      var binnedEntropy = new BinnedEntropy() { Enabled = false };
     135      var hasLargeStandardDeviation = new HasLargeStandardDeviation() { Enabled = false };
     136      var hasVarianceLargerThanStd = new HasVarianceLargerThanStd() { Enabled = false };
     137      var isSymmetricLooking = new IsSymmetricLooking() { Enabled = false };
     138      var numberDataPointsAboveMean = new NumberDataPointsAboveMean() { Enabled = false };
     139      var numberDataPointsAboveMedian = new NumberDataPointsAboveMedian() { Enabled = false };
     140      var numberDataPointsBelowMean = new NumberDataPointsBelowMean() { Enabled = false };
     141      var numberDataPointsBelowMedian = new NumberDataPointsBelowMedian() { Enabled = false };
     142
     143      var arimaModelCoefficients = new ArimaModelCoefficients() { Enabled = false };
     144      var continuousWaveletTransformationCoefficients = new ContinuousWaveletTransformationCoefficients() { Enabled = false };
     145      var fastFourierTransformationCoefficient = new FastFourierTransformationCoefficient() { Enabled = false };
     146      var firstIndexMax = new FirstIndexMax() { Enabled = false };
     147      var firstIndexMin = new FirstIndexMin() { Enabled = false };
     148      var lastIndexMax = new LastIndexMax() { Enabled = false };
     149      var lastIndexMin = new LastIndexMin() { Enabled = false };
     150      var longestStrikeAboveMean = new LongestStrikeAboveMean() { Enabled = false };
     151      var longestStrikeAboveMedian = new LongestStrikeAboveMedian() { Enabled = false };
     152      var longestStrikeBelowMean = new LongestStrikeBelowMean() { Enabled = false };
     153      var longestStrikeBelowMedian = new LongestStrikeBelowMedian() { Enabled = false };
     154      var longestStrikePositive = new LongestStrikePositive() { Enabled = false };
     155      var longestStrikeNegative = new LongestStrikeNegative() { Enabled = false };
     156      var longestStrikeZero = new LongestStrikeZero() { Enabled = false };
     157      var meanAbsoluteChange = new MeanAbsoluteChange() { Enabled = false };
     158      var meanAbsoluteChangeQuantiles = new MeanAbsoluteChangeQuantiles() { Enabled = false };
     159      var meanAutocorrelation = new MeanAutocorrelation() { Enabled = false };
     160      var laggedAutocorrelation = new LaggedAutocorrelation() { Enabled = false };
     161      var meanSecondDerivateCentral = new MeanSecondDerivateCentral() { Enabled = false };
     162      var numberPeaksOfSize = new NumberPeaksOfSize() { Enabled = false };
     163      var largeNumberOfPeaks = new LargeNumberOfPeaks() { Enabled = false };
     164      var timeReversalAsymmetryStatistic = new TimeReversalAsymmetryStatistic() { Enabled = false };
     165      #endregion
    126166      #endregion
    127167
     
    135175      var exponentialAndLogarithmicSymbols = new GroupSymbol(ExponentialFunctionsName, new List<ISymbol> { exp, log });
    136176      var powerSymbols = new GroupSymbol(PowerFunctionsName, new List<ISymbol> { square, sqrt, cube, cubeRoot, power, root });
    137       var terminalSymbols = new GroupSymbol(TerminalsName, new List<ISymbol> { constant, variable, binFactorVariable, factorVariable });
    138       var statisticsSymbols = new GroupSymbol(VectorStatisticsName, new List<ISymbol> { mean, sd, sum, length, min, max, variance, skewness, kurtosis });
     177      var terminalSymbols = new GroupSymbol(TerminalsName, new List<ISymbol> { constant, constantZeroToOne, variable, binFactorVariable, factorVariable });
     178      var statisticsSymbols = new GroupSymbol(VectorStatisticsName, new List<ISymbol> { mean, median, sd, sum, length, min, max, quantile, variance, skewness, kurtosis });
    139179      var distancesSymbols = new GroupSymbol(VectorDistancesName, new List<ISymbol> { euclideanDistance, covariance });
    140       var aggregationSymbols = new GroupSymbol(VectorAggregationName, new List<ISymbol> { statisticsSymbols, distancesSymbols });
     180      var distributionCharacteristicsSymbols = new GroupSymbol(VectorDistributionCharacteristicsName, new List<ISymbol> {
     181        absoluteEnergy, binnedEntropy, hasLargeStandardDeviation, hasVarianceLargerThanStd, isSymmetricLooking,
     182        numberDataPointsAboveMean, numberDataPointsAboveMedian, numberDataPointsBelowMean, numberDataPointsBelowMedian
     183      });
     184      var timeSeriesDynamicsSymbols = new GroupSymbol(VectorTimeSeriesDynamicsName, new List<ISymbol> {
     185        arimaModelCoefficients, continuousWaveletTransformationCoefficients, fastFourierTransformationCoefficient,
     186        firstIndexMax, firstIndexMin, lastIndexMax, lastIndexMin,
     187        longestStrikeAboveMean, longestStrikeAboveMedian, longestStrikeBelowMean, longestStrikeBelowMedian, longestStrikePositive, longestStrikePositive, longestStrikeNegative, longestStrikeZero,
     188        meanAbsoluteChange, meanAbsoluteChangeQuantiles, meanAutocorrelation, laggedAutocorrelation, meanSecondDerivateCentral, meanSecondDerivateCentral,
     189        numberPeaksOfSize, largeNumberOfPeaks, timeReversalAsymmetryStatistic
     190      });
     191      var aggregationSymbols = new GroupSymbol(VectorAggregationName, new List<ISymbol> { statisticsSymbols, distancesSymbols, distributionCharacteristicsSymbols, timeSeriesDynamicsSymbols });
    141192      var scalarSymbols = new GroupSymbol(ScalarSymbolsName, new List<ISymbol>() { arithmeticSymbols, trigonometricSymbols, exponentialAndLogarithmicSymbols, powerSymbols, terminalSymbols, aggregationSymbols });
    142193
     
    170221      SetSubtreeCount(exponentialAndLogarithmicSymbols, 1, 1);
    171222      SetSubtreeCount(terminalSymbols, 0, 0);
    172       SetSubtreeCount(statisticsSymbols, 1, 1);
     223      foreach (var sy in new Symbol[] { mean, median, sd, sum, length, min, max, variance, skewness, kurtosis })
     224        SetSubtreeCount(sy, 1, 1);
     225      SetSubtreeCount(quantile, 2, 2);
    173226      SetSubtreeCount(distancesSymbols, 2, 2);
     227      #region TimeSeries symbols
     228      foreach (var sy in new Symbol[] {
     229        absoluteEnergy, hasLargeStandardDeviation, hasVarianceLargerThanStd, isSymmetricLooking,
     230         numberDataPointsAboveMean, numberDataPointsAboveMedian, numberDataPointsBelowMean, numberDataPointsBelowMedian
     231      }) SetSubtreeCount(sy, 1, 1);
     232      foreach (var sy in new Symbol[] { binnedEntropy })
     233        SetSubtreeCount(sy, 2, 2);
     234
     235      foreach (var sy in new Symbol[] {
     236        firstIndexMax, firstIndexMin, lastIndexMax, lastIndexMin,
     237        longestStrikeAboveMean, longestStrikeAboveMedian, longestStrikeBelowMean, longestStrikeBelowMedian,
     238        longestStrikePositive, longestStrikeNegative, longestStrikeZero,
     239        meanAbsoluteChange, meanAutocorrelation, meanSecondDerivateCentral
     240      }) SetSubtreeCount(sy, 1, 1);
     241      foreach (var sy in new Symbol[] {
     242        fastFourierTransformationCoefficient, laggedAutocorrelation, numberPeaksOfSize, timeReversalAsymmetryStatistic
     243      }) SetSubtreeCount(sy, 2, 2);
     244      foreach (var sy in new Symbol[] {
     245        arimaModelCoefficients, continuousWaveletTransformationCoefficients,
     246        meanAbsoluteChangeQuantiles, largeNumberOfPeaks
     247      }) SetSubtreeCount(sy, 3, 3);
     248      #endregion
    174249
    175250      SetSubtreeCount(vectorarithmeticSymbols, 2, 2);
     
    197272      AddAllowedChildSymbol(power, constant, 1);
    198273      AddAllowedChildSymbol(root, constant, 1);
    199       AddAllowedChildSymbol(aggregationSymbols, vectorSymbols);
    200       AddAllowedChildSymbol(statisticsSymbols, subvector);
     274      AddAllowedChildSymbol(aggregationSymbols, vectorSymbols, 0);
     275      AddAllowedChildSymbol(statisticsSymbols, subvector, 0);
     276      AddAllowedChildSymbol(quantile, constantZeroToOne, 1);
     277      AddAllowedChildSymbol(distancesSymbols, vectorSymbols, 1);
     278      AddAllowedChildSymbol(distributionCharacteristicsSymbols, vectorSymbols, 0);
     279      AddAllowedChildSymbol(distributionCharacteristicsSymbols, subvector, 0);
     280      AddAllowedChildSymbol(distributionCharacteristicsSymbols, constantZeroToOne, 1);
     281      AddAllowedChildSymbol(timeSeriesDynamicsSymbols, vectorSymbols, 0);
     282      AddAllowedChildSymbol(timeSeriesDynamicsSymbols, subvector, 0);
     283      AddAllowedChildSymbol(timeSeriesDynamicsSymbols, constantZeroToOne, 1);
     284      AddAllowedChildSymbol(timeSeriesDynamicsSymbols, constantZeroToOne, 2);
    201285
    202286      AddAllowedChildSymbol(vectorarithmeticSymbols, vectorSymbols);
     
    205289      AddAllowedChildSymbol(vectorexponentialAndLogarithmicSymbols, vectorSymbols);
    206290      AddAllowedChildSymbol(vectorpowerSymbols, vectorSymbols, 0);
    207       AddAllowedChildSymbol(vectorpower, constant, 1);
    208       AddAllowedChildSymbol(vectorroot, constant, 1);
     291      AddAllowedChildSymbol(vectorpower, constantZeroToOne, 1);
     292      AddAllowedChildSymbol(vectorroot, constantZeroToOne, 1);
    209293
    210294      AddAllowedChildSymbol(subvector, vectorSymbols);
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj

    r17786 r17830  
    181181    <Compile Include="Grammars\FullFunctionalVectorExpressionGrammar.cs" />
    182182    <Compile Include="Grammars\TypeCoherentVectorExpressionGrammar.cs" />
     183    <Compile Include="Grammars\TypeCoherentVectorTimeSeriesExpressionGrammar.cs" />
    183184    <Compile Include="Hashing\HashExtensions.cs" />
    184185    <Compile Include="Hashing\HashUtil.cs" />
     
    260261    <Compile Include="Symbols\Skewness.cs" />
    261262    <Compile Include="Symbols\SubVector.cs" />
     263    <Compile Include="Symbols\TimeSeriesSymbols.cs" />
    262264    <Compile Include="Symbols\WindowedSymbolTreeNode.cs" />
    263265    <Compile Include="Symbols\WindowedSymbol.cs" />
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/OpCodes.cs

    r17726 r17830  
    9191    SubVector = 64,
    9292
     93    #region Time Series Symbols
     94    Median = 100,
     95    Quantile = 101,
     96
     97    AbsoluteEnergy = 102,
     98    BinnedEntropy = 103,
     99    HasLargeStandardDeviation = 104,
     100    HasVarianceLargerThanStd = 105,
     101    IsSymmetricLooking = 106,
     102    NumberDataPointsAboveMean = 107,
     103    NumberDataPointsAboveMedian = 108,
     104    NumberDataPointsBelowMean = 109,
     105    NumberDataPointsBelowMedian = 110,
     106
     107    ArimaModelCoefficients = 111,
     108    ContinuousWaveletTransformationCoefficients = 112,
     109    FastFourierTransformationCoefficient = 113,
     110    FirstIndexMax = 124,
     111    FirstIndexMin = 125,
     112    LastIndexMax = 126,
     113    LastIndexMin = 127,
     114    LongestStrikeAboveMean = 128,
     115    LongestStrikeAboveMedian = 129,
     116    LongestStrikeBelowMean = 130,
     117    LongestStrikeBelowMedian = 131,
     118    LongestStrikePositive = 132,
     119    LongestStrikeNegative = 133,
     120    LongestStrikeZero = 134,
     121    MeanAbsoluteChange = 135,
     122    MeanAbsoluteChangeQuantiles = 136,
     123    MeanAutocorrelation = 137,
     124    LaggedAutocorrelation = 138,
     125    MeanSecondDerivateCentral = 139,
     126    NumberPeaksOfSize = 140,
     127    LargeNumberOfPeaks = 141,
     128    TimeReversalAsymmetryStatistic = 142
     129    #endregion
    93130  }
    94131  public static class OpCodes {
     
    158195    public const byte Covariance = (byte)OpCode.Covariance;
    159196    public const byte SubVector = (byte)OpCode.SubVector;
     197    #region Time Series Symbols
     198    public const byte Median = (byte)OpCode.Median;
     199    public const byte Quantile = (byte)OpCode.Quantile;
     200
     201    public const byte AbsoluteEnergy = (byte)OpCode.AbsoluteEnergy;
     202    public const byte BinnedEntropy = (byte)OpCode.BinnedEntropy;
     203    public const byte HasLargeStandardDeviation = (byte)OpCode.HasLargeStandardDeviation;
     204    public const byte HasVarianceLargerThanStd = (byte)OpCode.HasVarianceLargerThanStd;
     205    public const byte IsSymmetricLooking = (byte)OpCode.IsSymmetricLooking;
     206    public const byte NumberDataPointsAboveMean = (byte)OpCode.NumberDataPointsAboveMean;
     207    public const byte NumberDataPointsAboveMedian = (byte)OpCode.NumberDataPointsAboveMedian;
     208    public const byte NumberDataPointsBelowMean = (byte)OpCode.NumberDataPointsBelowMean;
     209    public const byte NumberDataPointsBelowMedian = (byte)OpCode.NumberDataPointsBelowMedian;
     210
     211    public const byte ArimaModelCoefficients = (byte)OpCode.ArimaModelCoefficients;
     212    public const byte ContinuousWaveletTransformationCoefficients = (byte)OpCode.ContinuousWaveletTransformationCoefficients;
     213    public const byte FastFourierTransformationCoefficient = (byte)OpCode.FastFourierTransformationCoefficient;
     214    public const byte FirstIndexMax = (byte)OpCode.FirstIndexMax;
     215    public const byte FirstIndexMin = (byte)OpCode.FirstIndexMin;
     216    public const byte LastIndexMax = (byte)OpCode.LastIndexMax;
     217    public const byte LastIndexMin = (byte)OpCode.LastIndexMin;
     218    public const byte LongestStrikeAboveMean = (byte)OpCode.LongestStrikeAboveMean;
     219    public const byte LongestStrikeAboveMedian = (byte)OpCode.LongestStrikeAboveMedian;
     220    public const byte LongestStrikeBelowMean = (byte)OpCode.LongestStrikeBelowMean;
     221    public const byte LongestStrikeBelowMedian = (byte)OpCode.LongestStrikeBelowMedian;
     222    public const byte LongestStrikePositive = (byte)OpCode.LongestStrikePositive;
     223    public const byte LongestStrikeNegative = (byte)OpCode.LongestStrikeNegative;
     224    public const byte LongestStrikeZero = (byte)OpCode.LongestStrikeZero;
     225    public const byte MeanAbsoluteChange = (byte)OpCode.MeanAbsoluteChange;
     226    public const byte MeanAbsoluteChangeQuantiles = (byte)OpCode.MeanAbsoluteChangeQuantiles;
     227    public const byte MeanAutocorrelation = (byte)OpCode.MeanAutocorrelation;
     228    public const byte LaggedAutocorrelation = (byte)OpCode.LaggedAutocorrelation;
     229    public const byte MeanSecondDerivateCentral = (byte)OpCode.MeanSecondDerivateCentral;
     230    public const byte NumberPeaksOfSize = (byte)OpCode.NumberPeaksOfSize;
     231    public const byte LargeNumberOfPeaks = (byte)OpCode.LargeNumberOfPeaks;
     232    public const byte TimeReversalAsymmetryStatistic = (byte)OpCode.TimeReversalAsymmetryStatistic;
     233    #endregion
    160234
    161235
     
    226300      { typeof(Covariance), OpCodes.Covariance },
    227301      { typeof(SubVector), OpCodes.SubVector },
     302
     303      #region Time Series Symbols
     304      { typeof(Median), OpCodes.Median },
     305      { typeof(Quantile), OpCodes.Quantile },
     306
     307      { typeof(AbsoluteEnergy), OpCodes.AbsoluteEnergy },
     308      { typeof(BinnedEntropy), OpCodes.BinnedEntropy },
     309      { typeof(HasLargeStandardDeviation), OpCodes.HasLargeStandardDeviation },
     310      { typeof(HasVarianceLargerThanStd), OpCodes.HasVarianceLargerThanStd },
     311      { typeof(IsSymmetricLooking), OpCodes.IsSymmetricLooking },
     312      { typeof(NumberDataPointsAboveMean), OpCodes.NumberDataPointsAboveMean },
     313      { typeof(NumberDataPointsAboveMedian), OpCodes.NumberDataPointsAboveMedian },
     314      { typeof(NumberDataPointsBelowMean), OpCodes.NumberDataPointsBelowMean },
     315      { typeof(NumberDataPointsBelowMedian), OpCodes.NumberDataPointsBelowMedian },
     316
     317      { typeof(ArimaModelCoefficients), OpCodes.ArimaModelCoefficients },
     318      { typeof(ContinuousWaveletTransformationCoefficients), OpCodes.ContinuousWaveletTransformationCoefficients },
     319      { typeof(FastFourierTransformationCoefficient), OpCodes.FastFourierTransformationCoefficient },
     320      { typeof(FirstIndexMax), OpCodes.FirstIndexMax },
     321      { typeof(FirstIndexMin), OpCodes.FirstIndexMin },
     322      { typeof(LastIndexMax), OpCodes.LastIndexMax },
     323      { typeof(LastIndexMin), OpCodes.LastIndexMin },
     324      { typeof(LongestStrikeAboveMean), OpCodes.LongestStrikeAboveMean },
     325      { typeof(LongestStrikeAboveMedian), OpCodes.LongestStrikeAboveMedian },
     326      { typeof(LongestStrikeBelowMean), OpCodes.LongestStrikeBelowMean },
     327      { typeof(LongestStrikeBelowMedian), OpCodes.LongestStrikeBelowMedian },
     328      { typeof(LongestStrikePositive), OpCodes.LongestStrikePositive },
     329      { typeof(LongestStrikeNegative), OpCodes.LongestStrikeNegative },
     330      { typeof(LongestStrikeZero), OpCodes.LongestStrikeZero },
     331      { typeof(MeanAbsoluteChange), OpCodes.MeanAbsoluteChange },
     332      { typeof(MeanAbsoluteChangeQuantiles), OpCodes.MeanAbsoluteChangeQuantiles },
     333      { typeof(MeanAutocorrelation), OpCodes.MeanAutocorrelation },
     334      { typeof(LaggedAutocorrelation), OpCodes.LaggedAutocorrelation },
     335      { typeof(MeanSecondDerivateCentral), OpCodes.MeanSecondDerivateCentral },
     336      { typeof(NumberPeaksOfSize), OpCodes.NumberPeaksOfSize },
     337      { typeof(LargeNumberOfPeaks), OpCodes.LargeNumberOfPeaks },
     338      { typeof(TimeReversalAsymmetryStatistic), OpCodes.TimeReversalAsymmetryStatistic },             
     339      #endregion
    228340    };
    229341
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/SymbolicDataAnalysisExpressionTreeVectorInterpreter.cs

    r17786 r17830  
    209209
    210210    public SymbolicDataAnalysisExpressionTreeVectorInterpreter()
    211       : this("SymbolicDataAnalysisExpressionTreeVectorInterpreter", "Interpreter for symbolic expression trees including vector arithmetic.") {
    212     }
     211      : this("SymbolicDataAnalysisExpressionTreeVectorInterpreter", "Interpreter for symbolic expression trees including vector arithmetic.") { }
    213212
    214213    protected SymbolicDataAnalysisExpressionTreeVectorInterpreter(string name, string description)
     
    752751          }
    753752
     753        #region Time Series Symbols
     754        case OpCodes.Median: {
     755            var cur = Evaluate(dataset, ref row, state, traceDict);
     756            cur = AggregateApply(cur,
     757              s => s,
     758              v => Statistics.Median(v));
     759            TraceEvaluation(currentInstr, cur);
     760            return cur;
     761          }
     762        case OpCodes.Quantile: {
     763            var cur = Evaluate(dataset, ref row, state, traceDict);
     764            var q = Evaluate(dataset, ref row, state, traceDict);
     765            cur = AggregateApply(cur,
     766              s => s,
     767              v => Statistics.Quantile(v, q.Scalar));
     768            TraceEvaluation(currentInstr, cur);
     769            return cur;
     770          }
     771
     772        case OpCodes.AbsoluteEnergy: {
     773            var cur = Evaluate(dataset, ref row, state, traceDict);
     774            cur = AggregateApply(cur,
     775              s => s * s,
     776              v => v.PointwisePower(2.0).Sum());
     777            TraceEvaluation(currentInstr, cur);
     778            return cur;
     779          }
     780
     781        case OpCodes.BinnedEntropy: {
     782            var cur = Evaluate(dataset, ref row, state, traceDict);
     783            var m = Evaluate(dataset, ref row, state, traceDict);
     784            cur = AggregateApply(cur,
     785              s => 0,
     786              v => {
     787                int bins = (int)Math.Round(m.Scalar);
     788                double minValue = v.Minimum();
     789                double maxValue = v.Maximum();
     790                double intervalWidth = (maxValue - minValue) / bins;
     791                int totalValues = v.Count;
     792                double sum = 0;
     793                for (int i = 0; i < Math.Max(bins, v.Count); i++) {
     794                  double binMin = minValue * i;
     795                  double binMax = binMin + intervalWidth;
     796                  double countBin = v.Map(e => (e > binMin && e < binMax) ? 1 : 0).Sum();
     797                  double percBin = countBin / totalValues;
     798                  sum += percBin * Math.Log(percBin);
     799                }
     800
     801                return sum;
     802              });
     803            TraceEvaluation(currentInstr, cur);
     804            return cur;
     805          }
     806        case OpCodes.HasLargeStandardDeviation: {
     807            var cur = Evaluate(dataset, ref row, state, traceDict);
     808            cur = AggregateApply(cur,
     809              s => 0,
     810              v => Statistics.PopulationStandardDeviation(v) > (Statistics.Maximum(v) - Statistics.Minimum(v)) / 2 ? 1 : 0);
     811            TraceEvaluation(currentInstr, cur);
     812            return cur;
     813          }
     814        case OpCodes.HasVarianceLargerThanStd: {
     815            var cur = Evaluate(dataset, ref row, state, traceDict);
     816            cur = AggregateApply(cur,
     817              s => 0,
     818              v => Statistics.PopulationVariance(v) > Statistics.StandardDeviation(v) ? 1 : 0);
     819            TraceEvaluation(currentInstr, cur);
     820            return cur;
     821          }
     822        case OpCodes.IsSymmetricLooking: {
     823            var cur = Evaluate(dataset, ref row, state, traceDict);
     824            cur = AggregateApply(cur,
     825              s => 0,
     826              v => Math.Abs(Statistics.Mean(v) - Statistics.Median(v)) < (Statistics.Maximum(v) - Statistics.Minimum(v)) / 2 ? 1 : 0);
     827            TraceEvaluation(currentInstr, cur);
     828            return cur;
     829          }
     830        case OpCodes.NumberDataPointsAboveMean: {
     831            var cur = Evaluate(dataset, ref row, state, traceDict);
     832            cur = AggregateApply(cur,
     833              s => 0,
     834              v => {
     835                double mean = Statistics.Mean(v);
     836                return v.Map(e => e > mean ? 1 : 0).Sum();
     837              });
     838            TraceEvaluation(currentInstr, cur);
     839            return cur;
     840          }
     841        case OpCodes.NumberDataPointsAboveMedian: {
     842            var cur = Evaluate(dataset, ref row, state, traceDict);
     843            cur = AggregateApply(cur,
     844              s => 0,
     845              v => {
     846                double median = Statistics.Median(v);
     847                return v.Map(e => e > median ? 1 : 0).Sum();
     848              });
     849            TraceEvaluation(currentInstr, cur);
     850            return cur;
     851          }
     852        case OpCodes.NumberDataPointsBelowMean: {
     853            var cur = Evaluate(dataset, ref row, state, traceDict);
     854            cur = AggregateApply(cur,
     855              s => 0,
     856              v => {
     857                double mean = Statistics.Mean(v);
     858                return v.Map(e => e < mean ? 1 : 0).Sum();
     859              });
     860            TraceEvaluation(currentInstr, cur);
     861            return cur;
     862          }
     863        case OpCodes.NumberDataPointsBelowMedian: {
     864            var cur = Evaluate(dataset, ref row, state, traceDict);
     865            cur = AggregateApply(cur,
     866              s => 0,
     867              v => {
     868                double median = Statistics.Median(v);
     869                return v.Map(e => e < median ? 1 : 0).Sum();
     870              });
     871            TraceEvaluation(currentInstr, cur);
     872            return cur;
     873          }
     874
     875        case OpCodes.ArimaModelCoefficients: {
     876            var cur = Evaluate(dataset, ref row, state, traceDict);
     877            var i = Evaluate(dataset, ref row, state, traceDict);
     878            var k = Evaluate(dataset, ref row, state, traceDict);
     879            cur = AggregateApply(cur,
     880              s => 0,
     881              v => throw new NotImplementedException(""));
     882            TraceEvaluation(currentInstr, cur);
     883            return cur;
     884          }
     885        case OpCodes.ContinuousWaveletTransformationCoefficients: {
     886            var cur = Evaluate(dataset, ref row, state, traceDict);
     887            var a = Evaluate(dataset, ref row, state, traceDict);
     888            var b = Evaluate(dataset, ref row, state, traceDict);
     889            cur = AggregateApply(cur,
     890              s => 0,
     891              v => throw new NotImplementedException(""));
     892            TraceEvaluation(currentInstr, cur);
     893            return cur;
     894          }
     895        case OpCodes.FastFourierTransformationCoefficient: {
     896            var cur = Evaluate(dataset, ref row, state, traceDict);
     897            var k = Evaluate(dataset, ref row, state, traceDict);
     898            cur = AggregateApply(cur,
     899              s => 0,
     900              v => throw new NotImplementedException(""));
     901            TraceEvaluation(currentInstr, cur);
     902            return cur;
     903          }
     904        case OpCodes.FirstIndexMax: {
     905            var cur = Evaluate(dataset, ref row, state, traceDict);
     906            cur = AggregateApply(cur,
     907              s => 0,
     908              v => (double)v.MaximumIndex() / v.Count);
     909            TraceEvaluation(currentInstr, cur);
     910            return cur;
     911          }
     912        case OpCodes.FirstIndexMin: {
     913            var cur = Evaluate(dataset, ref row, state, traceDict);
     914            cur = AggregateApply(cur,
     915              s => 0,
     916              v => (double)v.MinimumIndex() / v.Count);
     917            TraceEvaluation(currentInstr, cur);
     918            return cur;
     919          }
     920        case OpCodes.LastIndexMax: {
     921            var cur = Evaluate(dataset, ref row, state, traceDict);
     922            cur = AggregateApply(cur,
     923              s => 0,
     924              v => (double)(v.Count - DoubleVector.Build.DenseOfEnumerable(v.Reverse()).MaximumIndex()) / v.Count);
     925
     926            TraceEvaluation(currentInstr, cur);
     927            return cur;
     928          }
     929        case OpCodes.LastIndexMin: {
     930            var cur = Evaluate(dataset, ref row, state, traceDict);
     931            cur = AggregateApply(cur,
     932              s => 0,
     933              v => (double)(v.Count - DoubleVector.Build.DenseOfEnumerable(v.Reverse()).MinimumIndex()) / v.Count);
     934            TraceEvaluation(currentInstr, cur);
     935            return cur;
     936          }
     937        case OpCodes.LongestStrikeAboveMean: {
     938            var cur = Evaluate(dataset, ref row, state, traceDict);
     939            cur = AggregateApply(cur,
     940              s => 0,
     941              v => LongestStrikeAbove(v, Statistics.Mean(v)));
     942            TraceEvaluation(currentInstr, cur);
     943            return cur;
     944          }
     945        case OpCodes.LongestStrikeAboveMedian: {
     946            var cur = Evaluate(dataset, ref row, state, traceDict);
     947            cur = AggregateApply(cur,
     948              s => 0,
     949              v => LongestStrikeAbove(v, Statistics.Median(v)));
     950            TraceEvaluation(currentInstr, cur);
     951            return cur;
     952          }
     953        case OpCodes.LongestStrikeBelowMean: {
     954            var cur = Evaluate(dataset, ref row, state, traceDict);
     955            cur = AggregateApply(cur,
     956              s => 0,
     957              v => LongestStrikeBelow(v, Statistics.Mean(v)));
     958            TraceEvaluation(currentInstr, cur);
     959            return cur;
     960          }
     961        case OpCodes.LongestStrikeBelowMedian: {
     962            var cur = Evaluate(dataset, ref row, state, traceDict);
     963            cur = AggregateApply(cur,
     964              s => 0,
     965              v => LongestStrikeBelow(v, Statistics.Median(v)));
     966            TraceEvaluation(currentInstr, cur);
     967            return cur;
     968          }
     969        case OpCodes.LongestStrikePositive: {
     970            var cur = Evaluate(dataset, ref row, state, traceDict);
     971            cur = AggregateApply(cur,
     972              s => 0,
     973              v => LongestStrikeAbove(v, 0));
     974            TraceEvaluation(currentInstr, cur);
     975            return cur;
     976          }
     977        case OpCodes.LongestStrikeNegative: {
     978            var cur = Evaluate(dataset, ref row, state, traceDict);
     979            cur = AggregateApply(cur,
     980              s => 0,
     981              v => LongestStrikeAbove(v, 0));
     982            TraceEvaluation(currentInstr, cur);
     983            return cur;
     984          }
     985        case OpCodes.LongestStrikeZero: {
     986            var cur = Evaluate(dataset, ref row, state, traceDict);
     987            cur = AggregateApply(cur,
     988              s => 0,
     989              v => LongestStrikeEqual(v, 0));
     990            TraceEvaluation(currentInstr, cur);
     991            return cur;
     992          }
     993        case OpCodes.MeanAbsoluteChange: {
     994            var cur = Evaluate(dataset, ref row, state, traceDict);
     995            cur = AggregateApply(cur,
     996              s => 0,
     997              v => {
     998                double sum = 0.0;
     999                for (int i = 0; i < v.Count - 1; i++) {
     1000                  sum += Math.Abs(v[i + 1] - v[i]);
     1001                }
     1002
     1003                return sum / v.Count;
     1004              });
     1005            TraceEvaluation(currentInstr, cur);
     1006            return cur;
     1007          }
     1008        case OpCodes.MeanAbsoluteChangeQuantiles: {
     1009            var cur = Evaluate(dataset, ref row, state, traceDict);
     1010            var ql = Evaluate(dataset, ref row, state, traceDict);
     1011            var qu = Evaluate(dataset, ref row, state, traceDict);
     1012            cur = AggregateApply(cur,
     1013              s => 0,
     1014              v => {
     1015                var lowerBound = Statistics.Quantile(v, ql.Scalar);
     1016                var upperBound = Statistics.Quantile(v, qu.Scalar);
     1017                var inBounds = v.Select(e => e > lowerBound && e < upperBound).ToList();
     1018                double sum = 0.0;
     1019                int count = 0;
     1020                for (int i = 0; i < v.Count - 1; i++) {
     1021                  if (inBounds[i] && inBounds[i + 1]) {
     1022                    sum += Math.Abs(v[i + 1] - v[i]);
     1023                    count++;
     1024                  }
     1025                }
     1026
     1027                return sum / count;
     1028              });
     1029            TraceEvaluation(currentInstr, cur);
     1030            return cur;
     1031          }
     1032        case OpCodes.MeanAutocorrelation: {
     1033            var cur = Evaluate(dataset, ref row, state, traceDict);
     1034            cur = AggregateApply(cur,
     1035              s => 0,
     1036              v => {
     1037                double sum = 0.0;
     1038                double mean = Statistics.Mean(v);
     1039                for (int l = 0; l < v.Count; l++) {
     1040                  for (int i = 0; i < v.Count - l; i++) {
     1041                    sum += (v[i] - mean) * (v[i + l] - mean);
     1042                  }
     1043                }
     1044
     1045                return sum / (v.Count - 1) / Statistics.PopulationVariance(v);
     1046              });
     1047            TraceEvaluation(currentInstr, cur);
     1048            return cur;
     1049          }
     1050        case OpCodes.LaggedAutocorrelation: {
     1051            var cur = Evaluate(dataset, ref row, state, traceDict);
     1052            var lVal = Evaluate(dataset, ref row, state, traceDict);
     1053            cur = AggregateApply(cur,
     1054              s => 0,
     1055              v => {
     1056                double sum = 0.0;
     1057                int l = (int)Math.Round(lVal.Scalar);
     1058                double mean = Statistics.Mean(v);
     1059                for (int i = 0; i < v.Count - l; i++) {
     1060                  sum += (v[i] - mean) * (v[i + l] - mean);
     1061                }
     1062
     1063                return sum / Statistics.PopulationVariance(v);
     1064              });
     1065            TraceEvaluation(currentInstr, cur);
     1066            return cur;
     1067          }
     1068        case OpCodes.MeanSecondDerivateCentral: {
     1069            var cur = Evaluate(dataset, ref row, state, traceDict);
     1070            cur = AggregateApply(cur,
     1071              s => 0,
     1072              v => {
     1073                double sum = 0.0;
     1074                for (int i = 1; i < v.Count - 1; i++) {
     1075                  sum += (v[i - 1] - 2 * v[i] + v[i + 1]) / 2;
     1076                }
     1077
     1078                return sum / (v.Count - 2);
     1079              });
     1080            TraceEvaluation(currentInstr, cur);
     1081            return cur;
     1082          }
     1083        case OpCodes.NumberPeaksOfSize: {
     1084            var cur = Evaluate(dataset, ref row, state, traceDict);
     1085            var l = Evaluate(dataset, ref row, state, traceDict);
     1086            cur = AggregateApply(cur,
     1087              s => 0,
     1088              v => CountNumberOfPeaks(v, l.Scalar));
     1089            TraceEvaluation(currentInstr, cur);
     1090            return cur;
     1091          }
     1092        case OpCodes.LargeNumberOfPeaks: {
     1093            var cur = Evaluate(dataset, ref row, state, traceDict);
     1094            var l = Evaluate(dataset, ref row, state, traceDict);
     1095            var m = Evaluate(dataset, ref row, state, traceDict);
     1096            cur = AggregateApply(cur,
     1097              s => 0,
     1098              v => CountNumberOfPeaks(v, l.Scalar) > m.Scalar ? 1.0 : 0.0);
     1099            TraceEvaluation(currentInstr, cur);
     1100            return cur;
     1101          }
     1102        case OpCodes.TimeReversalAsymmetryStatistic: {
     1103            var cur = Evaluate(dataset, ref row, state, traceDict);
     1104            var l = Evaluate(dataset, ref row, state, traceDict);
     1105            cur = AggregateApply(cur,
     1106              s => 0,
     1107              v => {
     1108                int lag = (int)Math.Round(l.Scalar);
     1109                double sum = 0.0;
     1110                for (int i = 0; i < v.Count - 2 * lag; i++) {
     1111                  sum += Math.Pow(v[i + 2 * lag], 2) * v[i + lag] - v[i + lag] * Math.Pow(v[i], 2);
     1112                }
     1113
     1114                return sum / (v.Count - 2 * lag);
     1115              });
     1116            TraceEvaluation(currentInstr, cur);
     1117            return cur;
     1118          }
     1119        #endregion
     1120
    7541121        default:
    7551122          throw new NotSupportedException($"Unsupported OpCode: {currentInstr.opCode}");
    7561123      }
    7571124    }
     1125
    /// <summary>
    /// Returns the length of the longest run of consecutive elements in <paramref name="v"/>
    /// that are strictly greater than <paramref name="threshold"/>.
    /// </summary>
    /// <param name="v">The vector to scan from first to last element.</param>
    /// <param name="threshold">Elements must exceed this value to extend a run.</param>
    /// <returns>The longest run length; 0 if no element exceeds the threshold.</returns>
    private static int LongestStrikeAbove(DoubleVector v, double threshold) {
      int best = 0;
      int run = 0;
      foreach (double element in v) {
        // any element that is not strictly above the threshold ends the current run
        run = element > threshold ? run + 1 : 0;
        if (run > best) {
          best = run;
        }
      }
      return best;
    }
    /// <summary>
    /// Returns the length of the longest run of consecutive elements in <paramref name="v"/>
    /// that are strictly less than <paramref name="threshold"/>.
    /// </summary>
    /// <param name="v">The vector to scan from first to last element.</param>
    /// <param name="threshold">Elements must be below this value to extend a run.</param>
    /// <returns>The longest run length; 0 if no element is below the threshold.</returns>
    private static int LongestStrikeBelow(DoubleVector v, double threshold) {
      int best = 0;
      int run = 0;
      foreach (double element in v) {
        // any element that is not strictly below the threshold ends the current run
        run = element < threshold ? run + 1 : 0;
        if (run > best) {
          best = run;
        }
      }
      return best;
    }
     1148
    /// <summary>
    /// Returns the length of the longest run of consecutive elements in <paramref name="v"/>
    /// that are equal to <paramref name="value"/> within the tolerance <paramref name="epsilon"/>.
    /// </summary>
    /// <param name="v">The vector to scan from first to last element.</param>
    /// <param name="value">The value the elements are compared against.</param>
    /// <param name="epsilon">
    /// Maximum absolute difference between an element and <paramref name="value"/> that still
    /// counts as equal. The default (<see cref="double.Epsilon"/>) is effectively an exact comparison.
    /// </param>
    /// <returns>The longest run length; 0 if no element matches.</returns>
    private static int LongestStrikeEqual(DoubleVector v, double value, double epsilon = double.Epsilon) {
      int longestStrike = 0, currentStrike = 0;
      for (int i = 0; i < v.Count; i++) {
        // Compare against the requested value; the previous code compared each element against
        // epsilon itself and never used 'value'.
        // The exact-equality check covers infinities, whose difference would be NaN.
        bool isEqual = v[i] == value || Math.Abs(v[i] - value) <= epsilon;
        if (isEqual) {
          currentStrike++;
          longestStrike = Math.Max(longestStrike, currentStrike);
        } else {
          currentStrike = 0;
        }
      }
      return longestStrike;
    }
    /// <summary>
    /// Counts the elements of <paramref name="v"/> that exceed each existing direct neighbour
    /// by more than <paramref name="heightDifference"/>.
    /// </summary>
    /// <param name="v">The vector to scan.</param>
    /// <param name="heightDifference">Margin by which an element must exceed a neighbour.</param>
    /// <returns>The number of elements that satisfy the condition on both sides.</returns>
    private static int CountNumberOfPeaks(DoubleVector v, double heightDifference) {
      int lastIndex = v.Count - 1;
      int peaks = 0;
      for (int pos = 0; pos <= lastIndex; pos++) {
        // The first and last element have no neighbour on one side; that side passes automatically.
        if (pos > 0 && !(v[pos] > v[pos - 1] + heightDifference)) {
          continue;
        }
        if (pos < lastIndex && !(v[pos] > v[pos + 1] + heightDifference)) {
          continue;
        }
        peaks++;
      }
      return peaks;
    }
    7581170  }
    7591171}
Note: See TracChangeset for help on using the changeset viewer.