Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Grammars/TypeCoherentVectorTimeSeriesExpressionGrammar.cs @ 18060

Last change on this file since 18060 was 18060, checked in by pfleck, 3 years ago

#3040 Added a subvector symbol with ranges as subtrees.

File size: 19.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HEAL.Attic;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using DoubleVector = MathNet.Numerics.LinearAlgebra.Vector<double>;
29
namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Grammar for functional expressions over scalar and vector (time series) values.
  /// Syntactic constraints are configured so that vector and scalar expressions are not mixed:
  /// the start symbol only accepts scalar symbols, and vector expressions enter a scalar
  /// expression through aggregation symbols.
  /// </summary>
  [StorableType("88895D71-3D3F-44A7-A531-D5D59963AABE")]
  [Item("TypeCoherentVectorTimeSeriesExpressionGrammar", "Represents a grammar for functional expressions in which special syntactic constraints are enforced so that vector and scalar expressions are not mixed.")]
  public class TypeCoherentVectorTimeSeriesExpressionGrammar : DataAnalysisGrammar, ISymbolicDataAnalysisGrammar {
    // Names of the scalar symbol groups.
    private const string ArithmeticFunctionsName = "Arithmetic Functions";
    private const string TrigonometricFunctionsName = "Trigonometric Functions";
    private const string ExponentialFunctionsName = "Exponential and Logarithmic Functions";
    private const string PowerFunctionsName = "Power Functions";
    private const string TerminalsName = "Terminals";
    private const string VectorAggregationName = "Aggregations";
    private const string VectorStatisticsName = "Vector Statistics";
    private const string VectorDistancesName = "Vector Distances";
    private const string VectorDistributionCharacteristicsName = "Distribution Characteristics";
    private const string VectorTimeSeriesDynamicsName = "Time Series Dynamics";
    private const string ScalarSymbolsName = "Scalar Symbols";

    // Names of the vector symbol groups.
    private const string VectorArithmeticFunctionsName = "Vector Arithmetic Functions";
    private const string VectorTrigonometricFunctionsName = "Vector Trigonometric Functions";
    private const string VectorExponentialFunctionsName = "Vector Exponential and Logarithmic Functions";
    private const string VectorPowerFunctionsName = "Vector Power Functions";
    private const string VectorTerminalsName = "Vector Terminals";
    private const string VectorSymbolsName = "Vector Symbols";

    // Names of the vector manipulation symbol groups.
    private const string VectorManipulationSymbolsName = "Vector Manipulation Symbols";
    private const string VectorSubVectorSymbolsName = "Vector SubVector Symbols";

    // Names used by ConfigureVariableSymbols to tell the scalar and the vector variable symbol apart.
    // NOTE(review): the scalar symbol is created without an explicit name, so this presumably has to
    // equal the default name assigned by Variable - verify against the Variable symbol.
    private const string ScalarVariableName = "Variable";
    private const string VectorVariableName = "Vector Variable";

    [StorableConstructor]
    protected TypeCoherentVectorTimeSeriesExpressionGrammar(StorableConstructorFlag _) : base(_) { }

    /// <summary>Cloning constructor; all state is copied by the base class.</summary>
    protected TypeCoherentVectorTimeSeriesExpressionGrammar(TypeCoherentVectorTimeSeriesExpressionGrammar original, Cloner cloner) : base(original, cloner) { }

    /// <summary>
    /// Creates the grammar using the name and description declared in the <see cref="ItemAttribute"/>
    /// of this type and populates it with its symbols and constraints.
    /// </summary>
    public TypeCoherentVectorTimeSeriesExpressionGrammar()
      : base(ItemAttribute.GetName(typeof(TypeCoherentVectorTimeSeriesExpressionGrammar)), ItemAttribute.GetDescription(typeof(TypeCoherentVectorTimeSeriesExpressionGrammar))) {
      Initialize();
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new TypeCoherentVectorTimeSeriesExpressionGrammar(this, cloner);
    }

    /// <summary>
    /// Declares all symbols, arranges them in groups, registers the top-level groups with the grammar
    /// and configures subtree counts, allowed child symbols and the default enabled state.
    /// </summary>
    private void Initialize() {
      #region scalar symbol declaration
      var add = new Addition();
      var sub = new Subtraction();
      var mul = new Multiplication();
      var div = new Division();

      var sin = new Sine();
      var cos = new Cosine();
      var tan = new Tangent();

      var exp = new Exponential();
      var log = new Logarithm();

      var square = new Square();
      var sqrt = new SquareRoot();
      var cube = new Cube();
      var cubeRoot = new CubeRoot();
      var power = new Power();
      var root = new Root();

      var constant = new Constant { MinValue = -20, MaxValue = 20 };
      var constantZeroToOne = new Constant { Name = "Constant [0-1]", MinValue = 0, MaxValue = 1 };
      var variable = new Variable();
      var binFactorVariable = new BinaryFactorVariable();
      var factorVariable = new FactorVariable();

      // Aggregations: only mean, standard deviation and sum are enabled by default.
      var mean = new Mean();
      var median = new Median() { Enabled = false };
      var sd = new StandardDeviation();
      var sum = new Sum();
      var length = new Length() { Enabled = false };
      var min = new Min() { Enabled = false };
      var max = new Max() { Enabled = false };
      var quantile = new Quantile() { Enabled = false };
      var variance = new Variance() { Enabled = false };
      var skewness = new Skewness() { Enabled = false };
      var kurtosis = new Kurtosis() { Enabled = false };
      var euclideanDistance = new EuclideanDistance() { Enabled = false };
      var covariance = new Covariance() { Enabled = false };
      #endregion

      #region vector symbol declaration
      // The vector symbols reuse the scalar symbol classes and are distinguished by name only.
      var vectoradd = new Addition() { Name = "Vector Addition" };
      var vectorsub = new Subtraction() { Name = "Vector Subtraction" };
      var vectormul = new Multiplication() { Name = "Vector Multiplication" };
      var vectordiv = new Division() { Name = "Vector Division" };

      var vectorsin = new Sine() { Name = "Vector Sine" };
      var vectorcos = new Cosine() { Name = "Vector Cosine" };
      var vectortan = new Tangent() { Name = "Vector Tangent" };

      var vectorexp = new Exponential() { Name = "Vector Exponential" };
      var vectorlog = new Logarithm() { Name = "Vector Logarithm" };

      var vectorsquare = new Square() { Name = "Vector Square" };
      var vectorsqrt = new SquareRoot() { Name = "Vector SquareRoot" };
      var vectorcube = new Cube() { Name = "Vector Cube" };
      var vectorcubeRoot = new CubeRoot() { Name = "Vector CubeRoot" };
      var vectorpower = new Power() { Name = "Vector Power" };
      var vectorroot = new Root() { Name = "Vector Root" };

      // ConfigureVariableSymbols looks this symbol up by the same constant.
      var vectorvariable = new Variable() { Name = VectorVariableName };

      #region TimeSeries Symbols
      // All time series symbols are disabled by default.
      var absoluteEnergy = new AbsoluteEnergy() { Enabled = false };
      var binnedEntropy = new BinnedEntropy() { Enabled = false };
      var hasLargeStandardDeviation = new HasLargeStandardDeviation() { Enabled = false };
      var hasVarianceLargerThanStd = new HasVarianceLargerThanStd() { Enabled = false };
      var isSymmetricLooking = new IsSymmetricLooking() { Enabled = false };
      var numberDataPointsAboveMean = new NumberDataPointsAboveMean() { Enabled = false };
      var numberDataPointsAboveMedian = new NumberDataPointsAboveMedian() { Enabled = false };
      var numberDataPointsBelowMean = new NumberDataPointsBelowMean() { Enabled = false };
      var numberDataPointsBelowMedian = new NumberDataPointsBelowMedian() { Enabled = false };

      var arimaModelCoefficients = new ArimaModelCoefficients() { Enabled = false };
      var continuousWaveletTransformationCoefficients = new ContinuousWaveletTransformationCoefficients() { Enabled = false };
      var fastFourierTransformationCoefficient = new FastFourierTransformationCoefficient() { Enabled = false };
      var firstIndexMax = new FirstIndexMax() { Enabled = false };
      var firstIndexMin = new FirstIndexMin() { Enabled = false };
      var lastIndexMax = new LastIndexMax() { Enabled = false };
      var lastIndexMin = new LastIndexMin() { Enabled = false };
      var longestStrikeAboveMean = new LongestStrikeAboveMean() { Enabled = false };
      var longestStrikeAboveMedian = new LongestStrikeAboveMedian() { Enabled = false };
      var longestStrikeBelowMean = new LongestStrikeBelowMean() { Enabled = false };
      var longestStrikeBelowMedian = new LongestStrikeBelowMedian() { Enabled = false };
      var longestStrikePositive = new LongestStrikePositive() { Enabled = false };
      var longestStrikeNegative = new LongestStrikeNegative() { Enabled = false };
      var longestStrikeZero = new LongestStrikeZero() { Enabled = false };
      var meanAbsoluteChange = new MeanAbsoluteChange() { Enabled = false };
      var meanAbsoluteChangeQuantiles = new MeanAbsoluteChangeQuantiles() { Enabled = false };
      var meanAutocorrelation = new MeanAutocorrelation() { Enabled = false };
      var laggedAutocorrelation = new LaggedAutocorrelation() { Enabled = false };
      var meanSecondDerivateCentral = new MeanSecondDerivateCentral() { Enabled = false };
      var numberPeaksOfSize = new NumberPeaksOfSize() { Enabled = false };
      var largeNumberOfPeaks = new LargeNumberOfPeaks() { Enabled = false };
      var timeReversalAsymmetryStatistic = new TimeReversalAsymmetryStatistic() { Enabled = false };
      #endregion
      #endregion

      #region vector manipulation symbol declaration
      var subvectorLocal = new SubVector();
      var subvectorSubtree = new SubVectorSubtree();
      #endregion

      #region group symbol declaration
      var arithmeticSymbols = new GroupSymbol(ArithmeticFunctionsName, new List<ISymbol> { add, sub, mul, div });
      var trigonometricSymbols = new GroupSymbol(TrigonometricFunctionsName, new List<ISymbol> { sin, cos, tan });
      var exponentialAndLogarithmicSymbols = new GroupSymbol(ExponentialFunctionsName, new List<ISymbol> { exp, log });
      var powerSymbols = new GroupSymbol(PowerFunctionsName, new List<ISymbol> { square, sqrt, cube, cubeRoot, power, root });
      var terminalSymbols = new GroupSymbol(TerminalsName, new List<ISymbol> { constant, constantZeroToOne, variable, binFactorVariable, factorVariable });
      var statisticsSymbols = new GroupSymbol(VectorStatisticsName, new List<ISymbol> { mean, median, sd, sum, length, min, max, quantile, variance, skewness, kurtosis });
      var distancesSymbols = new GroupSymbol(VectorDistancesName, new List<ISymbol> { euclideanDistance, covariance });
      var distributionCharacteristicsSymbols = new GroupSymbol(VectorDistributionCharacteristicsName, new List<ISymbol> {
        absoluteEnergy, binnedEntropy, hasLargeStandardDeviation, hasVarianceLargerThanStd, isSymmetricLooking,
        numberDataPointsAboveMean, numberDataPointsAboveMedian, numberDataPointsBelowMean, numberDataPointsBelowMedian
      });
      // Every time series dynamics symbol is listed exactly once.
      var timeSeriesDynamicsSymbols = new GroupSymbol(VectorTimeSeriesDynamicsName, new List<ISymbol> {
        arimaModelCoefficients, continuousWaveletTransformationCoefficients, fastFourierTransformationCoefficient,
        firstIndexMax, firstIndexMin, lastIndexMax, lastIndexMin,
        longestStrikeAboveMean, longestStrikeAboveMedian, longestStrikeBelowMean, longestStrikeBelowMedian,
        longestStrikePositive, longestStrikeNegative, longestStrikeZero,
        meanAbsoluteChange, meanAbsoluteChangeQuantiles, meanAutocorrelation, laggedAutocorrelation, meanSecondDerivateCentral,
        numberPeaksOfSize, largeNumberOfPeaks, timeReversalAsymmetryStatistic
      });
      var aggregationSymbols = new GroupSymbol(VectorAggregationName, new List<ISymbol> { statisticsSymbols, distancesSymbols, distributionCharacteristicsSymbols, timeSeriesDynamicsSymbols });
      var scalarSymbols = new GroupSymbol(ScalarSymbolsName, new List<ISymbol> { arithmeticSymbols, trigonometricSymbols, exponentialAndLogarithmicSymbols, powerSymbols, terminalSymbols, aggregationSymbols });

      var vectorarithmeticSymbols = new GroupSymbol(VectorArithmeticFunctionsName, new List<ISymbol> { vectoradd, vectorsub, vectormul, vectordiv });
      var vectortrigonometricSymbols = new GroupSymbol(VectorTrigonometricFunctionsName, new List<ISymbol> { vectorsin, vectorcos, vectortan });
      var vectorexponentialAndLogarithmicSymbols = new GroupSymbol(VectorExponentialFunctionsName, new List<ISymbol> { vectorexp, vectorlog });
      var vectorpowerSymbols = new GroupSymbol(VectorPowerFunctionsName, new List<ISymbol> { vectorsquare, vectorsqrt, vectorcube, vectorcubeRoot, vectorpower, vectorroot });
      var vectorterminalSymbols = new GroupSymbol(VectorTerminalsName, new List<ISymbol> { vectorvariable });
      var vectorSymbols = new GroupSymbol(VectorSymbolsName, new List<ISymbol> { vectorarithmeticSymbols, vectortrigonometricSymbols, vectorexponentialAndLogarithmicSymbols, vectorpowerSymbols, vectorterminalSymbols });

      var vectorSubVectorSymbols = new GroupSymbol(VectorSubVectorSymbolsName, new List<ISymbol> { subvectorLocal, subvectorSubtree });
      var vectorManipulationSymbols = new GroupSymbol(VectorManipulationSymbolsName, new List<ISymbol> { vectorSubVectorSymbols });
      #endregion

      // Only the three top-level groups are added; they contain all other groups and symbols.
      AddSymbol(scalarSymbols);
      AddSymbol(vectorSymbols);
      AddSymbol(vectorManipulationSymbols);

      #region subtree count configuration
      SetSubtreeCount(arithmeticSymbols, 2, 2);
      SetSubtreeCount(trigonometricSymbols, 1, 1);
      SetSubtreeCount(exponentialAndLogarithmicSymbols, 1, 1);
      // The power group mixes unary and binary symbols, so the counts are set per symbol.
      SetSubtreeCount(square, 1, 1);
      SetSubtreeCount(sqrt, 1, 1);
      SetSubtreeCount(cube, 1, 1);
      SetSubtreeCount(cubeRoot, 1, 1);
      SetSubtreeCount(power, 2, 2);
      SetSubtreeCount(root, 2, 2);
      SetSubtreeCount(terminalSymbols, 0, 0);
      foreach (var sy in new Symbol[] { mean, median, sd, sum, length, min, max, variance, skewness, kurtosis }) {
        SetSubtreeCount(sy, 1, 1);
      }
      SetSubtreeCount(quantile, 2, 2);
      SetSubtreeCount(distancesSymbols, 2, 2);

      #region TimeSeries symbols
      foreach (var sy in new Symbol[] {
        absoluteEnergy, hasLargeStandardDeviation, hasVarianceLargerThanStd, isSymmetricLooking,
        numberDataPointsAboveMean, numberDataPointsAboveMedian, numberDataPointsBelowMean, numberDataPointsBelowMedian
      }) {
        SetSubtreeCount(sy, 1, 1);
      }
      SetSubtreeCount(binnedEntropy, 2, 2);

      foreach (var sy in new Symbol[] {
        firstIndexMax, firstIndexMin, lastIndexMax, lastIndexMin,
        longestStrikeAboveMean, longestStrikeAboveMedian, longestStrikeBelowMean, longestStrikeBelowMedian,
        longestStrikePositive, longestStrikeNegative, longestStrikeZero,
        meanAbsoluteChange, meanAutocorrelation, meanSecondDerivateCentral
      }) {
        SetSubtreeCount(sy, 1, 1);
      }
      foreach (var sy in new Symbol[] {
        fastFourierTransformationCoefficient, laggedAutocorrelation, numberPeaksOfSize, timeReversalAsymmetryStatistic
      }) {
        SetSubtreeCount(sy, 2, 2);
      }
      foreach (var sy in new Symbol[] {
        arimaModelCoefficients, continuousWaveletTransformationCoefficients,
        meanAbsoluteChangeQuantiles, largeNumberOfPeaks
      }) {
        SetSubtreeCount(sy, 3, 3);
      }
      #endregion

      SetSubtreeCount(vectorarithmeticSymbols, 2, 2);
      SetSubtreeCount(vectortrigonometricSymbols, 1, 1);
      SetSubtreeCount(vectorexponentialAndLogarithmicSymbols, 1, 1);
      SetSubtreeCount(vectorsquare, 1, 1);
      SetSubtreeCount(vectorsqrt, 1, 1);
      SetSubtreeCount(vectorcube, 1, 1);
      SetSubtreeCount(vectorcubeRoot, 1, 1);
      SetSubtreeCount(vectorpower, 2, 2);
      SetSubtreeCount(vectorroot, 2, 2);
      SetSubtreeCount(vectorterminalSymbols, 0, 0);

      SetSubtreeCount(subvectorLocal, 1, 1);
      SetSubtreeCount(subvectorSubtree, 3, 3);
      #endregion

      #region allowed child symbols configuration
      // The root of every expression is scalar.
      AddAllowedChildSymbol(StartSymbol, scalarSymbols);

      // Scalar functions take scalar arguments; power and root take a constant as argument 1.
      AddAllowedChildSymbol(arithmeticSymbols, scalarSymbols);
      AddAllowedChildSymbol(trigonometricSymbols, scalarSymbols);
      AddAllowedChildSymbol(exponentialAndLogarithmicSymbols, scalarSymbols);
      AddAllowedChildSymbol(powerSymbols, scalarSymbols, 0);
      AddAllowedChildSymbol(power, constant, 1);
      AddAllowedChildSymbol(root, constant, 1);

      // Aggregations take a vector expression as argument 0; further arguments are configured per group.
      AddAllowedChildSymbol(aggregationSymbols, vectorSymbols, 0);
      AddAllowedChildSymbol(statisticsSymbols, vectorSubVectorSymbols, 0);
      AddAllowedChildSymbol(quantile, constantZeroToOne, 1);
      AddAllowedChildSymbol(distancesSymbols, vectorSymbols, 1);
      AddAllowedChildSymbol(distributionCharacteristicsSymbols, vectorSymbols, 0);
      AddAllowedChildSymbol(distributionCharacteristicsSymbols, vectorSubVectorSymbols, 0);
      AddAllowedChildSymbol(distributionCharacteristicsSymbols, constantZeroToOne, 1);
      AddAllowedChildSymbol(timeSeriesDynamicsSymbols, vectorSymbols, 0);
      AddAllowedChildSymbol(timeSeriesDynamicsSymbols, vectorSubVectorSymbols, 0);
      AddAllowedChildSymbol(timeSeriesDynamicsSymbols, constantZeroToOne, 1);
      AddAllowedChildSymbol(timeSeriesDynamicsSymbols, constantZeroToOne, 2);

      // Vector arithmetic accepts both vector and scalar operands.
      AddAllowedChildSymbol(vectorarithmeticSymbols, vectorSymbols);
      AddAllowedChildSymbol(vectorarithmeticSymbols, scalarSymbols);
      AddAllowedChildSymbol(vectortrigonometricSymbols, vectorSymbols);
      AddAllowedChildSymbol(vectorexponentialAndLogarithmicSymbols, vectorSymbols);
      AddAllowedChildSymbol(vectorpowerSymbols, vectorSymbols, 0);
      AddAllowedChildSymbol(vectorpower, constantZeroToOne, 1);
      AddAllowedChildSymbol(vectorroot, constantZeroToOne, 1);

      // Sub-vector symbols take a vector as argument 0; SubVectorSubtree additionally takes
      // two scalar expressions (arguments 1 and 2).
      AddAllowedChildSymbol(subvectorLocal, vectorSymbols);
      AddAllowedChildSymbol(subvectorSubtree, vectorSymbols, 0);
      AddAllowedChildSymbol(subvectorSubtree, scalarSymbols, 1);
      AddAllowedChildSymbol(subvectorSubtree, scalarSymbols, 2);
      #endregion

      #region default enabled/disabled
      var disabledByDefault = new[] {
        TrigonometricFunctionsName, ExponentialFunctionsName, PowerFunctionsName,
        VectorTrigonometricFunctionsName, VectorExponentialFunctionsName, VectorPowerFunctionsName,
        VectorManipulationSymbolsName
      };
      foreach (var grp in Symbols.Where(sym => disabledByDefault.Contains(sym.Name))) {
        grp.Enabled = false;
      }
      #endregion
    }

    /// <summary>
    /// Configures the variable symbols of this grammar for the given problem data.
    /// After the base configuration, the symbol named "Variable" is restricted to input variables
    /// of type <c>double</c> and the symbol named "Vector Variable" to input variables of type
    /// <c>DoubleVector</c>. Symbols that are marked as fixed are left unchanged.
    /// </summary>
    /// <param name="problemData">Problem data providing the dataset and the (allowed) input variables.</param>
    public override void ConfigureVariableSymbols(IDataAnalysisProblemData problemData) {
      base.ConfigureVariableSymbols(problemData);

      ConfigureVariableSymbolsOfType<double>(problemData, ScalarVariableName);
      ConfigureVariableSymbolsOfType<DoubleVector>(problemData, VectorVariableName);
    }

    /// <summary>
    /// Restricts every non-fixed variable symbol with the given name to the input variables whose
    /// dataset type is <typeparamref name="T"/> and sets the symbol's data type accordingly.
    /// </summary>
    /// <typeparam name="T">Dataset variable type the symbol is restricted to.</typeparam>
    /// <param name="problemData">Problem data providing the dataset and the (allowed) input variables.</param>
    /// <param name="symbolName">Name of the variable symbols to configure.</param>
    private void ConfigureVariableSymbolsOfType<T>(IDataAnalysisProblemData problemData, string symbolName) {
      var dataset = problemData.Dataset;
      foreach (var varSymbol in Symbols.OfType<VariableBase>().Where(sym => sym.Name == symbolName)) {
        if (varSymbol.Fixed) continue;

        varSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => dataset.VariableHasType<T>(x));
        varSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => dataset.VariableHasType<T>(x));
        varSymbol.VariableDataType = typeof(T);
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.