Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/SymbolicDataAnalysisExpressionTreeVectorInterpreter.cs @ 17556

Last change on this file since 17556 was 17556, checked in by pfleck, 4 years ago

#3040 Some corner cases for empty or length-one vectors now return NaN.

File size: 20.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Parameters;
29using HEAL.Attic;
30using MathNet.Numerics.Statistics;
31
32using DoubleVector = MathNet.Numerics.LinearAlgebra.Vector<double>;
33
34namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
35  [StorableType("DE68A1D9-5AFC-4DDD-AB62-29F3B8FC28E0")]
36  [Item("SymbolicDataAnalysisExpressionTreeVectorInterpreter", "Interpreter for symbolic expression trees including vector arithmetic.")]
37  public class SymbolicDataAnalysisExpressionTreeVectorInterpreter : ParameterizedNamedItem, ISymbolicDataAnalysisExpressionTreeInterpreter {
38    [StorableType("2612504E-AD5F-4AE2-B60E-98A5AB59E164")]
39    public enum Aggregation {
40      Mean,
41      Median,
42      Sum,
43      NaN,
44      Exception
45    }
46
47    private const string EvaluatedSolutionsParameterName = "EvaluatedSolutions";
48    private const string FinalAggregationParameterName = "FinalAggregation";
49
50    public override bool CanChangeName {
51      get { return false; }
52    }
53
54    public override bool CanChangeDescription {
55      get { return false; }
56    }
57
58    #region parameter properties
59    public IFixedValueParameter<IntValue> EvaluatedSolutionsParameter {
60      get { return (IFixedValueParameter<IntValue>)Parameters[EvaluatedSolutionsParameterName]; }
61    }
62    public IFixedValueParameter<EnumValue<Aggregation>> FinalAggregationParameter {
63      get { return (IFixedValueParameter<EnumValue<Aggregation>>)Parameters[FinalAggregationParameterName]; }
64    }
65    #endregion
66
67    #region properties
68    public int EvaluatedSolutions {
69      get { return EvaluatedSolutionsParameter.Value.Value; }
70      set { EvaluatedSolutionsParameter.Value.Value = value; }
71    }
72    public Aggregation FinalAggregation {
73      get { return FinalAggregationParameter.Value.Value; }
74      set { FinalAggregationParameter.Value.Value = value; }
75    }
76    #endregion
77
78    [StorableConstructor]
79    protected SymbolicDataAnalysisExpressionTreeVectorInterpreter(StorableConstructorFlag _) : base(_) { }
80
81    protected SymbolicDataAnalysisExpressionTreeVectorInterpreter(SymbolicDataAnalysisExpressionTreeVectorInterpreter original, Cloner cloner)
82      : base(original, cloner) { }
83
84    public override IDeepCloneable Clone(Cloner cloner) {
85      return new SymbolicDataAnalysisExpressionTreeVectorInterpreter(this, cloner);
86    }
87
88    public SymbolicDataAnalysisExpressionTreeVectorInterpreter()
89      : this("SymbolicDataAnalysisExpressionTreeVectorInterpreter", "Interpreter for symbolic expression trees including vector arithmetic.") {
90    }
91
92    protected SymbolicDataAnalysisExpressionTreeVectorInterpreter(string name, string description)
93      : base(name, description) {
94      Parameters.Add(new FixedValueParameter<IntValue>(EvaluatedSolutionsParameterName, "A counter for the total number of solutions the interpreter has evaluated", new IntValue(0)));
95      Parameters.Add(new FixedValueParameter<EnumValue<Aggregation>>(FinalAggregationParameterName, "If root node of the expression tree results in a Vector it is aggregated according to this parameter", new EnumValue<Aggregation>(Aggregation.Mean)));
96    }
97
98    [StorableHook(HookType.AfterDeserialization)]
99    private void AfterDeserialization() {
100      if (!Parameters.ContainsKey(FinalAggregationParameterName)) {
101        Parameters.Add(new FixedValueParameter<EnumValue<Aggregation>>(FinalAggregationParameterName, "If root node of the expression tree results in a Vector it is aggregated according to this parameter", new EnumValue<Aggregation>(Aggregation.Mean)));
102      }
103    }
104
105    #region IStatefulItem
106    public void InitializeState() {
107      EvaluatedSolutions = 0;
108    }
109
110    public void ClearState() { }
111    #endregion
112
113    private readonly object syncRoot = new object();
114    public IEnumerable<double> GetSymbolicExpressionTreeValues(ISymbolicExpressionTree tree, IDataset dataset, IEnumerable<int> rows) {
115      lock (syncRoot) {
116        EvaluatedSolutions++; // increment the evaluated solutions counter
117      }
118      var state = PrepareInterpreterState(tree, dataset);
119
120      foreach (var rowEnum in rows) {
121        int row = rowEnum;
122        var result = Evaluate(dataset, ref row, state);
123        if (result.IsScalar)
124          yield return result.Scalar;
125        else if (result.IsVector) {
126          if (FinalAggregation == Aggregation.Mean) yield return result.Vector.Mean();
127          else if (FinalAggregation == Aggregation.Median) yield return Statistics.Median(result.Vector);
128          else if (FinalAggregation == Aggregation.Sum) yield return result.Vector.Sum();
129          else if (FinalAggregation == Aggregation.Exception) throw new InvalidOperationException("Result of the tree is not a scalar.");
130          else yield return double.NaN;
131        } else
132          yield return double.NaN;
133        state.Reset();
134      }
135    }
136
137    private static InterpreterState PrepareInterpreterState(ISymbolicExpressionTree tree, IDataset dataset) {
138      Instruction[] code = SymbolicExpressionTreeCompiler.Compile(tree, OpCodes.MapSymbolToOpCode);
139      int necessaryArgStackSize = 0;
140      foreach (Instruction instr in code) {
141        if (instr.opCode == OpCodes.Variable) {
142          var variableTreeNode = (VariableTreeNode)instr.dynamicNode;
143          if (dataset.VariableHasType<double>(variableTreeNode.VariableName))
144            instr.data = dataset.GetReadOnlyDoubleValues(variableTreeNode.VariableName);
145          else if (dataset.VariableHasType<DoubleVector>(variableTreeNode.VariableName))
146            instr.data = dataset.GetReadOnlyDoubleVectorValues(variableTreeNode.VariableName);
147          else throw new NotSupportedException($"Type of variable {variableTreeNode.VariableName} is not supported.");
148        } else if (instr.opCode == OpCodes.FactorVariable) {
149          var factorTreeNode = instr.dynamicNode as FactorVariableTreeNode;
150          instr.data = dataset.GetReadOnlyStringValues(factorTreeNode.VariableName);
151        } else if (instr.opCode == OpCodes.BinaryFactorVariable) {
152          var factorTreeNode = instr.dynamicNode as BinaryFactorVariableTreeNode;
153          instr.data = dataset.GetReadOnlyStringValues(factorTreeNode.VariableName);
154        } else if (instr.opCode == OpCodes.LagVariable) {
155          var laggedVariableTreeNode = (LaggedVariableTreeNode)instr.dynamicNode;
156          instr.data = dataset.GetReadOnlyDoubleValues(laggedVariableTreeNode.VariableName);
157        } else if (instr.opCode == OpCodes.VariableCondition) {
158          var variableConditionTreeNode = (VariableConditionTreeNode)instr.dynamicNode;
159          instr.data = dataset.GetReadOnlyDoubleValues(variableConditionTreeNode.VariableName);
160        } else if (instr.opCode == OpCodes.Call) {
161          necessaryArgStackSize += instr.nArguments + 1;
162        }
163      }
164      return new InterpreterState(code, necessaryArgStackSize);
165    }
166
167
168    public struct EvaluationResult {
169      public double Scalar { get; }
170      public bool IsScalar => !double.IsNaN(Scalar);
171
172      public DoubleVector Vector { get; }
173      public bool IsVector => !(Vector.Count == 1 && double.IsNaN(Vector[0]));
174
175      public bool IsNaN => !IsScalar && !IsVector;
176
177      public EvaluationResult(double scalar) {
178        Scalar = scalar;
179        Vector = NaNVector;
180      }
181      public EvaluationResult(DoubleVector vector) {
182        if (vector == null) throw new ArgumentNullException(nameof(vector));
183        Vector = vector;
184        Scalar = double.NaN;
185      }
186
187      public override string ToString() {
188        if (IsScalar) return Scalar.ToString();
189        if (IsVector) return Vector.ToVectorString();
190        return "NaN";
191      }
192
193      private static readonly DoubleVector NaNVector = DoubleVector.Build.Dense(1, double.NaN);
194      public static readonly EvaluationResult NaN = new EvaluationResult(double.NaN);
195    }
196
197    private static EvaluationResult ArithmeticApply(EvaluationResult lhs, EvaluationResult rhs,
198      Func<double, double, double> ssFunc = null,
199      Func<double, DoubleVector, DoubleVector> svFunc = null,
200      Func<DoubleVector, double, DoubleVector> vsFunc = null,
201      Func<DoubleVector, DoubleVector, DoubleVector> vvFunc = null) {
202      if (lhs.IsScalar && rhs.IsScalar && ssFunc != null) return new EvaluationResult(ssFunc(lhs.Scalar, rhs.Scalar));
203      if (lhs.IsScalar && rhs.IsVector && svFunc != null) return new EvaluationResult(svFunc(lhs.Scalar, rhs.Vector));
204      if (lhs.IsVector && rhs.IsScalar && vsFunc != null) return new EvaluationResult(vsFunc(lhs.Vector, rhs.Scalar));
205      if (lhs.IsVector && rhs.IsVector && vvFunc != null) return new EvaluationResult(vvFunc(lhs.Vector, rhs.Vector));
206      return EvaluationResult.NaN;
207    }
208
209    private static EvaluationResult FunctionApply(EvaluationResult val,
210      Func<double, double> sFunc = null,
211      Func<DoubleVector, DoubleVector> vFunc = null) {
212      if (val.IsScalar && sFunc != null) return new EvaluationResult(sFunc(val.Scalar));
213      if (val.IsVector && vFunc != null) return new EvaluationResult(vFunc(val.Vector));
214      return EvaluationResult.NaN;
215    }
216    private static EvaluationResult AggregateApply(EvaluationResult val,
217      Func<double, double> sFunc = null,
218      Func<DoubleVector, double> vFunc = null) {
219      if (val.IsScalar && sFunc != null) return new EvaluationResult(sFunc(val.Scalar));
220      if (val.IsVector && vFunc != null) return new EvaluationResult(vFunc(val.Vector));
221      return EvaluationResult.NaN;
222    }
223    private static EvaluationResult AggregateMultipleApply(EvaluationResult lhs, EvaluationResult rhs,
224      Func<double, double, double> ssFunc = null,
225      Func<double, DoubleVector, double> svFunc = null,
226      Func<DoubleVector, double, double> vsFunc = null,
227      Func<DoubleVector, DoubleVector, double> vvFunc = null) {
228      if (lhs.IsScalar && rhs.IsScalar && ssFunc != null) return new EvaluationResult(ssFunc(lhs.Scalar, rhs.Scalar));
229      if (lhs.IsScalar && rhs.IsVector && svFunc != null) return new EvaluationResult(svFunc(lhs.Scalar, rhs.Vector));
230      if (lhs.IsVector && rhs.IsScalar && vsFunc != null) return new EvaluationResult(vsFunc(lhs.Vector, rhs.Scalar));
231      if (lhs.IsVector && rhs.IsVector && vvFunc != null) return new EvaluationResult(vvFunc(lhs.Vector, rhs.Vector));
232      return EvaluationResult.NaN;
233    }
234
235    public virtual EvaluationResult Evaluate(IDataset dataset, ref int row, InterpreterState state) {
236      Instruction currentInstr = state.NextInstruction();
237      switch (currentInstr.opCode) {
238        case OpCodes.Add: {
239            var cur = Evaluate(dataset, ref row, state);
240            for (int i = 1; i < currentInstr.nArguments; i++) {
241              var op = Evaluate(dataset, ref row, state);
242              cur = ArithmeticApply(cur, op,
243                (s1, s2) => s1 + s2,
244                (s1, v2) => s1 + v2,
245                (v1, s2) => v1 + s2,
246                (v1, v2) => v1 + v2);
247            }
248            return cur;
249          }
250        case OpCodes.Sub: {
251            var cur = Evaluate(dataset, ref row, state);
252            for (int i = 1; i < currentInstr.nArguments; i++) {
253              var op = Evaluate(dataset, ref row, state);
254              cur = ArithmeticApply(cur, op,
255                (s1, s2) => s1 - s2,
256                (s1, v2) => s1 - v2,
257                (v1, s2) => v1 - s2,
258                (v1, v2) => v1 - v2);
259            }
260            return cur;
261          }
262        case OpCodes.Mul: {
263            var cur = Evaluate(dataset, ref row, state);
264            for (int i = 1; i < currentInstr.nArguments; i++) {
265              var op = Evaluate(dataset, ref row, state);
266              cur = ArithmeticApply(cur, op,
267                (s1, s2) => s1 * s2,
268                (s1, v2) => s1 * v2,
269                (v1, s2) => v1 * s2,
270                (v1, v2) => v1.PointwiseMultiply(v2));
271            }
272            return cur;
273          }
274        case OpCodes.Div: {
275            var cur = Evaluate(dataset, ref row, state);
276            for (int i = 1; i < currentInstr.nArguments; i++) {
277              var op = Evaluate(dataset, ref row, state);
278              cur = ArithmeticApply(cur, op,
279                (s1, s2) => s1 / s2,
280                (s1, v2) => s1 / v2,
281                (v1, s2) => v1 / s2,
282                (v1, v2) => v1 / v2);
283            }
284            return cur;
285          }
286        case OpCodes.Absolute: {
287            var cur = Evaluate(dataset, ref row, state);
288            return FunctionApply(cur, Math.Abs, DoubleVector.Abs);
289          }
290        case OpCodes.Tanh: {
291            var cur = Evaluate(dataset, ref row, state);
292            return FunctionApply(cur, Math.Tanh, DoubleVector.Tanh);
293          }
294        case OpCodes.Cos: {
295            var cur = Evaluate(dataset, ref row, state);
296            return FunctionApply(cur, Math.Cos, DoubleVector.Cos);
297          }
298        case OpCodes.Sin: {
299            var cur = Evaluate(dataset, ref row, state);
300            return FunctionApply(cur, Math.Sin, DoubleVector.Sin);
301          }
302        case OpCodes.Tan: {
303            var cur = Evaluate(dataset, ref row, state);
304            return FunctionApply(cur, Math.Tan, DoubleVector.Tan);
305          }
306        case OpCodes.Square: {
307            var cur = Evaluate(dataset, ref row, state);
308            return FunctionApply(cur,
309              s => Math.Pow(s, 2),
310              v => v.PointwisePower(2));
311          }
312        case OpCodes.Cube: {
313            var cur = Evaluate(dataset, ref row, state);
314            return FunctionApply(cur,
315              s => Math.Pow(s, 3),
316              v => v.PointwisePower(3));
317          }
318        case OpCodes.Power: {
319            var x = Evaluate(dataset, ref row, state);
320            var y = Evaluate(dataset, ref row, state);
321            return ArithmeticApply(x, y,
322              (s1, s2) => Math.Pow(s1, Math.Round(s2)),
323              (s1, v2) => DoubleVector.Build.Dense(v2.Count, s1).PointwisePower(DoubleVector.Round(v2)),
324              (v1, s2) => v1.PointwisePower(Math.Round(s2)),
325              (v1, v2) => v1.PointwisePower(DoubleVector.Round(v2)));
326          }
327        case OpCodes.SquareRoot: {
328            var cur = Evaluate(dataset, ref row, state);
329            return FunctionApply(cur,
330              s => Math.Sqrt(s),
331              v => DoubleVector.Sqrt(v));
332          }
333        case OpCodes.CubeRoot: {
334            var cur = Evaluate(dataset, ref row, state);
335            return FunctionApply(cur,
336              s => s < 0 ? -Math.Pow(-s, 1.0 / 3.0) : Math.Pow(s, 1.0 / 3.0),
337              v => v.Map(s => s < 0 ? -Math.Pow(-s, 1.0 / 3.0) : Math.Pow(s, 1.0 / 3.0)));
338          }
339        case OpCodes.Root: {
340            var x = Evaluate(dataset, ref row, state);
341            var y = Evaluate(dataset, ref row, state);
342            return ArithmeticApply(x, y,
343              (s1, s2) => Math.Pow(s1, 1.0 / Math.Round(s2)),
344              (s1, v2) => DoubleVector.Build.Dense(v2.Count, s1).PointwisePower(1.0 / DoubleVector.Round(v2)),
345              (v1, s2) => v1.PointwisePower(1.0 / Math.Round(s2)),
346              (v1, v2) => v1.PointwisePower(1.0 / DoubleVector.Round(v2)));
347          }
348        case OpCodes.Exp: {
349            var cur = Evaluate(dataset, ref row, state);
350            return FunctionApply(cur,
351              s => Math.Exp(s),
352              v => DoubleVector.Exp(v));
353          }
354        case OpCodes.Log: {
355            var cur = Evaluate(dataset, ref row, state);
356            return FunctionApply(cur,
357              s => Math.Log(s),
358              v => DoubleVector.Log(v));
359          }
360        case OpCodes.Sum: {
361            var cur = Evaluate(dataset, ref row, state);
362            return AggregateApply(cur,
363              s => s,
364              v => v.Sum());
365          }
366        case OpCodes.Mean: {
367            var cur = Evaluate(dataset, ref row, state);
368            return AggregateApply(cur,
369              s => s,
370              v => v.Mean());
371          }
372        case OpCodes.StandardDeviation: {
373            var cur = Evaluate(dataset, ref row, state);
374            return AggregateApply(cur,
375              s => double.NaN,
376              v => Statistics.StandardDeviation(v));
377          }
378        case OpCodes.Length: {
379          var cur = Evaluate(dataset, ref row, state);
380          return AggregateApply(cur,
381            s => 1,
382            v => v.Count);
383        }
384        case OpCodes.Min: {
385          var cur = Evaluate(dataset, ref row, state);
386          return AggregateApply(cur,
387            s => s,
388            v => Statistics.Minimum(v));
389        }
390        case OpCodes.Max: {
391          var cur = Evaluate(dataset, ref row, state);
392          return AggregateApply(cur,
393            s => s,
394            v => Statistics.Maximum(v));
395        }
396        case OpCodes.Variance: {
397          var cur = Evaluate(dataset, ref row, state);
398          return AggregateApply(cur,
399            s => double.NaN,
400            v => Statistics.Variance(v));
401        }
402        case OpCodes.Skewness: {
403          var cur = Evaluate(dataset, ref row, state);
404          return AggregateApply(cur,
405            s => double.NaN,
406            v => Statistics.Skewness(v));
407        }
408        case OpCodes.Kurtosis: {
409          var cur = Evaluate(dataset, ref row, state);
410          return AggregateApply(cur,
411            s => double.NaN,
412            v => Statistics.Kurtosis(v));
413        }
414        case OpCodes.EuclideanDistance: {
415          var x1 = Evaluate(dataset, ref row, state);
416          var x2 = Evaluate(dataset, ref row, state);
417          return AggregateMultipleApply(x1, x2,
418            //(s1, s2) => s1 - s2,
419            //(s1, v2) => Math.Sqrt((s1 - v2).PointwisePower(2).Sum()),
420            //(v1, s2) => Math.Sqrt((v1 - s2).PointwisePower(2).Sum()),
421            vvFunc: (v1, v2) => v1.Count == v2.Count ? Math.Sqrt((v1 - v2).PointwisePower(2).Sum()) : double.NaN);
422        }
423        case OpCodes.Covariance: {
424          var x1 = Evaluate(dataset, ref row, state);
425          var x2 = Evaluate(dataset, ref row, state);
426          return AggregateMultipleApply(x1, x2,
427            //(s1, s2) => 0,
428            //(s1, v2) => 0,
429            //(v1, s2) => 0,
430            vvFunc: (v1, v2) => v1.Count == v2.Count ? Statistics.Covariance(v1, v2) : double.NaN);
431        }
432        case OpCodes.Variable: {
433            if (row < 0 || row >= dataset.Rows) return EvaluationResult.NaN;
434            var variableTreeNode = (VariableTreeNode)currentInstr.dynamicNode;
435            if (currentInstr.data is IList<double> doubleList)
436              return new EvaluationResult(doubleList[row] * variableTreeNode.Weight);
437            if (currentInstr.data is IList<DoubleVector> doubleVectorList)
438              return new EvaluationResult(doubleVectorList[row] * variableTreeNode.Weight);
439            throw new NotSupportedException($"Unsupported type of variable: {currentInstr.data.GetType().GetPrettyName()}");
440          }
441        case OpCodes.BinaryFactorVariable: {
442            if (row < 0 || row >= dataset.Rows) return EvaluationResult.NaN;
443            var factorVarTreeNode = currentInstr.dynamicNode as BinaryFactorVariableTreeNode;
444            return new EvaluationResult(((IList<string>)currentInstr.data)[row] == factorVarTreeNode.VariableValue ? factorVarTreeNode.Weight : 0);
445          }
446        case OpCodes.FactorVariable: {
447            if (row < 0 || row >= dataset.Rows) return EvaluationResult.NaN;
448            var factorVarTreeNode = currentInstr.dynamicNode as FactorVariableTreeNode;
449            return new EvaluationResult(factorVarTreeNode.GetValue(((IList<string>)currentInstr.data)[row]));
450          }
451        case OpCodes.Constant: {
452            var constTreeNode = (ConstantTreeNode)currentInstr.dynamicNode;
453            return new EvaluationResult(constTreeNode.Value);
454          }
455
456        default:
457          throw new NotSupportedException($"Unsupported OpCode: {currentInstr.opCode}");
458      }
459    }
460  }
461}
Note: See TracBrowser for help on using the repository browser.