Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2990_VariableImpactBasedFeatureSelection/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/SymbolicDataAnalysisExpressionTreeInterpreter.cs

Last change on this file was 16565, checked in by gkronber, 6 years ago

#2520: merged changes from PersistenceOverhaul branch (r16451:16564) into trunk

File size: 23.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Parameters;
29using HEAL.Attic;
30
31namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
32  [StorableType("FB94F333-B32A-44FB-A561-CBDE76693D20")]
33  [Item("SymbolicDataAnalysisExpressionTreeInterpreter", "Interpreter for symbolic expression trees including automatically defined functions.")]
34  public class SymbolicDataAnalysisExpressionTreeInterpreter : ParameterizedNamedItem,
35    ISymbolicDataAnalysisExpressionTreeInterpreter {
/// <summary>Key of the interval-arithmetic switch in the parameter collection.</summary>
private const string CheckExpressionsWithIntervalArithmeticParameterName =
  "CheckExpressionsWithIntervalArithmetic";

/// <summary>Description text shown for the interval-arithmetic switch parameter.</summary>
private const string CheckExpressionsWithIntervalArithmeticParameterDescription =
  "Switch that determines if the interpreter checks the validity of expressions with interval arithmetic before evaluating the expression.";

/// <summary>Key of the evaluated-solutions counter in the parameter collection.</summary>
private const string EvaluatedSolutionsParameterName =
  "EvaluatedSolutions";
39
/// <summary>
/// Always returns <c>false</c>.
/// </summary>
public override bool CanChangeName {
  get {
    return false;
  }
}
43
/// <summary>
/// Always returns <c>false</c>.
/// </summary>
public override bool CanChangeDescription {
  get {
    return false;
  }
}
47
48    #region parameter properties
/// <summary>
/// The parameter stored under <c>CheckExpressionsWithIntervalArithmeticParameterName</c>,
/// cast to a fixed-value <c>BoolValue</c> parameter.
/// </summary>
public IFixedValueParameter<BoolValue> CheckExpressionsWithIntervalArithmeticParameter {
  get {
    var parameter = Parameters[CheckExpressionsWithIntervalArithmeticParameterName];
    return (IFixedValueParameter<BoolValue>)parameter;
  }
}
52
/// <summary>
/// The parameter stored under <c>EvaluatedSolutionsParameterName</c>,
/// cast to a fixed-value <c>IntValue</c> parameter.
/// </summary>
public IFixedValueParameter<IntValue> EvaluatedSolutionsParameter {
  get {
    var parameter = Parameters[EvaluatedSolutionsParameterName];
    return (IFixedValueParameter<IntValue>)parameter;
  }
}
56    #endregion
57
58    #region properties
/// <summary>
/// Reads or writes the boolean held by <see cref="CheckExpressionsWithIntervalArithmeticParameter"/>.
/// </summary>
public bool CheckExpressionsWithIntervalArithmetic {
  get {
    var flag = CheckExpressionsWithIntervalArithmeticParameter.Value;
    return flag.Value;
  }
  set {
    var flag = CheckExpressionsWithIntervalArithmeticParameter.Value;
    flag.Value = value;
  }
}
63
/// <summary>
/// Reads or writes the integer held by <see cref="EvaluatedSolutionsParameter"/>.
/// </summary>
public int EvaluatedSolutions {
  get {
    var counter = EvaluatedSolutionsParameter.Value;
    return counter.Value;
  }
  set {
    var counter = EvaluatedSolutionsParameter.Value;
    counter.Value = value;
  }
}
68    #endregion
69
/// <summary>
/// Constructor marked with <c>StorableConstructor</c>; it only forwards the flag to the base class.
/// NOTE(review): presumably invoked by the persistence framework during deserialization - confirm.
/// </summary>
[StorableConstructor]
protected SymbolicDataAnalysisExpressionTreeInterpreter(StorableConstructorFlag _)
  : base(_) {
}
72
/// <summary>
/// Cloning constructor; all copying is delegated to the base class.
/// </summary>
/// <param name="original">The instance to copy from.</param>
/// <param name="cloner">The cloner handed through to the base class.</param>
protected SymbolicDataAnalysisExpressionTreeInterpreter(
  SymbolicDataAnalysisExpressionTreeInterpreter original, Cloner cloner)
  : base(original, cloner) {
}
76
/// <summary>
/// Creates a copy of this interpreter through the cloning constructor.
/// </summary>
/// <param name="cloner">The cloner passed on to the cloning constructor.</param>
/// <returns>The newly constructed copy.</returns>
public override IDeepCloneable Clone(Cloner cloner) {
  var copy = new SymbolicDataAnalysisExpressionTreeInterpreter(this, cloner);
  return copy;
}
80
/// <summary>
/// Creates an interpreter with the default item name and description.
/// </summary>
/// <remarks>
/// Chains to the (name, description) constructor, which registers the two parameters,
/// so the parameter setup exists in one place only.
/// </remarks>
public SymbolicDataAnalysisExpressionTreeInterpreter()
  : this("SymbolicDataAnalysisExpressionTreeInterpreter", "Interpreter for symbolic expression trees including automatically defined functions.") {
}
86
/// <summary>
/// Creates an interpreter with the given item name and description and registers its parameters.
/// </summary>
/// <param name="name">Item name forwarded to the base class.</param>
/// <param name="description">Item description forwarded to the base class.</param>
protected SymbolicDataAnalysisExpressionTreeInterpreter(string name, string description)
  : base(name, description) {
  // Interval-arithmetic switch, off by default. The description comes from the shared
  // constant so it cannot drift from the text used in AfterDeserialization.
  Parameters.Add(new FixedValueParameter<BoolValue>(
    CheckExpressionsWithIntervalArithmeticParameterName,
    CheckExpressionsWithIntervalArithmeticParameterDescription,
    new BoolValue(false)));

  // Evaluation counter, starting at zero.
  Parameters.Add(new FixedValueParameter<IntValue>(
    EvaluatedSolutionsParameterName,
    "A counter for the total number of solutions the interpreter has evaluated",
    new IntValue(0)));
}
92
/// <summary>
/// Hook registered for <c>HookType.AfterDeserialization</c>. Re-creates both parameters as
/// <c>FixedValueParameter</c> instances: an existing parameter is removed and its value object
/// is reused; a missing parameter gets a default value (0 / false).
/// </summary>
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
  // defaults used when the deserialized object does not contain the parameter
  var counterValue = new IntValue(0);
  var intervalCheckValue = new BoolValue(false);

  // evaluated-solutions counter
  bool hasCounter = Parameters.ContainsKey(EvaluatedSolutionsParameterName);
  if (hasCounter) {
    var storedCounter = (IValueParameter<IntValue>)Parameters[EvaluatedSolutionsParameterName];
    counterValue = storedCounter.Value;
    Parameters.Remove(EvaluatedSolutionsParameterName);
  }
  var counterParameter = new FixedValueParameter<IntValue>(
    EvaluatedSolutionsParameterName,
    "A counter for the total number of solutions the interpreter has evaluated",
    counterValue);
  Parameters.Add(counterParameter);

  // interval-arithmetic switch
  bool hasIntervalCheck = Parameters.ContainsKey(CheckExpressionsWithIntervalArithmeticParameterName);
  if (hasIntervalCheck) {
    var storedSwitch = (IValueParameter<BoolValue>)Parameters[CheckExpressionsWithIntervalArithmeticParameterName];
    intervalCheckValue = storedSwitch.Value;
    Parameters.Remove(CheckExpressionsWithIntervalArithmeticParameterName);
  }
  var intervalCheckParameter = new FixedValueParameter<BoolValue>(
    CheckExpressionsWithIntervalArithmeticParameterName,
    CheckExpressionsWithIntervalArithmeticParameterDescription,
    intervalCheckValue);
  Parameters.Add(intervalCheckParameter);
}
110
111    #region IStatefulItem
/// <summary>
/// Resets the <see cref="EvaluatedSolutions"/> counter to zero.
/// </summary>
public void InitializeState() {
  const int initialCount = 0;
  EvaluatedSolutions = initialCount;
}
115
/// <summary>
/// Intentionally empty; this method performs no work.
/// </summary>
public void ClearState() {
}
117    #endregion
118
// guards the increment of the EvaluatedSolutions counter
private readonly object syncRoot = new object();

/// <summary>
/// Evaluates <paramref name="tree"/> once for every row index in <paramref name="rows"/>
/// and yields the results in the same order.
/// </summary>
/// <remarks>
/// This is an iterator method: nothing in its body (including the interval-arithmetic check
/// and the counter increment) runs until the returned sequence is enumerated, and it runs
/// again for each new enumeration.
/// </remarks>
/// <param name="tree">The expression tree to evaluate.</param>
/// <param name="dataset">The dataset the variable values are read from.</param>
/// <param name="rows">The row indices to evaluate.</param>
/// <returns>One value per entry of <paramref name="rows"/>.</returns>
/// <exception cref="NotSupportedException">
/// Thrown on enumeration when <see cref="CheckExpressionsWithIntervalArithmetic"/> is true.
/// </exception>
public IEnumerable<double> GetSymbolicExpressionTreeValues(ISymbolicExpressionTree tree, IDataset dataset,
  IEnumerable<int> rows) {
  bool intervalCheckRequested = CheckExpressionsWithIntervalArithmetic;
  if (intervalCheckRequested) {
    throw new NotSupportedException("Interval arithmetic is not yet supported in the symbolic data analysis interpreter.");
  }

  // count this evaluation
  lock (syncRoot) {
    EvaluatedSolutions = EvaluatedSolutions + 1;
  }

  InterpreterState interpreterState = PrepareInterpreterState(tree, dataset);

  using (IEnumerator<int> rowEnumerator = rows.GetEnumerator()) {
    while (rowEnumerator.MoveNext()) {
      // Evaluate takes the row by reference, so it needs a writable local copy
      int currentRow = rowEnumerator.Current;
      yield return Evaluate(dataset, ref currentRow, interpreterState);
      // reset the interpreter state before the next row is evaluated
      interpreterState.Reset();
    }
  }
}
137
/// <summary>
/// Compiles <paramref name="tree"/> into an instruction array and prepares it for evaluation
/// against <paramref name="dataset"/>.
/// </summary>
/// <remarks>
/// For every instruction that reads a dataset variable, the matching read-only column is
/// fetched once and stored in the instruction's <c>data</c> field. For every <c>Call</c>
/// instruction, <c>nArguments + 1</c> is added to the argument stack size that is passed
/// to the <c>InterpreterState</c> constructor.
/// </remarks>
/// <param name="tree">The expression tree to compile.</param>
/// <param name="dataset">The dataset providing the variable columns.</param>
/// <returns>A new interpreter state for the compiled code.</returns>
private static InterpreterState PrepareInterpreterState(ISymbolicExpressionTree tree, IDataset dataset) {
  Instruction[] code = SymbolicExpressionTreeCompiler.Compile(tree, OpCodes.MapSymbolToOpCode);
  int necessaryArgStackSize = 0;

  foreach (Instruction instr in code) {
    switch (instr.opCode) {
      case OpCodes.Variable: {
          var variableTreeNode = (VariableTreeNode)instr.dynamicNode;
          instr.data = dataset.GetReadOnlyDoubleValues(variableTreeNode.VariableName);
          break;
        }
      case OpCodes.FactorVariable: {
          // direct cast (not 'as'): a mismatched node type fails with InvalidCastException
          // here instead of a NullReferenceException on the next line
          var factorTreeNode = (FactorVariableTreeNode)instr.dynamicNode;
          instr.data = dataset.GetReadOnlyStringValues(factorTreeNode.VariableName);
          break;
        }
      case OpCodes.BinaryFactorVariable: {
          var binaryFactorTreeNode = (BinaryFactorVariableTreeNode)instr.dynamicNode;
          instr.data = dataset.GetReadOnlyStringValues(binaryFactorTreeNode.VariableName);
          break;
        }
      case OpCodes.LagVariable: {
          var laggedVariableTreeNode = (LaggedVariableTreeNode)instr.dynamicNode;
          instr.data = dataset.GetReadOnlyDoubleValues(laggedVariableTreeNode.VariableName);
          break;
        }
      case OpCodes.VariableCondition: {
          var variableConditionTreeNode = (VariableConditionTreeNode)instr.dynamicNode;
          instr.data = dataset.GetReadOnlyDoubleValues(variableConditionTreeNode.VariableName);
          break;
        }
      case OpCodes.Call: {
          necessaryArgStackSize += instr.nArguments + 1;
          break;
        }
    }
  }

  return new InterpreterState(code, necessaryArgStackSize);
}
163
/// <summary>
/// Fetches the next instruction from <paramref name="state"/> and evaluates it for the given
/// row, recursively evaluating (or skipping) the instructions of its arguments.
/// </summary>
/// <remarks>
/// <para>Each recursive call consumes the instructions of exactly one sub-expression; branches
/// that are not taken are consumed with <c>state.SkipInstructions()</c>.</para>
/// <para>Boolean opcodes (AND, OR, NOT, XOR, GT, LT, IfThenElse) treat a value greater than 0.0
/// as true; those that return a boolean return 1.0 for true and -1.0 for false.</para>
/// <para>Variable, BinaryFactorVariable, FactorVariable, LagVariable and VariableCondition return
/// <c>double.NaN</c> when the (lagged) row lies outside <c>[0, dataset.Rows)</c>.</para>
/// </remarks>
/// <param name="dataset">The dataset; only its row count is read here, the column data is
/// taken from the <c>data</c> field of the instructions.</param>
/// <param name="row">The row to evaluate. Passed by reference because TimeLag, Integral and
/// Derivative shift it while evaluating their argument and shift it back afterwards.</param>
/// <param name="state">The interpreter state holding the code, program counter and stack frames.</param>
/// <returns>The value of the sub-expression that starts at the current instruction.</returns>
/// <exception cref="NotSupportedException">Thrown for an opcode that has no case below.</exception>
public virtual double Evaluate(IDataset dataset, ref int row, InterpreterState state) {
  Instruction currentInstr = state.NextInstruction();
  switch (currentInstr.opCode) {
    case OpCodes.Add: {
        double s = Evaluate(dataset, ref row, state);
        for (int i = 1; i < currentInstr.nArguments; i++) {
          s += Evaluate(dataset, ref row, state);
        }
        return s;
      }
    case OpCodes.Sub: {
        double s = Evaluate(dataset, ref row, state);
        for (int i = 1; i < currentInstr.nArguments; i++) {
          s -= Evaluate(dataset, ref row, state);
        }
        // unary minus: a single argument is negated
        if (currentInstr.nArguments == 1) { s = -s; }
        return s;
      }
    case OpCodes.Mul: {
        double p = Evaluate(dataset, ref row, state);
        for (int i = 1; i < currentInstr.nArguments; i++) {
          p *= Evaluate(dataset, ref row, state);
        }
        return p;
      }
    case OpCodes.Div: {
        double p = Evaluate(dataset, ref row, state);
        for (int i = 1; i < currentInstr.nArguments; i++) {
          p /= Evaluate(dataset, ref row, state);
        }
        // unary division: a single argument is inverted
        if (currentInstr.nArguments == 1) { p = 1.0 / p; }
        return p;
      }
    case OpCodes.Average: {
        double sum = Evaluate(dataset, ref row, state);
        for (int i = 1; i < currentInstr.nArguments; i++) {
          sum += Evaluate(dataset, ref row, state);
        }
        return sum / currentInstr.nArguments;
      }
    case OpCodes.Absolute: {
        return Math.Abs(Evaluate(dataset, ref row, state));
      }
    case OpCodes.Cos: {
        return Math.Cos(Evaluate(dataset, ref row, state));
      }
    case OpCodes.Sin: {
        return Math.Sin(Evaluate(dataset, ref row, state));
      }
    case OpCodes.Tan: {
        return Math.Tan(Evaluate(dataset, ref row, state));
      }
    case OpCodes.Square: {
        return Math.Pow(Evaluate(dataset, ref row, state), 2);
      }
    case OpCodes.Cube: {
        return Math.Pow(Evaluate(dataset, ref row, state), 3);
      }
    case OpCodes.Power: {
        // the exponent is rounded to the nearest integer
        double x = Evaluate(dataset, ref row, state);
        double y = Math.Round(Evaluate(dataset, ref row, state));
        return Math.Pow(x, y);
      }
    case OpCodes.SquareRoot: {
        return Math.Sqrt(Evaluate(dataset, ref row, state));
      }
    case OpCodes.CubeRoot: {
        return Math.Pow(Evaluate(dataset, ref row, state), 1.0 / 3.0);
      }
    case OpCodes.Root: {
        // the root degree is rounded to the nearest integer
        double x = Evaluate(dataset, ref row, state);
        double y = Math.Round(Evaluate(dataset, ref row, state));
        return Math.Pow(x, 1 / y);
      }
    case OpCodes.Exp: {
        return Math.Exp(Evaluate(dataset, ref row, state));
      }
    case OpCodes.Log: {
        return Math.Log(Evaluate(dataset, ref row, state));
      }
    case OpCodes.Gamma: {
        var x = Evaluate(dataset, ref row, state);
        if (double.IsNaN(x)) { return double.NaN; }
        return alglib.gammafunction(x);
      }
    case OpCodes.Psi: {
        var x = Evaluate(dataset, ref row, state);
        if (double.IsNaN(x)) { return double.NaN; }
        // NaN as well for zero and (almost) negative integers
        if (x <= 0 && (Math.Floor(x) - x).IsAlmost(0)) { return double.NaN; }
        return alglib.psi(x);
      }
    case OpCodes.Dawson: {
        var x = Evaluate(dataset, ref row, state);
        if (double.IsNaN(x)) { return double.NaN; }
        return alglib.dawsonintegral(x);
      }
    case OpCodes.ExponentialIntegralEi: {
        var x = Evaluate(dataset, ref row, state);
        if (double.IsNaN(x)) { return double.NaN; }
        return alglib.exponentialintegralei(x);
      }
    case OpCodes.SineIntegral: {
        double si, ci;
        var x = Evaluate(dataset, ref row, state);
        if (double.IsNaN(x)) { return double.NaN; }
        alglib.sinecosineintegrals(x, out si, out ci);
        return si;
      }
    case OpCodes.CosineIntegral: {
        double si, ci;
        var x = Evaluate(dataset, ref row, state);
        if (double.IsNaN(x)) { return double.NaN; }
        alglib.sinecosineintegrals(x, out si, out ci);
        return ci;
      }
    case OpCodes.HyperbolicSineIntegral: {
        double shi, chi;
        var x = Evaluate(dataset, ref row, state);
        if (double.IsNaN(x)) { return double.NaN; }
        alglib.hyperbolicsinecosineintegrals(x, out shi, out chi);
        return shi;
      }
    case OpCodes.HyperbolicCosineIntegral: {
        double shi, chi;
        var x = Evaluate(dataset, ref row, state);
        if (double.IsNaN(x)) { return double.NaN; }
        alglib.hyperbolicsinecosineintegrals(x, out shi, out chi);
        return chi;
      }
    case OpCodes.FresnelCosineIntegral: {
        double c = 0, s = 0;
        var x = Evaluate(dataset, ref row, state);
        if (double.IsNaN(x)) { return double.NaN; }
        alglib.fresnelintegral(x, ref c, ref s);
        return c;
      }
    case OpCodes.FresnelSineIntegral: {
        double c = 0, s = 0;
        var x = Evaluate(dataset, ref row, state);
        if (double.IsNaN(x)) { return double.NaN; }
        alglib.fresnelintegral(x, ref c, ref s);
        return s;
      }
    case OpCodes.AiryA: {
        double ai, aip, bi, bip;
        var x = Evaluate(dataset, ref row, state);
        if (double.IsNaN(x)) { return double.NaN; }
        alglib.airy(x, out ai, out aip, out bi, out bip);
        return ai;
      }
    case OpCodes.AiryB: {
        double ai, aip, bi, bip;
        var x = Evaluate(dataset, ref row, state);
        if (double.IsNaN(x)) { return double.NaN; }
        alglib.airy(x, out ai, out aip, out bi, out bip);
        return bi;
      }
    case OpCodes.Norm: {
        var x = Evaluate(dataset, ref row, state);
        if (double.IsNaN(x)) { return double.NaN; }
        return alglib.normaldistribution(x);
      }
    case OpCodes.Erf: {
        var x = Evaluate(dataset, ref row, state);
        if (double.IsNaN(x)) { return double.NaN; }
        return alglib.errorfunction(x);
      }
    case OpCodes.Bessel: {
        var x = Evaluate(dataset, ref row, state);
        if (double.IsNaN(x)) { return double.NaN; }
        return alglib.besseli0(x);
      }

    case OpCodes.AnalyticQuotient: {
        // x1 / sqrt(1 + x2^2)
        var x1 = Evaluate(dataset, ref row, state);
        var x2 = Evaluate(dataset, ref row, state);
        return x1 / Math.Pow(1 + x2 * x2, 0.5);
      }
    case OpCodes.IfThenElse: {
        double condition = Evaluate(dataset, ref row, state);
        double result;
        if (condition > 0.0) {
          // evaluate the first branch, skip the second
          result = Evaluate(dataset, ref row, state);
          state.SkipInstructions();
        } else {
          // skip the first branch, evaluate the second
          state.SkipInstructions();
          result = Evaluate(dataset, ref row, state);
        }
        return result;
      }
    case OpCodes.AND: {
        double result = Evaluate(dataset, ref row, state);
        for (int i = 1; i < currentInstr.nArguments; i++) {
          // once a false value was seen, the remaining arguments are skipped
          if (result > 0.0) {
            result = Evaluate(dataset, ref row, state);
          } else {
            state.SkipInstructions();
          }
        }
        return result > 0.0 ? 1.0 : -1.0;
      }
    case OpCodes.OR: {
        double result = Evaluate(dataset, ref row, state);
        for (int i = 1; i < currentInstr.nArguments; i++) {
          // once a true value was seen, the remaining arguments are skipped
          if (result <= 0.0) {
            result = Evaluate(dataset, ref row, state);
          } else {
            state.SkipInstructions();
          }
        }
        return result > 0.0 ? 1.0 : -1.0;
      }
    case OpCodes.NOT: {
        return Evaluate(dataset, ref row, state) > 0.0 ? -1.0 : 1.0;
      }
    case OpCodes.XOR: {
        //mkommend: XOR on multiple inputs is defined as true if the number of positive signals is odd
        // this is equal to a consecutive execution of binary XOR operations.
        int positiveSignals = 0;
        for (int i = 0; i < currentInstr.nArguments; i++) {
          if (Evaluate(dataset, ref row, state) > 0.0) { positiveSignals++; }
        }
        return positiveSignals % 2 != 0 ? 1.0 : -1.0;
      }
    case OpCodes.GT: {
        double x = Evaluate(dataset, ref row, state);
        double y = Evaluate(dataset, ref row, state);
        return x > y ? 1.0 : -1.0;
      }
    case OpCodes.LT: {
        double x = Evaluate(dataset, ref row, state);
        double y = Evaluate(dataset, ref row, state);
        return x < y ? 1.0 : -1.0;
      }
    case OpCodes.TimeLag: {
        // evaluate the argument at row + Lag, then restore the row
        var timeLagTreeNode = (LaggedTreeNode)currentInstr.dynamicNode;
        row += timeLagTreeNode.Lag;
        double result = Evaluate(dataset, ref row, state);
        row -= timeLagTreeNode.Lag;
        return result;
      }
    case OpCodes.Integral: {
        // sums the argument over the rows between row and row + Lag (both inclusive);
        // the program counter is rewound so the same argument is evaluated for each row
        int savedPc = state.ProgramCounter;
        var timeLagTreeNode = (LaggedTreeNode)currentInstr.dynamicNode;
        double sum = 0.0;
        for (int i = 0; i < Math.Abs(timeLagTreeNode.Lag); i++) {
          row += Math.Sign(timeLagTreeNode.Lag);
          sum += Evaluate(dataset, ref row, state);
          state.ProgramCounter = savedPc;
        }
        // back to the original row; this last evaluation also advances the pc past the argument
        row -= timeLagTreeNode.Lag;
        sum += Evaluate(dataset, ref row, state);
        return sum;
      }

    //mkommend: derivative calculation taken from:
    //http://www.holoborodko.com/pavel/numerical-methods/numerical-derivative/smooth-low-noise-differentiators/
    //one-sided smooth differentiator, N = 4
    // y' = 1/(8h) * (f_i + 2 f_(i-1) - 2 f_(i-3) - f_(i-4))
    case OpCodes.Derivative: {
        // the argument is evaluated at row, row-1, row-3 and row-4; the pc is rewound in between
        int savedPc = state.ProgramCounter;
        double f_0 = Evaluate(dataset, ref row, state);
        row--;
        state.ProgramCounter = savedPc;
        double f_1 = Evaluate(dataset, ref row, state);
        row -= 2;
        state.ProgramCounter = savedPc;
        double f_3 = Evaluate(dataset, ref row, state);
        row--;
        state.ProgramCounter = savedPc;
        double f_4 = Evaluate(dataset, ref row, state);
        row += 4; // restore the original row

        return (f_0 + 2 * f_1 - 2 * f_3 - f_4) / 8; // h = 1
      }
    case OpCodes.Call: {
        // evaluate sub-trees
        double[] argValues = new double[currentInstr.nArguments];
        for (int i = 0; i < currentInstr.nArguments; i++) {
          argValues[i] = Evaluate(dataset, ref row, state);
        }
        // push argument values on the stack
        state.CreateStackFrame(argValues);

        // save the pc
        int savedPc = state.ProgramCounter;
        // set pc to start of function
        state.ProgramCounter = (ushort)currentInstr.data;
        // evaluate the function
        double v = Evaluate(dataset, ref row, state);

        // delete the stack frame
        state.RemoveStackFrame();

        // restore the pc => evaluation will continue at point after my subtrees
        state.ProgramCounter = savedPc;
        return v;
      }
    case OpCodes.Arg: {
        return state.GetStackFrameValue((ushort)currentInstr.data);
      }
    case OpCodes.Variable: {
        if (row < 0 || row >= dataset.Rows) { return double.NaN; }
        var variableTreeNode = (VariableTreeNode)currentInstr.dynamicNode;
        return ((IList<double>)currentInstr.data)[row] * variableTreeNode.Weight;
      }
    case OpCodes.BinaryFactorVariable: {
        if (row < 0 || row >= dataset.Rows) { return double.NaN; }
        // direct cast (not 'as'): a mismatched node type fails with InvalidCastException
        // instead of a NullReferenceException on the next line
        var factorVarTreeNode = (BinaryFactorVariableTreeNode)currentInstr.dynamicNode;
        return ((IList<string>)currentInstr.data)[row] == factorVarTreeNode.VariableValue ? factorVarTreeNode.Weight : 0;
      }
    case OpCodes.FactorVariable: {
        if (row < 0 || row >= dataset.Rows) { return double.NaN; }
        var factorVarTreeNode = (FactorVariableTreeNode)currentInstr.dynamicNode;
        return factorVarTreeNode.GetValue(((IList<string>)currentInstr.data)[row]);
      }
    case OpCodes.LagVariable: {
        var laggedVariableTreeNode = (LaggedVariableTreeNode)currentInstr.dynamicNode;
        int actualRow = row + laggedVariableTreeNode.Lag;
        if (actualRow < 0 || actualRow >= dataset.Rows) { return double.NaN; }
        return ((IList<double>)currentInstr.data)[actualRow] * laggedVariableTreeNode.Weight;
      }
    case OpCodes.Constant: {
        var constTreeNode = (ConstantTreeNode)currentInstr.dynamicNode;
        return constTreeNode.Value;
      }

    //mkommend: this symbol uses the logistic function f(x) = 1 / (1 + e^(-alpha * x) )
    //to determine the relative amounts of the true and false branch see http://en.wikipedia.org/wiki/Logistic_function
    case OpCodes.VariableCondition: {
        if (row < 0 || row >= dataset.Rows) { return double.NaN; }
        var variableConditionTreeNode = (VariableConditionTreeNode)currentInstr.dynamicNode;
        double variableValue = ((IList<double>)currentInstr.data)[row];
        if (!variableConditionTreeNode.Symbol.IgnoreSlope) {
          // soft threshold: both branches are evaluated and blended with weight p
          double x = variableValue - variableConditionTreeNode.Threshold;
          double p = 1 / (1 + Math.Exp(-variableConditionTreeNode.Slope * x));

          double trueBranch = Evaluate(dataset, ref row, state);
          double falseBranch = Evaluate(dataset, ref row, state);

          return trueBranch * p + falseBranch * (1 - p);
        } else {
          // strict threshold: only one branch is evaluated, the other one is skipped
          if (variableValue <= variableConditionTreeNode.Threshold) {
            var left = Evaluate(dataset, ref row, state);
            state.SkipInstructions();
            return left;
          } else {
            state.SkipInstructions();
            return Evaluate(dataset, ref row, state);
          }
        }
      }
    default:
      throw new NotSupportedException("The interpreter does not support the opcode " + currentInstr.opCode + ".");
  }
}
530  }
531}
Note: See TracBrowser for help on using the repository browser.