Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisExpressionTreeInterpreter.cs @ 6755

Last change on this file since 6755 was 6740, checked in by mkommend, 13 years ago

#1597, #1609, #1640:

  • Corrected TableFileParser to handle empty rows correctly.
  • Refactored DataSet to store values in List<List> instead of a two-dimensional array.
  • Enable importing and storing string and datetime values.
  • Changed data access methods in dataset and adapted all concerning classes.
  • Changed interpreter to store the variable values for all rows during the compilation step.
File size: 18.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Parameters;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30
31namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
32  [StorableClass]
33  [Item("SymbolicDataAnalysisExpressionTreeInterpreter", "Interpreter for symbolic expression trees including automatically defined functions.")]
34  public sealed class SymbolicDataAnalysisExpressionTreeInterpreter : ParameterizedNamedItem, ISymbolicDataAnalysisExpressionTreeInterpreter {
35    private const string CheckExpressionsWithIntervalArithmeticParameterName = "CheckExpressionsWithIntervalArithmetic";
36    #region private classes
37    private class InterpreterState {
38      private double[] argumentStack;
39      private int argumentStackPointer;
40      private Instruction[] code;
41      private int pc;
42      public int ProgramCounter {
43        get { return pc; }
44        set { pc = value; }
45      }
46      internal InterpreterState(Instruction[] code, int argumentStackSize) {
47        this.code = code;
48        this.pc = 0;
49        if (argumentStackSize > 0) {
50          this.argumentStack = new double[argumentStackSize];
51        }
52        this.argumentStackPointer = 0;
53      }
54
55      internal void Reset() {
56        this.pc = 0;
57        this.argumentStackPointer = 0;
58      }
59
60      internal Instruction NextInstruction() {
61        return code[pc++];
62      }
63      private void Push(double val) {
64        argumentStack[argumentStackPointer++] = val;
65      }
66      private double Pop() {
67        return argumentStack[--argumentStackPointer];
68      }
69
70      internal void CreateStackFrame(double[] argValues) {
71        // push in reverse order to make indexing easier
72        for (int i = argValues.Length - 1; i >= 0; i--) {
73          argumentStack[argumentStackPointer++] = argValues[i];
74        }
75        Push(argValues.Length);
76      }
77
78      internal void RemoveStackFrame() {
79        int size = (int)Pop();
80        argumentStackPointer -= size;
81      }
82
83      internal double GetStackFrameValue(ushort index) {
84        // layout of stack:
85        // [0]   <- argumentStackPointer
86        // [StackFrameSize = N + 1]
87        // [Arg0] <- argumentStackPointer - 2 - 0
88        // [Arg1] <- argumentStackPointer - 2 - 1
89        // [...]
90        // [ArgN] <- argumentStackPointer - 2 - N
91        // <Begin of stack frame>
92        return argumentStack[argumentStackPointer - index - 2];
93      }
94    }
95    private class OpCodes {
96      public const byte Add = 1;
97      public const byte Sub = 2;
98      public const byte Mul = 3;
99      public const byte Div = 4;
100
101      public const byte Sin = 5;
102      public const byte Cos = 6;
103      public const byte Tan = 7;
104
105      public const byte Log = 8;
106      public const byte Exp = 9;
107
108      public const byte IfThenElse = 10;
109
110      public const byte GT = 11;
111      public const byte LT = 12;
112
113      public const byte AND = 13;
114      public const byte OR = 14;
115      public const byte NOT = 15;
116
117
118      public const byte Average = 16;
119
120      public const byte Call = 17;
121
122      public const byte Variable = 18;
123      public const byte LagVariable = 19;
124      public const byte Constant = 20;
125      public const byte Arg = 21;
126
127      public const byte Power = 22;
128      public const byte Root = 23;
129      public const byte TimeLag = 24;
130      public const byte Integral = 25;
131      public const byte Derivative = 26;
132
133      public const byte VariableCondition = 27;
134    }
135    #endregion
136
137    private Dictionary<Type, byte> symbolToOpcode = new Dictionary<Type, byte>() {
138      { typeof(Addition), OpCodes.Add },
139      { typeof(Subtraction), OpCodes.Sub },
140      { typeof(Multiplication), OpCodes.Mul },
141      { typeof(Division), OpCodes.Div },
142      { typeof(Sine), OpCodes.Sin },
143      { typeof(Cosine), OpCodes.Cos },
144      { typeof(Tangent), OpCodes.Tan },
145      { typeof(Logarithm), OpCodes.Log },
146      { typeof(Exponential), OpCodes.Exp },
147      { typeof(IfThenElse), OpCodes.IfThenElse },
148      { typeof(GreaterThan), OpCodes.GT },
149      { typeof(LessThan), OpCodes.LT },
150      { typeof(And), OpCodes.AND },
151      { typeof(Or), OpCodes.OR },
152      { typeof(Not), OpCodes.NOT},
153      { typeof(Average), OpCodes.Average},
154      { typeof(InvokeFunction), OpCodes.Call },
155      { typeof(HeuristicLab.Problems.DataAnalysis.Symbolic.Variable), OpCodes.Variable },
156      { typeof(LaggedVariable), OpCodes.LagVariable },
157      { typeof(Constant), OpCodes.Constant },
158      { typeof(Argument), OpCodes.Arg },
159      { typeof(Power),OpCodes.Power},
160      { typeof(Root),OpCodes.Root},
161      { typeof(TimeLag), OpCodes.TimeLag},
162      { typeof(Integral), OpCodes.Integral},
163      { typeof(Derivative), OpCodes.Derivative},
164      { typeof(VariableCondition),OpCodes.VariableCondition}
165    };
166
167    public override bool CanChangeName {
168      get { return false; }
169    }
170    public override bool CanChangeDescription {
171      get { return false; }
172    }
173
174    #region parameter properties
175    public IValueParameter<BoolValue> CheckExpressionsWithIntervalArithmeticParameter {
176      get { return (IValueParameter<BoolValue>)Parameters[CheckExpressionsWithIntervalArithmeticParameterName]; }
177    }
178    #endregion
179
180    #region properties
181    public BoolValue CheckExpressionsWithIntervalArithmetic {
182      get { return CheckExpressionsWithIntervalArithmeticParameter.Value; }
183      set { CheckExpressionsWithIntervalArithmeticParameter.Value = value; }
184    }
185    #endregion
186
187
188    [StorableConstructor]
189    private SymbolicDataAnalysisExpressionTreeInterpreter(bool deserializing) : base(deserializing) { }
190    private SymbolicDataAnalysisExpressionTreeInterpreter(SymbolicDataAnalysisExpressionTreeInterpreter original, Cloner cloner) : base(original, cloner) { }
191    public override IDeepCloneable Clone(Cloner cloner) {
192      return new SymbolicDataAnalysisExpressionTreeInterpreter(this, cloner);
193    }
194
195    public SymbolicDataAnalysisExpressionTreeInterpreter()
196      : base("SymbolicDataAnalysisExpressionTreeInterpreter", "Interpreter for symbolic expression trees including automatically defined functions.") {
197      Parameters.Add(new ValueParameter<BoolValue>(CheckExpressionsWithIntervalArithmeticParameterName, "Switch that determines if the interpreter checks the validity of expressions with interval arithmetic before evaluating the expression.", new BoolValue(false)));
198    }
199
200    public IEnumerable<double> GetSymbolicExpressionTreeValues(ISymbolicExpressionTree tree, Dataset dataset, IEnumerable<int> rows) {
201      if (CheckExpressionsWithIntervalArithmetic.Value)
202        throw new NotSupportedException("Interval arithmetic is not yet supported in the symbolic data analysis interpreter.");
203      var compiler = new SymbolicExpressionTreeCompiler();
204      Instruction[] code = compiler.Compile(tree, MapSymbolToOpCode);
205      int necessaryArgStackSize = 0;
206      for (int i = 0; i < code.Length; i++) {
207        Instruction instr = code[i];
208        if (instr.opCode == OpCodes.Variable) {
209          var variableTreeNode = instr.dynamicNode as VariableTreeNode;
210          instr.iArg0 = dataset.GetReadOnlyDoubleValues(variableTreeNode.VariableName);
211          code[i] = instr;
212        } else if (instr.opCode == OpCodes.LagVariable) {
213          var laggedVariableTreeNode = instr.dynamicNode as LaggedVariableTreeNode;
214          instr.iArg0 = dataset.GetReadOnlyDoubleValues(laggedVariableTreeNode.VariableName);
215          code[i] = instr;
216        } else if (instr.opCode == OpCodes.VariableCondition) {
217          var variableConditionTreeNode = instr.dynamicNode as VariableConditionTreeNode;
218          instr.iArg0 = dataset.GetReadOnlyDoubleValues(variableConditionTreeNode.VariableName);
219        } else if (instr.opCode == OpCodes.Call) {
220          necessaryArgStackSize += instr.nArguments + 1;
221        }
222      }
223      var state = new InterpreterState(code, necessaryArgStackSize);
224
225      foreach (var rowEnum in rows) {
226        int row = rowEnum;
227        state.Reset();
228        yield return Evaluate(dataset, ref row, state);
229      }
230    }
231
232    private double Evaluate(Dataset dataset, ref int row, InterpreterState state) {
233      Instruction currentInstr = state.NextInstruction();
234      switch (currentInstr.opCode) {
235        case OpCodes.Add: {
236            double s = Evaluate(dataset, ref row, state);
237            for (int i = 1; i < currentInstr.nArguments; i++) {
238              s += Evaluate(dataset, ref row, state);
239            }
240            return s;
241          }
242        case OpCodes.Sub: {
243            double s = Evaluate(dataset, ref row, state);
244            for (int i = 1; i < currentInstr.nArguments; i++) {
245              s -= Evaluate(dataset, ref row, state);
246            }
247            if (currentInstr.nArguments == 1) s = -s;
248            return s;
249          }
250        case OpCodes.Mul: {
251            double p = Evaluate(dataset, ref row, state);
252            for (int i = 1; i < currentInstr.nArguments; i++) {
253              p *= Evaluate(dataset, ref row, state);
254            }
255            return p;
256          }
257        case OpCodes.Div: {
258            double p = Evaluate(dataset, ref row, state);
259            for (int i = 1; i < currentInstr.nArguments; i++) {
260              p /= Evaluate(dataset, ref row, state);
261            }
262            if (currentInstr.nArguments == 1) p = 1.0 / p;
263            return p;
264          }
265        case OpCodes.Average: {
266            double sum = Evaluate(dataset, ref row, state);
267            for (int i = 1; i < currentInstr.nArguments; i++) {
268              sum += Evaluate(dataset, ref row, state);
269            }
270            return sum / currentInstr.nArguments;
271          }
272        case OpCodes.Cos: {
273            return Math.Cos(Evaluate(dataset, ref row, state));
274          }
275        case OpCodes.Sin: {
276            return Math.Sin(Evaluate(dataset, ref row, state));
277          }
278        case OpCodes.Tan: {
279            return Math.Tan(Evaluate(dataset, ref row, state));
280          }
281        case OpCodes.Power: {
282            double x = Evaluate(dataset, ref row, state);
283            double y = Math.Round(Evaluate(dataset, ref row, state));
284            return Math.Pow(x, y);
285          }
286        case OpCodes.Root: {
287            double x = Evaluate(dataset, ref row, state);
288            double y = Math.Round(Evaluate(dataset, ref row, state));
289            return Math.Pow(x, 1 / y);
290          }
291        case OpCodes.Exp: {
292            return Math.Exp(Evaluate(dataset, ref row, state));
293          }
294        case OpCodes.Log: {
295            return Math.Log(Evaluate(dataset, ref row, state));
296          }
297        case OpCodes.IfThenElse: {
298            double condition = Evaluate(dataset, ref row, state);
299            double result;
300            if (condition > 0.0) {
301              result = Evaluate(dataset, ref row, state); SkipInstructions(state);
302            } else {
303              SkipInstructions(state); result = Evaluate(dataset, ref row, state);
304            }
305            return result;
306          }
307        case OpCodes.AND: {
308            double result = Evaluate(dataset, ref row, state);
309            for (int i = 1; i < currentInstr.nArguments; i++) {
310              if (result > 0.0) result = Evaluate(dataset, ref row, state);
311              else {
312                SkipInstructions(state);
313              }
314            }
315            return result > 0.0 ? 1.0 : -1.0;
316          }
317        case OpCodes.OR: {
318            double result = Evaluate(dataset, ref row, state);
319            for (int i = 1; i < currentInstr.nArguments; i++) {
320              if (result <= 0.0) result = Evaluate(dataset, ref row, state);
321              else {
322                SkipInstructions(state);
323              }
324            }
325            return result > 0.0 ? 1.0 : -1.0;
326          }
327        case OpCodes.NOT: {
328            return Evaluate(dataset, ref row, state) > 0.0 ? -1.0 : 1.0;
329          }
330        case OpCodes.GT: {
331            double x = Evaluate(dataset, ref row, state);
332            double y = Evaluate(dataset, ref row, state);
333            if (x > y) return 1.0;
334            else return -1.0;
335          }
336        case OpCodes.LT: {
337            double x = Evaluate(dataset, ref row, state);
338            double y = Evaluate(dataset, ref row, state);
339            if (x < y) return 1.0;
340            else return -1.0;
341          }
342        case OpCodes.TimeLag: {
343            var timeLagTreeNode = (LaggedTreeNode)currentInstr.dynamicNode;
344            row += timeLagTreeNode.Lag;
345            double result = Evaluate(dataset, ref row, state);
346            row -= timeLagTreeNode.Lag;
347            return result;
348          }
349        case OpCodes.Integral: {
350            int savedPc = state.ProgramCounter;
351            var timeLagTreeNode = (LaggedTreeNode)currentInstr.dynamicNode;
352            double sum = 0.0;
353            for (int i = 0; i < Math.Abs(timeLagTreeNode.Lag); i++) {
354              row += Math.Sign(timeLagTreeNode.Lag);
355              sum += Evaluate(dataset, ref row, state);
356              state.ProgramCounter = savedPc;
357            }
358            row -= timeLagTreeNode.Lag;
359            sum += Evaluate(dataset, ref row, state);
360            return sum;
361          }
362
363        //mkommend: derivate calculation taken from:
364        //http://www.holoborodko.com/pavel/numerical-methods/numerical-derivative/smooth-low-noise-differentiators/
365        //one sided smooth differentiatior, N = 4
366        // y' = 1/8h (f_i + 2f_i-1, -2 f_i-3 - f_i-4)
367        case OpCodes.Derivative: {
368            int savedPc = state.ProgramCounter;
369            double f_0 = Evaluate(dataset, ref row, state); row--;
370            state.ProgramCounter = savedPc;
371            double f_1 = Evaluate(dataset, ref row, state); row -= 2;
372            state.ProgramCounter = savedPc;
373            double f_3 = Evaluate(dataset, ref row, state); row--;
374            state.ProgramCounter = savedPc;
375            double f_4 = Evaluate(dataset, ref row, state);
376            row += 4;
377
378            return (f_0 + 2 * f_1 - 2 * f_3 - f_4) / 8; // h = 1
379          }
380        case OpCodes.Call: {
381            // evaluate sub-trees
382            double[] argValues = new double[currentInstr.nArguments];
383            for (int i = 0; i < currentInstr.nArguments; i++) {
384              argValues[i] = Evaluate(dataset, ref row, state);
385            }
386            // push on argument values on stack
387            state.CreateStackFrame(argValues);
388
389            // save the pc
390            int savedPc = state.ProgramCounter;
391            // set pc to start of function 
392            state.ProgramCounter = (ushort)currentInstr.iArg0;
393            // evaluate the function
394            double v = Evaluate(dataset, ref row, state);
395
396            // delete the stack frame
397            state.RemoveStackFrame();
398
399            // restore the pc => evaluation will continue at point after my subtrees 
400            state.ProgramCounter = savedPc;
401            return v;
402          }
403        case OpCodes.Arg: {
404            return state.GetStackFrameValue((ushort)currentInstr.iArg0);
405          }
406        case OpCodes.Variable: {
407            if (row < 0 || row >= dataset.Rows)
408              return double.NaN;
409            var variableTreeNode = (VariableTreeNode)currentInstr.dynamicNode;
410            return ((IList<double>)currentInstr.iArg0)[row] * variableTreeNode.Weight;
411          }
412        case OpCodes.LagVariable: {
413            var laggedVariableTreeNode = (LaggedVariableTreeNode)currentInstr.dynamicNode;
414            int actualRow = row + laggedVariableTreeNode.Lag;
415            if (actualRow < 0 || actualRow >= dataset.Rows)
416              return double.NaN;
417            return ((IList<double>)currentInstr.iArg0)[row] * laggedVariableTreeNode.Weight;
418          }
419        case OpCodes.Constant: {
420            var constTreeNode = currentInstr.dynamicNode as ConstantTreeNode;
421            return constTreeNode.Value;
422          }
423
424        //mkommend: this symbol uses the logistic function f(x) = 1 / (1 + e^(-alpha * x) )
425        //to determine the relative amounts of the true and false branch see http://en.wikipedia.org/wiki/Logistic_function
426        case OpCodes.VariableCondition: {
427            if (row < 0 || row >= dataset.Rows)
428              return double.NaN;
429            var variableConditionTreeNode = (VariableConditionTreeNode)currentInstr.dynamicNode;
430            double variableValue = ((IList<double>)currentInstr.iArg0)[row];
431            double x = variableValue - variableConditionTreeNode.Threshold;
432            double p = 1 / (1 + Math.Exp(-variableConditionTreeNode.Slope * x));
433
434            double trueBranch = Evaluate(dataset, ref row, state);
435            double falseBranch = Evaluate(dataset, ref row, state);
436
437            return trueBranch * p + falseBranch * (1 - p);
438          }
439        default: throw new NotSupportedException();
440      }
441    }
442
443    private byte MapSymbolToOpCode(ISymbolicExpressionTreeNode treeNode) {
444      if (symbolToOpcode.ContainsKey(treeNode.Symbol.GetType()))
445        return symbolToOpcode[treeNode.Symbol.GetType()];
446      else
447        throw new NotSupportedException("Symbol: " + treeNode.Symbol);
448    }
449
450    // skips a whole branch
451    private void SkipInstructions(InterpreterState state) {
452      int i = 1;
453      while (i > 0) {
454        i += state.NextInstruction().nArguments;
455        i--;
456      }
457    }
458  }
459}
Note: See TracBrowser for help on using the repository browser.