(Trac repository browser export — page chrome removed)

source: branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Converters/TreeToTensorConverter.cs @ 18239

Last change on this file since 18239 was 18239, checked in by pfleck, 2 years ago

#3040 Updated to newer TensorFlow.NET version.

  • Removed IL Merge from TensorFlow.NET.
  • Temporarily removed DiffSharp.
  • Changed to a locally built Attic with a specific Protobuf version that is compatible with TensorFlow.NET. (Also adapted other versions of nuget dependencies.)
File size: 16.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
26using Tensorflow;
27using Tensorflow.NumPy;
28using static Tensorflow.Binding;
29using DoubleVector = MathNet.Numerics.LinearAlgebra.Vector<double>;
30
31namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
32  public class TreeToTensorConverter {
33
34    //private static readonly TF_DataType DataType = tf.float64;
35    private static readonly TF_DataType DataType = tf.float32;
36
37    public static bool TryPrepareTree(
38      ISymbolicExpressionTree tree,
39      IRegressionProblemData problemData, List<int> rows,
40      bool updateVariableWeights, bool applyLinearScaling,
41      bool eagerEvaluation,
42      out Dictionary<string, Tensor> inputFeatures, out Tensor target,
43      out Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]> variables) {
44
45      try {
46        var converter = new TreeToTensorConverter(
47          problemData, rows,
48          updateVariableWeights, applyLinearScaling,
49          eagerEvaluation
50        );
51
52        if (eagerEvaluation)
53          tf.enable_eager_execution();
54        else
55          tf.compat.v1.disable_eager_execution();
56
57        converter.PrepareNode(tree.Root.GetSubtree(0));
58
59        inputFeatures = converter.inputFeatures;
60        target = InputFeatureToTensor(problemData.TargetVariable, problemData, rows);
61        variables = converter.variables;
62
63        return true;
64      } catch (NotSupportedException) {
65        inputFeatures = null;
66        target= null;
67        variables = null;
68        return false;
69      }
70
71    }
72
73    public static bool TryEvaluate(
74      ISymbolicExpressionTree tree,
75      Dictionary<string, Tensor> inputFeatures, Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]> variables,
76      bool makeVariableWeightsVariable, bool addLinearScalingTerms,
77      bool eagerEvaluation,
78      out Tensor prediction) {
79
80      try {
81        var converter = new TreeToTensorConverter(
82          inputFeatures, variables,
83          makeVariableWeightsVariable, addLinearScalingTerms,
84          eagerEvaluation
85        );
86
87        if (eagerEvaluation)
88          tf.enable_eager_execution();
89        else
90          tf.compat.v1.disable_eager_execution();
91
92        prediction = converter.EvaluateNode(tree.Root.GetSubtree(0));
93
94        return true;
95      } catch (NotSupportedException) {
96        prediction = null;
97        return false;
98      }
99
100    }
101
102    //public static bool TryConvert/*Lazy/Graph*/(
103    //  ISymbolicExpressionTree tree,
104    //  Dictionary<string, Tensor> inputFeatures, Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]> variables,
105    //  bool makeVariableWeightsVariable, bool addLinearScalingTerms,
106    //  out Tensor prediction) {
107
108    //  try {
109    //    var converter = new TreeToTensorConverter(
110    //      inputFeatures, variables,
111    //      makeVariableWeightsVariable, addLinearScalingTerms,
112    //      eagerEvaluation: false
113    //    );
114       
115    //    tf.compat.v1.disable_eager_execution();
116    //    prediction = converter.EvaluateNode(tree.Root.GetSubtree(0));
117    //    return true;
118    //  } catch (NotSupportedException) {
119    //    prediction = null;
120    //    return false;
121    //  }
122    //}
123
124
125    private readonly IDataAnalysisProblemData problemData;
126    private readonly List<int> rows;
127
128    private readonly Dictionary<string, Tensor> inputFeatures = new Dictionary<string, Tensor>();
129    private readonly Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]> variables = new Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]>();
130
131    private readonly bool makeVariableWeightsVariable;
132    private readonly bool addLinearScalingTerms;
133    private readonly bool eagerEvaluation;
134
135    private TreeToTensorConverter(
136      IDataAnalysisProblemData problemData, List<int> rows,
137      bool makeVariableWeightsVariable, bool addLinearScalingTerms,
138      bool eagerEvaluation 
139    ) {
140      this.problemData = problemData;
141      this.rows = rows;
142
143      this.makeVariableWeightsVariable = makeVariableWeightsVariable;
144      this.addLinearScalingTerms = addLinearScalingTerms;
145      this.eagerEvaluation = eagerEvaluation;
146    }
147
148    private TreeToTensorConverter(
149      Dictionary<string, Tensor> inputFeatures, Dictionary<ISymbolicExpressionTreeNode, ResourceVariable[]> variables,
150      bool makeVariableWeightsVariable, bool addLinearScalingTerms,
151      bool eagerEvaluation
152    ) {
153      this.inputFeatures = inputFeatures;
154      this.variables = variables;
155
156      this.makeVariableWeightsVariable = makeVariableWeightsVariable;
157      this.addLinearScalingTerms = addLinearScalingTerms;
158      this.eagerEvaluation = eagerEvaluation;
159    }
160   
161    private static Tensor InputFeatureToTensor(string var, IDataAnalysisProblemData problemData, List<int> rows) {
162      if (problemData.Dataset.VariableHasType<double>(var)) {
163        var data = problemData.Dataset.GetDoubleValues(var, rows).Select(x => (float)x).ToArray();
164        return tf.convert_to_tensor(np.array(data).reshape(new Shape(rows.Count, 1)), DataType);
165      } else if (problemData.Dataset.VariableHasType<DoubleVector>(var)) {
166        var data = problemData.Dataset.GetDoubleVectorValues(var, rows).SelectMany(x => x.Select(y => (float)y)).ToArray();
167        return tf.convert_to_tensor(np.array(data).reshape(new Shape(rows.Count, -1)), DataType);
168      } else throw new NotSupportedException($"Type of the variable is not supported: {var}");
169    }
170    private static Tensor InputFeatureToPlaceholder(string var, IDataAnalysisProblemData problemData, List<int> rows) {
171      if (problemData.Dataset.VariableHasType<double>(var)) {
172        return tf.placeholder(DataType, new Shape(rows.Count, 1), name: var);
173      } else if (problemData.Dataset.VariableHasType<DoubleVector>(var)) {
174        //var vectorLength = problemData.Dataset.GetDoubleVectorValues(var, rows).Select(v => v.Count).Distinct().Single();
175        var vectorLength = problemData.Dataset.GetDoubleVectorValue(var, rows[0]).Count;
176        return tf.placeholder(DataType, new Shape(rows.Count, vectorLength), name: var);
177      } else throw new NotSupportedException($"Type of the variable is not supported: {var}");
178    }
179
180    private void PrepareNode(ISymbolicExpressionTreeNode node) {
181      if (node.Symbol is Constant ) {
182        var constantNode = (ConstantTreeNode)node;
183        var value = (float)constantNode.Value;
184        var value_arr = np.array(value).reshape(new Shape(1, 1));
185        var c = tf.Variable(value_arr, name: $"c_{variables.Count}", dtype: DataType);
186        variables.Add(node, new[] { c });
187      } else if (node.Symbol is Variable) {
188        var varNode = (VariableTreeNodeBase)node;
189        if (makeVariableWeightsVariable) {
190          var w_arr = np.array((float)varNode.Weight).reshape(new Shape(1, 1));
191          var w = tf.Variable(w_arr, name: $"w_{varNode.VariableName}", dtype: DataType);
192          variables.Add(node, new[] { w });
193        }
194        if (!inputFeatures.ContainsKey(varNode.VariableName)) {
195          inputFeatures.Add(
196            varNode.VariableName,
197            eagerEvaluation
198              ? InputFeatureToTensor(varNode.VariableName, problemData, rows)
199              : InputFeatureToPlaceholder(varNode.VariableName, problemData, rows));
200        }
201      } else if (node.Symbol is StartSymbol) {
202        if (addLinearScalingTerms) {
203          var alpha_arr = np.array(1.0f).reshape(new Shape(1, 1));
204          var alpha = tf.Variable(alpha_arr, name: "alpha", dtype: DataType);
205          var beta_arr = np.array(0.0f).reshape(new Shape(1, 1));
206          var beta = tf.Variable(beta_arr, name: "beta", dtype: DataType);
207          variables.Add(node, new[] { beta, alpha });
208        }
209      }
210
211      foreach (var subTree in node.Subtrees) {
212        PrepareNode(subTree);
213      }
214    }
215
216
217    private Tensor EvaluateNode(ISymbolicExpressionTreeNode node) {
218      if (node.Symbol is Constant) {
219        return variables[node][0];
220      }
221
222      if (node.Symbol is Variable/* || node.Symbol is BinaryFactorVariable*/) {
223        var varNode = node as VariableTreeNodeBase;
224       
225        var par = inputFeatures[varNode.VariableName]; // eager or placeholder
226        if (makeVariableWeightsVariable) {
227          var w = variables[node][0];
228          return w * par;
229        } else {
230          return varNode.Weight * par;
231        }
232      }
233
234      //if (node.Symbol is FactorVariable) {
235      //  var factorVarNode = node as FactorVariableTreeNode;
236      //  var products = new List<Tensor>();
237      //  foreach (var variableValue in factorVarNode.Symbol.GetVariableValues(factorVarNode.VariableName)) {
238      //    //var par = FindOrCreateParameter(parameters, factorVarNode.VariableName, variableValue);
239      //    var par = tf.placeholder(DataType, new TensorShape(numRows, 1), name: factorVarNode.VariableName);
240      //    parameters.Add(par, factorVarNode.VariableName);
241
242      //    var value = factorVarNode.GetValue(variableValue);
243      //    //initialConstants.Add(value);
244      //    var wVar = (RefVariable)tf.VariableV1(value, name: $"f_{factorVarNode.VariableName}_{variables.Count}", dtype: DataType, shape: new[] { 1, 1 });
245      //    //var wVar = tf.Variable(value, name: $"f_{factorVarNode.VariableName}_{variables.Count}"/*, shape: new[] { 1, 1 }*/);
246      //    variables.Add(wVar);
247
248      //    products.add(wVar * par);
249      //  }
250
251      //  return products.Aggregate((a, b) => a + b);
252      //}
253
254      if (node.Symbol is Addition) {
255        var terms = node.Subtrees.Select(EvaluateNode).ToList();
256        if (terms.Count == 1) return terms[0];
257        return terms.Aggregate((a, b) => a + b);
258      }
259
260      if (node.Symbol is Subtraction) {
261        var terms = node.Subtrees.Select(EvaluateNode).ToList();
262        if (terms.Count == 1) return -terms[0];
263        return terms.Aggregate((a, b) => a - b);
264      }
265
266      if (node.Symbol is Multiplication) {
267        var terms = node.Subtrees.Select(EvaluateNode).ToList();
268        if (terms.Count == 1) return terms[0];
269        return terms.Aggregate((a, b) => a * b);
270      }
271
272      if (node.Symbol is Division) {
273        var terms = node.Subtrees.Select(EvaluateNode).ToList();
274        //if (terms.Count == 1) return 1.0f / terms[0];
275        if (terms.Count == 1) return 1.0 / terms[0];
276        return terms.Aggregate((a, b) => a / b);
277      }
278
279      if (node.Symbol is Absolute) {
280        var x1 = EvaluateNode(node.GetSubtree(0));
281        return tf.abs(x1);
282      }
283
284      if (node.Symbol is AnalyticQuotient) {
285        var x1 = EvaluateNode(node.GetSubtree(0));
286        var x2 = EvaluateNode(node.GetSubtree(1));
287        return x1 / tf.pow(1.0f + x2 * x2, 0.5f);
288        //return x1 / tf.pow(1.0 + x2 * x2, 0.5);
289      }
290
291      if (node.Symbol is Logarithm) {
292        return tf.log(
293          EvaluateNode(node.GetSubtree(0)));
294      }
295
296      if (node.Symbol is Exponential) {
297        return tf.pow(
298          (float)Math.E,
299          //Math.E,
300          EvaluateNode(node.GetSubtree(0)));
301      }
302
303      if (node.Symbol is Square) {
304        return tf.square(
305          EvaluateNode(node.GetSubtree(0)));
306      }
307
308      if (node.Symbol is SquareRoot) {
309        return tf.sqrt(
310          EvaluateNode(node.GetSubtree(0)));
311      }
312
313      if (node.Symbol is Cube) {
314        return tf.pow(
315          EvaluateNode(node.GetSubtree(0)), 3.0f);
316        //ConvertNode(node.GetSubtree(0)), 3.0);
317      }
318
319      if (node.Symbol is CubeRoot) {
320        return tf.pow(
321          EvaluateNode(node.GetSubtree(0)), 1.0f / 3.0f);
322        //ConvertNode(node.GetSubtree(0)), 1.0 / 3.0);
323        // TODO
324        // f: x < 0 ? -Math.Pow(-x, 1.0 / 3) : Math.Pow(x, 1.0 / 3),
325        // g:  { var cbrt_x = x < 0 ? -Math.Pow(-x, 1.0 / 3) : Math.Pow(x, 1.0 / 3); return 1.0 / (3 * cbrt_x * cbrt_x); }
326      }
327
328      if (node.Symbol is Sine) {
329        return tf.sin(
330          EvaluateNode(node.GetSubtree(0)));
331      }
332
333      if (node.Symbol is Cosine) {
334        return tf.cos(
335          EvaluateNode(node.GetSubtree(0)));
336      }
337
338      if (node.Symbol is Tangent) {
339        return tf.tan(
340          EvaluateNode(node.GetSubtree(0)));
341      }
342
343      if (node.Symbol is Mean) {
344        return tf.reduce_mean(
345          EvaluateNode(node.GetSubtree(0)),
346          axis: new[] { 1 },
347          keepdims: true);
348      }
349
350      if (node.Symbol is StandardDeviation) {
351        return tf.reduce_std(
352          EvaluateNode(node.GetSubtree(0)),
353          axis: new[] { 1 },
354          keepdims: true
355        );
356      }
357
358      if (node.Symbol is Variance) {
359        return tf.reduce_variance(
360          EvaluateNode(node.GetSubtree(0)),
361          axis: new[] { 1 } ,
362          keepdims: true
363        );
364      }
365
366      if (node.Symbol is Sum) {
367        return tf.reduce_sum(
368          EvaluateNode(node.GetSubtree(0)),
369          axis: new[] { 1 },
370          keepdims: true);
371      }
372
373      if (node.Symbol is SubVector) {
374        var tensor = EvaluateNode(node.GetSubtree(0));
375        int rows = (int)tensor.shape[0], vectorLength = (int)tensor.shape[1];
376        var windowedNode = (IWindowedSymbolTreeNode)node;
377        int startIdx = SymbolicDataAnalysisExpressionTreeVectorInterpreter.ToVectorIdx(windowedNode.Offset, vectorLength);
378        int endIdx = SymbolicDataAnalysisExpressionTreeVectorInterpreter.ToVectorIdx(windowedNode.Length, vectorLength);
379        var slices = SymbolicDataAnalysisExpressionTreeVectorInterpreter.GetVectorSlices(startIdx, endIdx, vectorLength);
380
381        var segments = new List<Tensor>();
382        foreach (var (start, count) in slices) {
383          segments.Add(tensor[new Slice(), new Slice(start, start + count)]);
384        }
385        return tf.concat(segments, axis: 1);
386      }
387
388
389      if (node.Symbol is StartSymbol) {
390        Tensor prediction = EvaluateNode(node.GetSubtree(0));
391
392        if (prediction.rank != 2 && prediction.shape[1] != 1)
393          throw new InvalidOperationException("Prediction must be a rank 1 (single value per row).");
394
395        prediction = tf.reshape(prediction, new Shape(-1));
396
397        if (addLinearScalingTerms) {
398          var vars = variables[node];
399          Tensor alpha = vars[1], beta = vars[0];
400          return prediction * alpha + beta;
401        } else {
402          return prediction;
403        }
404      }
405
406      throw new NotSupportedException($"Node symbol {node.Symbol} is not supported.");
407    }
408
409    public static bool IsCompatible(ISymbolicExpressionTree tree) {
410      var containsUnknownSymbol = (
411        from n in tree.Root.GetSubtree(0).IterateNodesPrefix()
412        where
413          !(n.Symbol is Variable) &&
414          //!(n.Symbol is BinaryFactorVariable) &&
415          //!(n.Symbol is FactorVariable) &&
416          !(n.Symbol is Constant) &&
417          !(n.Symbol is Addition) &&
418          !(n.Symbol is Subtraction) &&
419          !(n.Symbol is Multiplication) &&
420          !(n.Symbol is Division) &&
421          !(n.Symbol is Logarithm) &&
422          !(n.Symbol is Exponential) &&
423          !(n.Symbol is SquareRoot) &&
424          !(n.Symbol is Square) &&
425          !(n.Symbol is Sine) &&
426          !(n.Symbol is Cosine) &&
427          !(n.Symbol is Tangent) &&
428          !(n.Symbol is HyperbolicTangent) &&
429          !(n.Symbol is Erf) &&
430          !(n.Symbol is Norm) &&
431          !(n.Symbol is StartSymbol) &&
432          !(n.Symbol is Absolute) &&
433          !(n.Symbol is AnalyticQuotient) &&
434          !(n.Symbol is Cube) &&
435          !(n.Symbol is CubeRoot) &&
436          !(n.Symbol is Mean) &&
437          !(n.Symbol is StandardDeviation) &&
438          !(n.Symbol is Variance) &&
439          !(n.Symbol is Sum) &&
440          !(n.Symbol is SubVector)
441        select n).Any();
442      return !containsUnknownSymbol;
443    }
444  }
445}
Note: See TracBrowser for help on using the repository browser.