Changeset 17489


Ignore:
Timestamp:
04/01/20 15:49:03 (11 months ago)
Author:
pfleck
Message:

#3040 Added version with explicit array shapes for explicit broadcasting.

Location:
branches/3040_VectorBasedGP
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/InteractiveSymbolicRegressionSolutionSimplifierView.cs

    r17472 r17489  
    4343
    4444      var tree = Content?.Model?.SymbolicExpressionTree;
    45       btnOptimizeConstants.Enabled = tree != null && NonlinearLeastSquaresConstantOptimizationEvaluator.CanOptimizeConstants(tree);
     45      //btnOptimizeConstants.Enabled = tree != null && NonlinearLeastSquaresConstantOptimizationEvaluator.CanOptimizeConstants(tree);
     46      btnOptimizeConstants.Enabled = tree != null && TensorFlowConstantOptimizationEvaluator.CanOptimizeConstants(tree);
    4647    }
    4748
     
    6566      do {
    6667        prevResult = result;
    67         tree = NonlinearLeastSquaresConstantOptimizationEvaluator.OptimizeTree(tree, regressionProblemData, regressionProblemData.TrainingIndices,
    68           applyLinearScaling: true, maxIterations: constOptIterations, updateVariableWeights: true,
    69           cancellationToken: cancellationToken, iterationCallback: (args, func, obj) => {
    70             double newProgressValue = progress.ProgressValue + (1.0 / (constOptIterations + 2) / maxRepetitions); // (constOptIterations + 2) iterations are reported
    71             progress.ProgressValue = Math.Min(newProgressValue, 1.0);
    72           });
     68        //tree = NonlinearLeastSquaresConstantOptimizationEvaluator.OptimizeTree(tree, regressionProblemData, regressionProblemData.TrainingIndices,
     69        //  applyLinearScaling: true, maxIterations: constOptIterations, updateVariableWeights: true,
     70        //  cancellationToken: cancellationToken, iterationCallback: (args, func, obj) => {
     71        //    double newProgressValue = progress.ProgressValue + (1.0 / (constOptIterations + 2) / maxRepetitions); // (constOptIterations + 2) iterations are reported
     72        //    progress.ProgressValue = Math.Min(newProgressValue, 1.0);
     73        //  });
     74        tree = TensorFlowConstantOptimizationEvaluator.OptimizeTree(tree, regressionProblemData, regressionProblemData.TrainingIndices,
     75          applyLinearScaling: true, updateVariableWeights: true, maxIterations: 10, learningRate: 0.001);
    7376        result = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(model.Interpreter, tree,
    7477          model.LowerEstimationLimit, model.UpperEstimationLimit, regressionProblemData, regressionProblemData.TrainingIndices, applyLinearScaling: true);
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/TensorFlowConstantOptimizationEvaluator.cs

    r17476 r17489  
    1919 */
    2020#endregion
     21
     22#define EXPLICIT_SHAPE
    2123
    2224using System;
     
    9496      CancellationToken cancellationToken = default(CancellationToken), EvaluationsCounter counter = null) {
    9597
    96       var vectorVariables = tree.IterateNodesBreadth()
    97         .OfType<VariableTreeNodeBase>()
    98         .Where(node => problemData.Dataset.VariableHasType<DoubleVector>(node.VariableName))
    99         .Select(node => node.VariableName);
    100 
    101       int? vectorLength = null;
    102       if (vectorVariables.Any()) {
    103         vectorLength = vectorVariables.Select(var => problemData.Dataset.GetDoubleVectorValues(var, rows)).First().First().Count;
    104       }
    10598      int numRows = rows.Count();
     99      var variableLengths = problemData.AllowedInputVariables.ToDictionary(
     100        var => var,
     101        var => {
     102          if (problemData.Dataset.VariableHasType<double>(var)) return 1;
     103          if (problemData.Dataset.VariableHasType<DoubleVector>(var)) return problemData.Dataset.GetDoubleVectorValue(var, 0).Count;
     104          throw new NotSupportedException($"Type of variable {var} is not supported.");
     105        });
    106106
    107107      bool success = TreeToTensorConverter.TryConvert(tree,
    108         numRows, vectorLength,
     108        numRows, variableLengths,
    109109        updateVariableWeights, applyLinearScaling,
    110110        out Tensor prediction,
    111111        out Dictionary<Tensor, string> parameters, out List<Tensor> variables/*, out double[] initialConstants*/);
    112112
    113       var target = tf.placeholder(tf.float64, name: problemData.TargetVariable);
    114       int samples = rows.Count();
     113#if EXPLICIT_SHAPE
     114      var target = tf.placeholder(tf.float64, new TensorShape(numRows, 1), name: problemData.TargetVariable);
     115#endif
    115116      // mse
    116       var costs = tf.reduce_sum(tf.square(prediction - target)) / (2.0 * samples);
     117      var costs = tf.reduce_sum(tf.square(target - prediction)) / (2.0 * numRows);
    117118      var optimizer = tf.train.GradientDescentOptimizer((float)learningRate).minimize(costs);
    118119
     
    124125        if (problemData.Dataset.VariableHasType<double>(variableName)) {
    125126          var data = problemData.Dataset.GetDoubleValues(variableName, rows).ToArray();
    126           if (vectorLength.HasValue) {
    127             var vectorData = new double[numRows][];
    128             for (int i = 0; i < numRows; i++)
    129               vectorData[i] = Enumerable.Repeat(data[i], vectorLength.Value).ToArray();
    130             variablesFeed.Add(variable, np.array(vectorData));
    131           } else
    132             variablesFeed.Add(variable, np.array(data, copy: false));
     127          //if (vectorLength.HasValue) {
     128          //  var vectorData = new double[numRows][];
     129          //  for (int i = 0; i < numRows; i++)
     130          //    vectorData[i] = Enumerable.Repeat(data[i], vectorLength.Value).ToArray();
     131          //  variablesFeed.Add(variable, np.array(vectorData));
     132          //} else
     133          variablesFeed.Add(variable, np.array(data, copy: false).reshape(numRows, 1));
    133134          //} else if (problemData.Dataset.VariableHasType<string>(variableName)) {
    134135          //  variablesFeed.Add(variable, problemData.Dataset.GetStringValues(variableName, rows));
     
    140141      }
    141142      var targetData = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
    142       variablesFeed.Add(target, np.array(targetData, copy: false));
    143 
     143      variablesFeed.Add(target, np.array(targetData, copy: false).reshape(numRows, 1));
    144144
    145145      using (var session = tf.Session()) {
    146146        session.run(tf.global_variables_initializer());
     147
     148        // https://github.com/SciSharp/TensorFlow.NET/wiki/Debugging
     149        tf.train.export_meta_graph(@"C:\temp\TFboard\graph.meta", as_text: false);
    147150
    148151        Trace.WriteLine("Weights:");
  • branches/3040_VectorBasedGP/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Converters/TreeToTensorConverter.cs

    r17476 r17489  
    2020#endregion
    2121
     22#define EXPLICIT_SHAPE
     23
    2224using System;
    2325using System.Collections.Generic;
    2426using System.Linq;
    2527using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     28using NumSharp;
    2629using Tensorflow;
    2730using static Tensorflow.Binding;
     
    5457    #endregion
    5558
    56     public static bool TryConvert(ISymbolicExpressionTree tree, int numRows, int? vectorLength,
     59    public static bool TryConvert(ISymbolicExpressionTree tree, int numRows, Dictionary<string, int> variableLengths,
    5760      bool makeVariableWeightsVariable, bool addLinearScalingTerms,
    5861      out Tensor graph, out Dictionary<Tensor, string> parameters, out List<Tensor> variables
     
    6063
    6164      try {
    62         var converter = new TreeToTensorConverter(numRows, vectorLength, makeVariableWeightsVariable, addLinearScalingTerms);
     65        var converter = new TreeToTensorConverter(numRows, variableLengths, makeVariableWeightsVariable, addLinearScalingTerms);
    6366        graph = converter.ConvertNode(tree.Root.GetSubtree(0));
    6467
     
    7881
    7982    private readonly int numRows;
    80     private readonly int? vectorLength;
     83    private readonly Dictionary<string, int> variableLengths;
    8184    private readonly bool makeVariableWeightsVariable;
    8285    private readonly bool addLinearScalingTerms;
     
    8689    private readonly List<Tensor> variables = new List<Tensor>();
    8790
    88     private TreeToTensorConverter(int numRows, int? vectorLength, bool makeVariableWeightsVariable, bool addLinearScalingTerms) {
     91    private TreeToTensorConverter(int numRows, Dictionary<string, int> variableLengths, bool makeVariableWeightsVariable, bool addLinearScalingTerms) {
    8992      this.numRows = numRows;
    90       this.vectorLength = vectorLength;
     93      this.variableLengths = variableLengths;
    9194      this.makeVariableWeightsVariable = makeVariableWeightsVariable;
    9295      this.addLinearScalingTerms = addLinearScalingTerms;
    9396    }
     97
    9498
    9599
     
    98102        var value = ((ConstantTreeNode)node).Value;
    99103        //initialConstants.Add(value);
    100         var var = tf.Variable(value, name: $"c_{variables.Count}", dtype: tf.float64);
     104#if EXPLICIT_SHAPE
     105        //var var = (RefVariable)tf.VariableV1(value, name: $"c_{variables.Count}", dtype: tf.float64, shape: new[] { 1, 1 });
     106        var value_arr = np.array(value).reshape(1, 1);
     107        var var = tf.Variable(value_arr, name: $"c_{variables.Count}", dtype: tf.float64);
     108#endif
     109        //var var = tf.Variable(value, name: $"c_{variables.Count}", dtype: tf.float64/*, shape: new[] { 1, 1 }*/);
    101110        variables.Add(var);
    102111        return var;
     
    109118        //var varValue = factorVarNode != null ? factorVarNode.VariableValue : string.Empty;
    110119        //var par = FindOrCreateParameter(parameters, varNode.VariableName, varValue);
    111         var shape = vectorLength.HasValue
    112           ? new TensorShape(numRows, vectorLength.Value)
    113           : new TensorShape(numRows);
    114         var par = tf.placeholder(tf.float64, shape: shape, name: varNode.VariableName);
     120#if EXPLICIT_SHAPE
     121        var par = tf.placeholder(tf.float64, new TensorShape(numRows, variableLengths[varNode.VariableName]), name: varNode.VariableName);
     122#endif
    115123        parameters.Add(par, varNode.VariableName);
    116124
    117125        if (makeVariableWeightsVariable) {
    118126          //initialConstants.Add(varNode.Weight);
    119           var w = tf.Variable(varNode.Weight, name: $"w_{varNode.VariableName}_{variables.Count}", dtype: tf.float64);
     127#if EXPLICIT_SHAPE
     128          //var w = (RefVariable)tf.VariableV1(varNode.Weight, name: $"w_{varNode.VariableName}_{variables.Count}", dtype: tf.float64, shape: new[] { 1, 1 });
     129          var w_arr = np.array(varNode.Weight).reshape(1, 1);
     130          var w = tf.Variable(w_arr, name: $"w_{varNode.VariableName}", dtype: tf.float64);
     131#endif
     132          //var w = tf.Variable(varNode.Weight, name: $"w_{varNode.VariableName}_{variables.Count}", dtype: tf.float64/*, shape: new[] { 1, 1 }*/);
    120133          variables.Add(w);
    121134          return w * par;
     
    125138      }
    126139
    127       if (node.Symbol is FactorVariable) {
    128         var factorVarNode = node as FactorVariableTreeNode;
    129         var products = new List<Tensor>();
    130         foreach (var variableValue in factorVarNode.Symbol.GetVariableValues(factorVarNode.VariableName)) {
    131           //var par = FindOrCreateParameter(parameters, factorVarNode.VariableName, variableValue);
    132           var par = tf.placeholder(tf.float64, shape: new TensorShape(numRows), name: factorVarNode.VariableName);
    133           parameters.Add(par, factorVarNode.VariableName);
    134 
    135           var value = factorVarNode.GetValue(variableValue);
    136           //initialConstants.Add(value);
    137           var wVar = tf.Variable(value, name: $"f_{factorVarNode.VariableName}_{variables.Count}");
    138           variables.Add(wVar);
    139 
    140           products.add(wVar * par);
    141         }
    142 
    143         return products.Aggregate((a, b) => a + b);
    144       }
     140      //if (node.Symbol is FactorVariable) {
     141      //  var factorVarNode = node as FactorVariableTreeNode;
     142      //  var products = new List<Tensor>();
     143      //  foreach (var variableValue in factorVarNode.Symbol.GetVariableValues(factorVarNode.VariableName)) {
     144      //    //var par = FindOrCreateParameter(parameters, factorVarNode.VariableName, variableValue);
     145      //    var par = tf.placeholder(tf.float64, new TensorShape(numRows, 1), name: factorVarNode.VariableName);
     146      //    parameters.Add(par, factorVarNode.VariableName);
     147
     148      //    var value = factorVarNode.GetValue(variableValue);
     149      //    //initialConstants.Add(value);
     150      //    var wVar = (RefVariable)tf.VariableV1(value, name: $"f_{factorVarNode.VariableName}_{variables.Count}", dtype: tf.float64, shape: new[] { 1, 1 });
     151      //    //var wVar = tf.Variable(value, name: $"f_{factorVarNode.VariableName}_{variables.Count}"/*, shape: new[] { 1, 1 }*/);
     152      //    variables.Add(wVar);
     153
     154      //    products.add(wVar * par);
     155      //  }
     156
     157      //  return products.Aggregate((a, b) => a + b);
     158      //}
    145159
    146160      if (node.Symbol is Addition) {
     
    248262        return tf.reduce_mean(
    249263          ConvertNode(node.GetSubtree(0)),
    250           axis: new[] { 1 });
     264          axis: new[] { 1 },
     265          keepdims: true);
    251266      }
    252267
     
    261276        return tf.reduce_sum(
    262277          ConvertNode(node.GetSubtree(0)),
    263           axis: new[] { 1 });
     278          axis: new[] { 1 },
     279          keepdims: true);
    264280      }
    265281
     
    267283        if (addLinearScalingTerms) {
    268284          // scaling variables α, β are given at the beginning of the parameter vector
    269           var alpha = tf.Variable(1.0, name: $"alpha_{1.0}", dtype: tf.float64);
    270           var beta = tf.Variable(0.0, name: $"beta_{0.0}", dtype: tf.float64);
     285#if EXPLICIT_SHAPE
     286          //var alpha = (RefVariable)tf.VariableV1(1.0, name: $"alpha_{1.0}", dtype: tf.float64, shape: new[] { 1, 1 });
     287          //var beta = (RefVariable)tf.VariableV1(0.0, name: $"beta_{0.0}", dtype: tf.float64, shape: new[] { 1, 1 });
     288
     289          var alpha_arr = np.array(1.0).reshape(1, 1);
     290          var alpha = tf.Variable(alpha_arr, name: $"alpha", dtype: tf.float64);
     291          var beta_arr = np.array(1.0).reshape(1, 1);
     292          var beta = tf.Variable(beta_arr, name: $"beta", dtype: tf.float64);
     293#endif
     294          //var alpha = tf.Variable(1.0, name: $"alpha_{1.0}", dtype: tf.float64/*, shape: new[] { 1, 1 }*/);
     295          //var beta = tf.Variable(0.0, name: $"beta_{0.0}", dtype: tf.float64/*, shape: new[] { 1, 1 }*/);
    271296          variables.Add(alpha);
    272297          variables.Add(beta);
     
    277302
    278303      throw new NotSupportedException($"Node symbol {node.Symbol} is not supported.");
    279     }
    280 
    281     // for each factor variable value we need a parameter which represents a binary indicator for that variable & value combination
    282     // each binary indicator is only necessary once. So we only create a parameter if this combination is not yet available
    283     private static Tensor FindOrCreateParameter(Dictionary<DataForVariable, Tensor> parameters, string varName, string varValue = "") {
    284       var data = new DataForVariable(varName, varValue);
    285 
    286       if (!parameters.TryGetValue(data, out var par)) {
    287         // not found -> create new parameter and entries in names and values lists
    288         par = tf.placeholder(tf.float64, shape: new TensorShape(-1), name: varName);
    289         parameters.Add(data, par);
    290       }
    291       return par;
    292304    }
    293305
Note: See TracChangeset for help on using the changeset viewer.