Changeset 8704


Ignore:
Timestamp:
09/27/12 12:58:46 (7 years ago)
Author:
gkronber
Message:

#1962 changed SymbolicRegressionConstantOptimizationEvaluator to calculate gradients using AutoDiff.

Location:
trunk/sources
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/InteractiveSymbolicRegressionSolutionSimplifierView.cs

    r8664 r8704  
    124124      var model = Content.Model;
    125125      SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(Content.Model.Interpreter, Content.Model.SymbolicExpressionTree, Content.ProblemData, Content.ProblemData.TrainingIndices,
    126         applyLinearScaling: true, improvement: 0.001, iterations: 0, differentialStep: 0.0001, upperEstimationLimit: model.UpperEstimationLimit, lowerEstimationLimit: model.LowerEstimationLimit);
     126        applyLinearScaling: true, maxIterations: 50, upperEstimationLimit: model.UpperEstimationLimit, lowerEstimationLimit: model.LowerEstimationLimit);
    127127      UpdateModel(Content.Model.SymbolicExpressionTree);
    128128    }
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression-3.4.csproj

    r8600 r8704  
    9797      <Private>False</Private>
    9898    </Reference>
     99    <Reference Include="AutoDiff-1.0, Version=1.0.4652.23360, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
     100      <HintPath>..\..\bin\AutoDiff-1.0.dll</HintPath>
     101      <Private>False</Private>
     102    </Reference>
    99103    <Reference Include="System" />
    100104    <Reference Include="System.Core">
     
    241245  -->
    242246  <PropertyGroup>
    243    <PreBuildEvent Condition=" '$(OS)' == 'Windows_NT' ">set Path=%25Path%25;$(ProjectDir);$(SolutionDir)
     247    <PreBuildEvent Condition=" '$(OS)' == 'Windows_NT' ">set Path=%25Path%25;$(ProjectDir);$(SolutionDir)
    244248set ProjectDir=$(ProjectDir)
    245249set SolutionDir=$(SolutionDir)
     
    248252call PreBuildEvent.cmd
    249253</PreBuildEvent>
    250 <PreBuildEvent Condition=" '$(OS)' != 'Windows_NT' ">
     254    <PreBuildEvent Condition=" '$(OS)' != 'Windows_NT' ">
    251255export ProjectDir=$(ProjectDir)
    252256export SolutionDir=$(SolutionDir)
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/Plugin.cs.frame

    r8452 r8704  
    2929  [PluginFile("HeuristicLab.Problems.DataAnalysis.Symbolic.Regression-3.4.dll", PluginFileType.Assembly)]
    3030  [PluginDependency("HeuristicLab.ALGLIB", "3.6.0")]
     31  [PluginDependency("HeuristicLab.AutoDiff", "1.0")]
    3132  [PluginDependency("HeuristicLab.Analysis", "3.3")]
    3233  [PluginDependency("HeuristicLab.Common", "3.3")]
  • trunk/sources/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/SymbolicRegressionConstantOptimizationEvaluator.cs

    r8664 r8704  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using System.Linq;
     25using AutoDiff;
    2426using HeuristicLab.Common;
    2527using HeuristicLab.Core;
     
    100102    public override IOperation Apply() {
    101103      AddResults();
    102       int seed = RandomParameter.ActualValue.Next();
    103104      var solution = SymbolicExpressionTreeParameter.ActualValue;
    104105      double quality;
     
    106107        IEnumerable<int> constantOptimizationRows = GenerateRowsToEvaluate(ConstantOptimizationRowsPercentage.Value);
    107108        quality = OptimizeConstants(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, ProblemDataParameter.ActualValue,
    108            constantOptimizationRows, ApplyLinearScalingParameter.ActualValue.Value, ConstantOptimizationImprovement.Value, ConstantOptimizationIterations.Value, 0.001,
     109           constantOptimizationRows, ApplyLinearScalingParameter.ActualValue.Value, ConstantOptimizationIterations.Value,
    109110           EstimationLimitsParameter.ActualValue.Upper, EstimationLimitsParameter.ActualValue.Lower,
    110111          EvaluatedTreesParameter.ActualValue, EvaluatedTreeNodesParameter.ActualValue);
     
    157158
    158159    public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree tree, IRegressionProblemData problemData,
    159       IEnumerable<int> rows, bool applyLinearScaling, double improvement, int iterations, double differentialStep, double upperEstimationLimit = double.MaxValue, double lowerEstimationLimit = double.MinValue, IntValue evaluatedTrees = null, IntValue evaluatedTreeNodes = null) {
     160      IEnumerable<int> rows, bool applyLinearScaling, int maxIterations, double upperEstimationLimit = double.MaxValue, double lowerEstimationLimit = double.MinValue, IntValue evaluatedTrees = null, IntValue evaluatedTreeNodes = null) {
     161
     162      List<AutoDiff.Variable> variables = new List<AutoDiff.Variable>();
     163      List<AutoDiff.Variable> parameters = new List<AutoDiff.Variable>();
     164      List<string> variableNames = new List<string>();
     165
     166      AutoDiff.Term func;
     167      if (!TryTransformToAutoDiff(tree.Root.GetSubtree(0), variables, parameters, variableNames, out func)) return 0.0;
     168      if (variableNames.Count == 0) return 0.0;
     169
     170      AutoDiff.IParametricCompiledTerm compiledFunc = AutoDiff.TermUtils.Compile(func, variables.ToArray(), parameters.ToArray());
     171
    160172      List<SymbolicExpressionTreeTerminalNode> terminalNodes = tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>().ToList();
    161       double[] c = new double[terminalNodes.Count];
    162       int treeLength = tree.Length;
    163 
    164       //extract initial constants
    165       for (int i = 0; i < terminalNodes.Count; i++) {
    166         ConstantTreeNode constantTreeNode = terminalNodes[i] as ConstantTreeNode;
    167         if (constantTreeNode != null) c[i] = constantTreeNode.Value;
    168         VariableTreeNode variableTreeNode = terminalNodes[i] as VariableTreeNode;
    169         if (variableTreeNode != null) c[i] = variableTreeNode.Weight;
    170       }
    171 
    172       double epsg = 0;
    173       double epsf = improvement;
    174       double epsx = 0;
    175       int maxits = iterations;
    176       double diffstep = differentialStep;
    177 
    178       alglib.minlmstate state;
    179       alglib.minlmreport report;
    180 
    181       alglib.minlmcreatev(1, c, diffstep, out state);
    182       alglib.minlmsetcond(state, epsg, epsf, epsx, maxits);
    183       alglib.minlmoptimize(state, CreateCallBack(interpreter, tree, problemData, rows, applyLinearScaling, upperEstimationLimit, lowerEstimationLimit, treeLength, evaluatedTrees, evaluatedTreeNodes), null, terminalNodes);
    184       alglib.minlmresults(state, out c, out report);
    185 
    186       for (int i = 0; i < c.Length; i++) {
    187         ConstantTreeNode constantTreeNode = terminalNodes[i] as ConstantTreeNode;
    188         if (constantTreeNode != null) constantTreeNode.Value = c[i];
    189         VariableTreeNode variableTreeNode = terminalNodes[i] as VariableTreeNode;
    190         if (variableTreeNode != null) variableTreeNode.Weight = c[i];
    191       }
    192 
    193       return (state.fi[0] - 1) * -1;
    194     }
    195 
    196     private static alglib.ndimensional_fvec CreateCallBack(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, double upperEstimationLimit, double lowerEstimationLimit, int treeLength, IntValue evaluatedTrees = null, IntValue evaluatedTreeNodes = null) {
    197       return (double[] arg, double[] fi, object obj) => {
    198         // update constants of tree
    199         List<SymbolicExpressionTreeTerminalNode> terminalNodes = (List<SymbolicExpressionTreeTerminalNode>)obj;
    200         for (int i = 0; i < terminalNodes.Count; i++) {
    201           ConstantTreeNode constantTreeNode = terminalNodes[i] as ConstantTreeNode;
    202           if (constantTreeNode != null) constantTreeNode.Value = arg[i];
    203           VariableTreeNode variableTreeNode = terminalNodes[i] as VariableTreeNode;
    204           if (variableTreeNode != null) variableTreeNode.Weight = arg[i];
    205         }
    206 
    207         double quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);
    208 
    209         fi[0] = 1 - quality;
    210         if (evaluatedTrees != null) evaluatedTrees.Value++;
    211         if (evaluatedTreeNodes != null) evaluatedTreeNodes.Value += treeLength;
     173      double[] c = new double[variables.Count];
     174
     175      {
     176        c[0] = 0.0;
     177        c[1] = 1.0;
     178        //extract initial constants
     179        int i = 2;
     180        foreach (var node in terminalNodes) {
     181          ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
     182          VariableTreeNode variableTreeNode = node as VariableTreeNode;
     183          if (constantTreeNode != null)
     184            c[i++] = constantTreeNode.Value;
     185          else if (variableTreeNode != null && !variableTreeNode.Weight.IsAlmost(1.0))
     186            c[i++] = variableTreeNode.Weight;
     187        }
     188      }
     189
     190      alglib.lsfitstate state;
     191      alglib.lsfitreport rep;
     192      int info;
     193
     194      Dataset ds = problemData.Dataset;
     195      double[,] x = new double[rows.Count(), variableNames.Count];
     196      int row = 0;
     197      foreach (var r in rows) {
     198        for (int col = 0; col < variableNames.Count; col++) {
     199          x[row, col] = ds.GetDoubleValue(variableNames[col], r);
     200        }
     201        row++;
     202      }
     203      double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
     204      int n = x.GetLength(0);
     205      int m = x.GetLength(1);
     206      int k = c.Length;
     207
     208      alglib.ndimensional_pfunc function_cx_1_func = CreatePFunc(compiledFunc);
     209      alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(compiledFunc);
     210
     211      try {
     212        alglib.lsfitcreatefg(x, y, c, n, m, k, false, out state);
     213        alglib.lsfitsetcond(state, 0, 0, maxIterations);
     214        alglib.lsfitfit(state, function_cx_1_func, function_cx_1_grad, null, null);
     215        alglib.lsfitresults(state, out info, out c, out rep);
     216
     217      }
     218      catch (alglib.alglibexception) {
     219        return 0.0;
     220      }
     221      {
     222        // only when no error occurred
     223        // set constants in tree
     224        int i = 2;
     225        foreach (var node in terminalNodes) {
     226          ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
     227          VariableTreeNode variableTreeNode = node as VariableTreeNode;
     228          if (constantTreeNode != null)
     229            constantTreeNode.Value = c[i++];
     230          else if (variableTreeNode != null && !variableTreeNode.Weight.IsAlmost(1.0))
     231            variableTreeNode.Weight = c[i++];
     232        }
     233      }
     234
     235      return SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);
     236    }
     237
     238    private static alglib.ndimensional_pfunc CreatePFunc(AutoDiff.IParametricCompiledTerm compiledFunc) {
     239      return (double[] c, double[] x, ref double func, object o) => {
     240        func = compiledFunc.Evaluate(c, x);
    212241      };
    213242    }
    214243
     244    private static alglib.ndimensional_pgrad CreatePGrad(AutoDiff.IParametricCompiledTerm compiledFunc) {
     245      return (double[] c, double[] x, ref double func, double[] grad, object o) => {
     246        var tupel = compiledFunc.Differentiate(c, x);
     247        func = tupel.Item2;
     248        Array.Copy(tupel.Item1, grad, grad.Length);
     249      };
     250    }
     251
     252    private static bool TryTransformToAutoDiff(ISymbolicExpressionTreeNode node, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters, List<string> variableNames, out AutoDiff.Term term) {
     253      if (node.Symbol is Constant) {
     254        var var = new AutoDiff.Variable();
     255        variables.Add(var);
     256        term = var;
     257        return true;
     258      }
     259      if (node.Symbol is Variable) {
     260        // don't tune weights with a value of 1.0 because it was probably set by the simplifier
     261        var varNode = node as VariableTreeNode;
     262        var par = new AutoDiff.Variable();
     263        parameters.Add(par);
     264        variableNames.Add(varNode.VariableName);
     265        if (!varNode.Weight.IsAlmost(1.0)) {
     266          var w = new AutoDiff.Variable();
     267          variables.Add(w);
     268          term = AutoDiff.TermBuilder.Product(w, par);
     269        } else {
     270          term = par;
     271        }
     272        return true;
     273      }
     274      if (node.Symbol is Addition) {
     275        List<AutoDiff.Term> terms = new List<Term>();
     276        foreach (var subTree in node.Subtrees) {
     277          AutoDiff.Term t;
     278          if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, out t)) {
     279            term = null;
     280            return false;
     281          }
     282          terms.Add(t);
     283        }
     284        term = AutoDiff.TermBuilder.Sum(terms);
     285        return true;
     286      }
     287      if (node.Symbol is Multiplication) {
     288        AutoDiff.Term a, b;
     289        if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, out a) ||
     290          !TryTransformToAutoDiff(node.GetSubtree(1), variables, parameters, variableNames, out b)) {
     291          term = null;
     292          return false;
     293        } else {
     294          List<AutoDiff.Term> factors = new List<Term>();
     295          foreach (var subTree in node.Subtrees.Skip(2)) {
     296            AutoDiff.Term f;
     297            if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, out f)) {
     298              term = null;
     299              return false;
     300            }
     301            factors.Add(f);
     302          }
     303          term = AutoDiff.TermBuilder.Product(a, b, factors.ToArray());
     304          return true;
     305        }
     306      }
     307      if (node.Symbol is Division) {
     308        // only works for at least two subtrees
     309        AutoDiff.Term a, b;
     310        if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, out a) ||
     311          !TryTransformToAutoDiff(node.GetSubtree(1), variables, parameters, variableNames, out b)) {
     312          term = null;
     313          return false;
     314        } else {
     315          List<AutoDiff.Term> factors = new List<Term>();
     316          foreach (var subTree in node.Subtrees.Skip(2)) {
     317            AutoDiff.Term f;
     318            if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, out f)) {
     319              term = null;
     320              return false;
     321            }
     322            factors.Add(1.0 / f);
     323          }
     324          term = AutoDiff.TermBuilder.Product(a, 1.0 / b, factors.ToArray());
     325          return true;
     326        }
     327      }
     328      if (node.Symbol is Logarithm) {
     329        AutoDiff.Term t;
     330        if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, out t)) {
     331          term = null;
     332          return false;
     333        } else {
     334          term = AutoDiff.TermBuilder.Log(t);
     335          return true;
     336        }
     337      }
     338      if (node.Symbol is Exponential) {
     339        AutoDiff.Term t;
     340        if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, out t)) {
     341          term = null;
     342          return false;
     343        } else {
     344          term = AutoDiff.TermBuilder.Exp(t);
     345          return true;
     346        }
     347      }
     348      if (node.Symbol is StartSymbol) {
     349        var alpha = new AutoDiff.Variable();
     350        var beta = new AutoDiff.Variable();
     351        variables.Add(beta);
     352        variables.Add(alpha);
     353        AutoDiff.Term branchTerm;
     354        if (TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, out branchTerm)) {
     355          term = branchTerm * alpha + beta;
     356          return true;
     357        } else {
     358          term = null;
     359          return false;
     360        }
     361      }
     362      term = null;
     363      return false;
     364    }
    215365  }
    216366}
Note: See TracChangeset for help on using the changeset viewer.