Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
02/05/15 07:03:15 (9 years ago)
Author:
gkronber
Message:

#2283: constant opt, expressioncompiler, autodiff, fixes in GP solvers

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.SymbReg/SymbolicRegressionProblem.cs

    r11851 r11895  
    11using System;
     2using System.CodeDom.Compiler;
    23using System.Collections.Generic;
     4using System.Diagnostics;
    35using System.Linq;
    46using System.Security;
    57using System.Security.AccessControl;
     8using System.Security.Authentication.ExtendedProtection.Configuration;
    69using System.Text;
     10using AutoDiff;
    711using HeuristicLab.Common;
     12using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    813using HeuristicLab.Problems.DataAnalysis;
    914using HeuristicLab.Problems.Instances;
     
    1217namespace HeuristicLab.Problems.GrammaticalOptimization.SymbReg {
    1318  // provides bridge to HL regression problem instances
    14   public class SymbolicRegressionProblem : IProblem {
     19  public class SymbolicRegressionProblem : ISymbolicExpressionTreeProblem {
    1520
    1621    private const string grammarString = @"
    1722        G(E):
    18         E -> V | V+E | V-E | V*E | V/E | (E) | C | C+E | C-E | C*E | C/E
     23        E -> V | C | V+E | V-E | V*E | V%E | (E) | C+E | C-E | C*E | C%E
    1924        C -> 0..9
    2025        V -> <variables>
     
    2227    // C represents Koza-style ERC (the symbol is the index of the ERC), the values are initialized below
    2328
     29    // S .. Sum (+), N .. Neg. sum (-), P .. Product (*), D .. Division (%)
     30    private const string treeGrammarString = @"
     31        G(E):
     32        E -> V | C | S | N | P | D
     33        S -> EE | EEE
     34        N -> EE | EEE
     35        P -> EE | EEE
     36        D -> EE
     37        C -> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
     38        V -> <variables>
     39        ";
     40
     41    // when we use constants optimization we can completely ignore all constants by a simple strategy:
     42    // introduce a constant factor for each complete term
     43    // introduce a constant offset for each complete expression (including expressions in brackets)
     44    // e.g. 1*(2*a + b - 3 + 4) is the same as c0*a + c1*b + c2
     45
    2446    private readonly IGrammar grammar;
    2547
    2648    private readonly int N;
    27     private readonly double[][] x;
     49    private readonly double[,] x;
    2850    private readonly double[] y;
    2951    private readonly int d;
     
    4062      this.d = problemData.AllowedInputVariables.Count();
    4163      if (d > 26) throw new NotSupportedException(); // we only allow single-character terminal symbols so far
    42       this.x = new double[N][];
     64      this.x = new double[N, d];
    4365      this.y = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
    4466
    4567      int i = 0;
    4668      foreach (var r in problemData.TrainingIndices) {
    47         x[i] = new double[d];
    4869        int j = 0;
    4970        foreach (var inputVariable in problemData.AllowedInputVariables) {
    50           x[i][j++] = problemData.Dataset.GetDoubleValue(inputVariable, r);
     71          x[i, j++] = problemData.Dataset.GetDoubleValue(inputVariable, r);
    5172        }
    5273        i++;
     
    5879      char lastVar = Convert.ToChar(Convert.ToByte('a') + d - 1);
    5980      this.grammar = new Grammar(grammarString.Replace("<variables>", firstVar + " .. " + lastVar));
     81      this.TreeBasedGPGrammar = new Grammar(treeGrammarString.Replace("<variables>", firstVar + " .. " + lastVar));
    6082    }
    6183
     
    7193
    7294    public double Evaluate(string sentence) {
     95      return OptimizeConstantsAndEvaluate(sentence);
     96    }
     97
     98    public double SimpleEvaluate(string sentence) {
    7399      var interpreter = new ExpressionInterpreter();
    74       return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i], erc)));
     100      var rowData = new double[d];
     101      return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => {
     102        for (int j = 0; j < d; j++) rowData[j] = x[i, j];
     103        return interpreter.Interpret(sentence, rowData, erc);
     104      }));
    75105    }
    76106
     
    83113    }
    84114
    85     public IEnumerable<Feature> GetFeatures(string phrase)
    86     {
     115    public IEnumerable<Feature> GetFeatures(string phrase) {
    87116      throw new NotImplementedException();
    88117    }
    89118
    90119
    91     /*
    92     public static double OptimizeConstants(string sentence) {
    93 
    94       List<AutoDiff.Variable> variables = new List<AutoDiff.Variable>();
    95       List<AutoDiff.Variable> parameters = new List<AutoDiff.Variable>();
    96       List<string> variableNames = new List<string>();
    97 
     120
     121    public double OptimizeConstantsAndEvaluate(string sentence) {
    98122      AutoDiff.Term func;
    99       if (!TryTransformToAutoDiff(sentence, 0, variables, parameters, out func))
    100         throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree.");
    101       if (variableNames.Count == 0) return 0.0;
    102 
    103       AutoDiff.IParametricCompiledTerm compiledFunc = AutoDiff.TermUtils.Compile(func, variables.ToArray(), parameters.ToArray());
    104 
    105       List<SymbolicExpressionTreeTerminalNode> terminalNodes = tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>().ToList();
    106       double[] c = new double[variables.Count];
    107 
    108       {
    109         c[0] = 0.0;
    110         c[1] = 1.0;
    111         //extract inital constants
    112         int i = 2;
    113         foreach (var node in terminalNodes) {
    114           ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
    115           VariableTreeNode variableTreeNode = node as VariableTreeNode;
    116           if (constantTreeNode != null)
    117             c[i++] = constantTreeNode.Value;
    118           else if (variableTreeNode != null)
    119             c[i++] = variableTreeNode.Weight;
    120         }
    121       }
    122       double[] originalConstants = (double[])c.Clone();
    123       double originalQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);
     123      int pos = 0;
     124
     125      var compiler = new ExpressionCompiler();
     126      Variable[] variables;
     127      Variable[] constants;
     128      compiler.Compile(sentence, out func, out variables, out constants);
     129
     130      // constants are manipulated
     131
     132      if (!constants.Any()) return SimpleEvaluate(sentence);
     133
     134      AutoDiff.IParametricCompiledTerm compiledFunc = func.Compile(constants, variables); // variate constants leave variables fixed to data
     135
     136      double[] c = constants.Select(_ => 1.0).ToArray(); // start with ones
    124137
    125138      alglib.lsfitstate state;
     
    127140      int info;
    128141
    129       Dataset ds = problemData.Dataset;
    130       double[,] x = new double[rows.Count(), variableNames.Count];
    131       int row = 0;
    132       foreach (var r in rows) {
    133         for (int col = 0; col < variableNames.Count; col++) {
    134           x[row, col] = ds.GetDoubleValue(variableNames[col], r);
    135         }
    136         row++;
    137       }
    138       double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
    139142      int n = x.GetLength(0);
    140143      int m = x.GetLength(1);
     
    144147      alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(compiledFunc);
    145148
     149      const int maxIterations = 10;
    146150      try {
    147151        alglib.lsfitcreatefg(x, y, c, n, m, k, false, out state);
     
    151155        alglib.lsfitresults(state, out info, out c, out rep);
    152156      } catch (ArithmeticException) {
    153         return originalQuality;
     157        return 0.0;
    154158      } catch (alglib.alglibexception) {
    155         return originalQuality;
     159        return 0.0;
    156160      }
    157161
    158162      //info == -7  => constant optimization failed due to wrong gradient
    159       if (info != -7) throw new ArgumentException();
     163      if (info == -7) throw new ArgumentException();
     164      {
     165        var rowData = new double[d];
     166        return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => {
     167          for (int j = 0; j < d; j++) rowData[j] = x[i, j];
     168          return compiledFunc.Evaluate(c, rowData);
     169        }));
     170      }
    160171    }
    161172
     
    175186    }
    176187
    177     private static bool TryTransformToAutoDiff(string phrase, int symbolPos, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters, out AutoDiff.Term term)
    178     {
    179       var curSy = phrase[0];
    180       if () {
    181         var var = new AutoDiff.Variable();
    182         variables.Add(var);
    183         term = var;
    184         return true;
    185       }
    186       if (node.Symbol is Variable) {
    187         var varNode = node as VariableTreeNode;
    188         var par = new AutoDiff.Variable();
    189         parameters.Add(par);
    190         variableNames.Add(varNode.VariableName);
    191         var w = new AutoDiff.Variable();
    192         variables.Add(w);
    193         term = AutoDiff.TermBuilder.Product(w, par);
    194         return true;
    195       }
    196       if (node.Symbol is Addition) {
    197         List<AutoDiff.Term> terms = new List<Term>();
    198         foreach (var subTree in node.Subtrees) {
    199           AutoDiff.Term t;
    200           if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, out t)) {
    201             term = null;
    202             return false;
    203           }
    204           terms.Add(t);
     188    public IGrammar TreeBasedGPGrammar { get; private set; }
     189    public string ConvertTreeToSentence(ISymbolicExpressionTree tree) {
     190      var sb = new StringBuilder();
     191
     192      TreeToSentence(tree.Root.GetSubtree(0).GetSubtree(0), sb);
     193
     194      return sb.ToString();
     195    }
     196
     197    private void TreeToSentence(ISymbolicExpressionTreeNode treeNode, StringBuilder sb) {
     198      if (treeNode.SubtreeCount == 0) {
     199        // terminal
     200        sb.Append(treeNode.Symbol.Name);
     201      } else {
     202        string op = string.Empty;
     203        switch (treeNode.Symbol.Name) {
     204          case "S": op = "+"; break;
     205          case "N": op = "-"; break;
     206          case "P": op = "*"; break;
     207          case "D": op = "%"; break;
     208          default: {
     209              Debug.Assert(treeNode.SubtreeCount == 1);
     210              break;
     211            }
    205212        }
    206         term = AutoDiff.TermBuilder.Sum(terms);
    207         return true;
    208       }
    209       if (node.Symbol is Subtraction) {
    210         List<AutoDiff.Term> terms = new List<Term>();
    211         for (int i = 0; i < node.SubtreeCount; i++) {
    212           AutoDiff.Term t;
    213           if (!TryTransformToAutoDiff(node.GetSubtree(i), variables, parameters, variableNames, out t)) {
    214             term = null;
    215             return false;
    216           }
    217           if (i > 0) t = -t;
    218           terms.Add(t);
     213        // nonterminal
     214        if (op == "+" || op == "-") sb.Append("(");
     215        TreeToSentence(treeNode.Subtrees.First(), sb);
     216        foreach (var subTree in treeNode.Subtrees.Skip(1)) {
     217          sb.Append(op);
     218          TreeToSentence(subTree, sb);
    219219        }
    220         term = AutoDiff.TermBuilder.Sum(terms);
    221         return true;
    222       }
    223       if (node.Symbol is Multiplication) {
    224         AutoDiff.Term a, b;
    225         if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, out a) ||
    226           !TryTransformToAutoDiff(node.GetSubtree(1), variables, parameters, variableNames, out b)) {
    227           term = null;
    228           return false;
    229         } else {
    230           List<AutoDiff.Term> factors = new List<Term>();
    231           foreach (var subTree in node.Subtrees.Skip(2)) {
    232             AutoDiff.Term f;
    233             if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, out f)) {
    234               term = null;
    235               return false;
    236             }
    237             factors.Add(f);
    238           }
    239           term = AutoDiff.TermBuilder.Product(a, b, factors.ToArray());
    240           return true;
    241         }
    242       }
    243       if (node.Symbol is Division) {
    244         // only works for at least two subtrees
    245         AutoDiff.Term a, b;
    246         if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, out a) ||
    247           !TryTransformToAutoDiff(node.GetSubtree(1), variables, parameters, variableNames, out b)) {
    248           term = null;
    249           return false;
    250         } else {
    251           List<AutoDiff.Term> factors = new List<Term>();
    252           foreach (var subTree in node.Subtrees.Skip(2)) {
    253             AutoDiff.Term f;
    254             if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, out f)) {
    255               term = null;
    256               return false;
    257             }
    258             factors.Add(1.0 / f);
    259           }
    260           term = AutoDiff.TermBuilder.Product(a, 1.0 / b, factors.ToArray());
    261           return true;
    262         }
    263       }
    264      
    265       if (node.Symbol is StartSymbol) {
    266         var alpha = new AutoDiff.Variable();
    267         var beta = new AutoDiff.Variable();
    268         variables.Add(beta);
    269         variables.Add(alpha);
    270         AutoDiff.Term branchTerm;
    271         if (TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, out branchTerm)) {
    272           term = branchTerm * alpha + beta;
    273           return true;
    274         } else {
    275           term = null;
    276           return false;
    277         }
    278       }
    279       term = null;
    280       return false;
    281     }
    282      */
     220        if (op == "+" || op == "-") sb.Append(")");
     221      }
     222    }
    283223  }
    284224}
Note: See TracChangeset for help on using the changeset viewer.