Changeset 11895 for branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.SymbReg
- Timestamp: 02/05/15 07:03:15 (10 years ago)
- Location: branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.SymbReg
- Files: 1 added, 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.SymbReg/HeuristicLab.Problems.GrammaticalOptimization.SymbReg.csproj
r11851 r11895 31 31 </PropertyGroup> 32 32 <ItemGroup> 33 <Reference Include="ALGLIB-3.7.0, Version=3.7.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 34 <SpecificVersion>False</SpecificVersion> 35 <HintPath>..\..\..\trunk\sources\bin\ALGLIB-3.7.0.dll</HintPath> 36 </Reference> 37 <Reference Include="AutoDiff-1.0"> 38 <HintPath>..\..\..\trunk\sources\bin\AutoDiff-1.0.dll</HintPath> 39 </Reference> 33 40 <Reference Include="HeuristicLab.Common-3.3"> 34 41 <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Common-3.3.dll</HintPath> … … 39 46 <Reference Include="HeuristicLab.Data-3.3"> 40 47 <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Data-3.3.dll</HintPath> 48 </Reference> 49 <Reference Include="HeuristicLab.Encodings.SymbolicExpressionTreeEncoding-3.4, Version=3.4.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 50 <SpecificVersion>False</SpecificVersion> 51 <HintPath>..\..\..\trunk\sources\bin\HeuristicLab.Encodings.SymbolicExpressionTreeEncoding-3.4.dll</HintPath> 41 52 </Reference> 42 53 <Reference Include="HeuristicLab.Problems.DataAnalysis-3.4"> … … 53 64 </ItemGroup> 54 65 <ItemGroup> 66 <Compile Include="ExpressionCompiler.cs" /> 55 67 <Compile Include="Properties\AssemblyInfo.cs" /> 56 68 <Compile Include="SymbolicRegressionProblem.cs" /> -
branches/HeuristicLab.Problems.GrammaticalOptimization/HeuristicLab.Problems.GrammaticalOptimization.SymbReg/SymbolicRegressionProblem.cs
r11851 r11895 1 1 using System; 2 using System.CodeDom.Compiler; 2 3 using System.Collections.Generic; 4 using System.Diagnostics; 3 5 using System.Linq; 4 6 using System.Security; 5 7 using System.Security.AccessControl; 8 using System.Security.Authentication.ExtendedProtection.Configuration; 6 9 using System.Text; 10 using AutoDiff; 7 11 using HeuristicLab.Common; 12 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; 8 13 using HeuristicLab.Problems.DataAnalysis; 9 14 using HeuristicLab.Problems.Instances; … … 12 17 namespace HeuristicLab.Problems.GrammaticalOptimization.SymbReg { 13 18 // provides bridge to HL regression problem instances 14 public class SymbolicRegressionProblem : I Problem {19 public class SymbolicRegressionProblem : ISymbolicExpressionTreeProblem { 15 20 16 21 private const string grammarString = @" 17 22 G(E): 18 E -> V | V+E | V-E | V*E | V/E | (E) | C | C+E | C-E | C*E | C/E23 E -> V | C | V+E | V-E | V*E | V%E | (E) | C+E | C-E | C*E | C%E 19 24 C -> 0..9 20 25 V -> <variables> … … 22 27 // C represents Koza-style ERC (the symbol is the index of the ERC), the values are initialized below 23 28 29 // S .. Sum (+), N .. Neg. sum (-), P .. Product (*), D .. Division (%) 30 private const string treeGrammarString = @" 31 G(E): 32 E -> V | C | S | N | P | D 33 S -> EE | EEE 34 N -> EE | EEE 35 P -> EE | EEE 36 D -> EE 37 C -> 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 38 V -> <variables> 39 "; 40 41 // when we use constants optimization we can completely ignore all constants by a simple strategy: 42 // introduce a constant factor for each complete term 43 // introduce a constant offset for each complete expression (including expressions in brackets) 44 // e.g. 
1*(2*a + b - 3 + 4) is the same as c0*a + c1*b + c2 45 24 46 private readonly IGrammar grammar; 25 47 26 48 private readonly int N; 27 private readonly double[ ][] x;49 private readonly double[,] x; 28 50 private readonly double[] y; 29 51 private readonly int d; … … 40 62 this.d = problemData.AllowedInputVariables.Count(); 41 63 if (d > 26) throw new NotSupportedException(); // we only allow single-character terminal symbols so far 42 this.x = new double[N ][];64 this.x = new double[N, d]; 43 65 this.y = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray(); 44 66 45 67 int i = 0; 46 68 foreach (var r in problemData.TrainingIndices) { 47 x[i] = new double[d];48 69 int j = 0; 49 70 foreach (var inputVariable in problemData.AllowedInputVariables) { 50 x[i ][j++] = problemData.Dataset.GetDoubleValue(inputVariable, r);71 x[i, j++] = problemData.Dataset.GetDoubleValue(inputVariable, r); 51 72 } 52 73 i++; … … 58 79 char lastVar = Convert.ToChar(Convert.ToByte('a') + d - 1); 59 80 this.grammar = new Grammar(grammarString.Replace("<variables>", firstVar + " .. " + lastVar)); 81 this.TreeBasedGPGrammar = new Grammar(treeGrammarString.Replace("<variables>", firstVar + " .. 
" + lastVar)); 60 82 } 61 83 … … 71 93 72 94 public double Evaluate(string sentence) { 95 return OptimizeConstantsAndEvaluate(sentence); 96 } 97 98 public double SimpleEvaluate(string sentence) { 73 99 var interpreter = new ExpressionInterpreter(); 74 return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => interpreter.Interpret(sentence, x[i], erc))); 100 var rowData = new double[d]; 101 return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => { 102 for (int j = 0; j < d; j++) rowData[j] = x[i, j]; 103 return interpreter.Interpret(sentence, rowData, erc); 104 })); 75 105 } 76 106 … … 83 113 } 84 114 85 public IEnumerable<Feature> GetFeatures(string phrase) 86 { 115 public IEnumerable<Feature> GetFeatures(string phrase) { 87 116 throw new NotImplementedException(); 88 117 } 89 118 90 119 91 /* 92 public static double OptimizeConstants(string sentence) { 93 94 List<AutoDiff.Variable> variables = new List<AutoDiff.Variable>(); 95 List<AutoDiff.Variable> parameters = new List<AutoDiff.Variable>(); 96 List<string> variableNames = new List<string>(); 97 120 121 public double OptimizeConstantsAndEvaluate(string sentence) { 98 122 AutoDiff.Term func; 99 if (!TryTransformToAutoDiff(sentence, 0, variables, parameters, out func)) 100 throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree."); 101 if (variableNames.Count == 0) return 0.0; 102 103 AutoDiff.IParametricCompiledTerm compiledFunc = AutoDiff.TermUtils.Compile(func, variables.ToArray(), parameters.ToArray()); 104 105 List<SymbolicExpressionTreeTerminalNode> terminalNodes = tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>().ToList(); 106 double[] c = new double[variables.Count]; 107 108 { 109 c[0] = 0.0; 110 c[1] = 1.0; 111 //extract inital constants 112 int i = 2; 113 foreach (var node in terminalNodes) { 114 ConstantTreeNode constantTreeNode = node as 
ConstantTreeNode; 115 VariableTreeNode variableTreeNode = node as VariableTreeNode; 116 if (constantTreeNode != null) 117 c[i++] = constantTreeNode.Value; 118 else if (variableTreeNode != null) 119 c[i++] = variableTreeNode.Weight; 120 } 121 } 122 double[] originalConstants = (double[])c.Clone(); 123 double originalQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling); 123 int pos = 0; 124 125 var compiler = new ExpressionCompiler(); 126 Variable[] variables; 127 Variable[] constants; 128 compiler.Compile(sentence, out func, out variables, out constants); 129 130 // constants are manipulated 131 132 if (!constants.Any()) return SimpleEvaluate(sentence); 133 134 AutoDiff.IParametricCompiledTerm compiledFunc = func.Compile(constants, variables); // variate constants leave variables fixed to data 135 136 double[] c = constants.Select(_ => 1.0).ToArray(); // start with ones 124 137 125 138 alglib.lsfitstate state; … … 127 140 int info; 128 141 129 Dataset ds = problemData.Dataset;130 double[,] x = new double[rows.Count(), variableNames.Count];131 int row = 0;132 foreach (var r in rows) {133 for (int col = 0; col < variableNames.Count; col++) {134 x[row, col] = ds.GetDoubleValue(variableNames[col], r);135 }136 row++;137 }138 double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();139 142 int n = x.GetLength(0); 140 143 int m = x.GetLength(1); … … 144 147 alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(compiledFunc); 145 148 149 const int maxIterations = 10; 146 150 try { 147 151 alglib.lsfitcreatefg(x, y, c, n, m, k, false, out state); … … 151 155 alglib.lsfitresults(state, out info, out c, out rep); 152 156 } catch (ArithmeticException) { 153 return originalQuality;157 return 0.0; 154 158 } catch (alglib.alglibexception) { 155 return originalQuality;159 return 0.0; 156 160 } 157 161 158 162 //info == -7 => constant 
optimization failed due to wrong gradient 159 if (info != -7) throw new ArgumentException(); 163 if (info == -7) throw new ArgumentException(); 164 { 165 var rowData = new double[d]; 166 return HeuristicLab.Common.Extensions.RSq(y, Enumerable.Range(0, N).Select(i => { 167 for (int j = 0; j < d; j++) rowData[j] = x[i, j]; 168 return compiledFunc.Evaluate(c, rowData); 169 })); 170 } 160 171 } 161 172 … … 175 186 } 176 187 177 private static bool TryTransformToAutoDiff(string phrase, int symbolPos, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters, out AutoDiff.Term term) 178 { 179 var curSy = phrase[0]; 180 if () { 181 var var = new AutoDiff.Variable(); 182 variables.Add(var); 183 term = var; 184 return true; 185 } 186 if (node.Symbol is Variable) { 187 var varNode = node as VariableTreeNode; 188 var par = new AutoDiff.Variable(); 189 parameters.Add(par); 190 variableNames.Add(varNode.VariableName); 191 var w = new AutoDiff.Variable(); 192 variables.Add(w); 193 term = AutoDiff.TermBuilder.Product(w, par); 194 return true; 195 } 196 if (node.Symbol is Addition) { 197 List<AutoDiff.Term> terms = new List<Term>(); 198 foreach (var subTree in node.Subtrees) { 199 AutoDiff.Term t; 200 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, out t)) { 201 term = null; 202 return false; 203 } 204 terms.Add(t); 188 public IGrammar TreeBasedGPGrammar { get; private set; } 189 public string ConvertTreeToSentence(ISymbolicExpressionTree tree) { 190 var sb = new StringBuilder(); 191 192 TreeToSentence(tree.Root.GetSubtree(0).GetSubtree(0), sb); 193 194 return sb.ToString(); 195 } 196 197 private void TreeToSentence(ISymbolicExpressionTreeNode treeNode, StringBuilder sb) { 198 if (treeNode.SubtreeCount == 0) { 199 // terminal 200 sb.Append(treeNode.Symbol.Name); 201 } else { 202 string op = string.Empty; 203 switch (treeNode.Symbol.Name) { 204 case "S": op = "+"; break; 205 case "N": op = "-"; break; 206 case "P": op = "*"; break; 207 case 
"D": op = "%"; break; 208 default: { 209 Debug.Assert(treeNode.SubtreeCount == 1); 210 break; 211 } 205 212 } 206 term = AutoDiff.TermBuilder.Sum(terms); 207 return true; 208 } 209 if (node.Symbol is Subtraction) { 210 List<AutoDiff.Term> terms = new List<Term>(); 211 for (int i = 0; i < node.SubtreeCount; i++) { 212 AutoDiff.Term t; 213 if (!TryTransformToAutoDiff(node.GetSubtree(i), variables, parameters, variableNames, out t)) { 214 term = null; 215 return false; 216 } 217 if (i > 0) t = -t; 218 terms.Add(t); 213 // nonterminal 214 if (op == "+" || op == "-") sb.Append("("); 215 TreeToSentence(treeNode.Subtrees.First(), sb); 216 foreach (var subTree in treeNode.Subtrees.Skip(1)) { 217 sb.Append(op); 218 TreeToSentence(subTree, sb); 219 219 } 220 term = AutoDiff.TermBuilder.Sum(terms); 221 return true; 222 } 223 if (node.Symbol is Multiplication) { 224 AutoDiff.Term a, b; 225 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, out a) || 226 !TryTransformToAutoDiff(node.GetSubtree(1), variables, parameters, variableNames, out b)) { 227 term = null; 228 return false; 229 } else { 230 List<AutoDiff.Term> factors = new List<Term>(); 231 foreach (var subTree in node.Subtrees.Skip(2)) { 232 AutoDiff.Term f; 233 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, out f)) { 234 term = null; 235 return false; 236 } 237 factors.Add(f); 238 } 239 term = AutoDiff.TermBuilder.Product(a, b, factors.ToArray()); 240 return true; 241 } 242 } 243 if (node.Symbol is Division) { 244 // only works for at least two subtrees 245 AutoDiff.Term a, b; 246 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, out a) || 247 !TryTransformToAutoDiff(node.GetSubtree(1), variables, parameters, variableNames, out b)) { 248 term = null; 249 return false; 250 } else { 251 List<AutoDiff.Term> factors = new List<Term>(); 252 foreach (var subTree in node.Subtrees.Skip(2)) { 253 AutoDiff.Term f; 254 if 
(!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, out f)) { 255 term = null; 256 return false; 257 } 258 factors.Add(1.0 / f); 259 } 260 term = AutoDiff.TermBuilder.Product(a, 1.0 / b, factors.ToArray()); 261 return true; 262 } 263 } 264 265 if (node.Symbol is StartSymbol) { 266 var alpha = new AutoDiff.Variable(); 267 var beta = new AutoDiff.Variable(); 268 variables.Add(beta); 269 variables.Add(alpha); 270 AutoDiff.Term branchTerm; 271 if (TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, out branchTerm)) { 272 term = branchTerm * alpha + beta; 273 return true; 274 } else { 275 term = null; 276 return false; 277 } 278 } 279 term = null; 280 return false; 281 } 282 */ 220 if (op == "+" || op == "-") sb.Append(")"); 221 } 222 } 283 223 } 284 224 }
Note: See TracChangeset for help on using the changeset viewer.