Changeset 14756
- Timestamp:
- 03/16/17 09:25:47 (8 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/SymbolicRegressionConstantOptimizationEvaluator.cs
r14402 r14756 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Diagnostics.Contracts;25 24 using System.Linq; 26 25 using AutoDiff; … … 110 109 [StorableHook(HookType.AfterDeserialization)] 111 110 private void AfterDeserialization() { 112 if 111 if(!Parameters.ContainsKey(UpdateConstantsInTreeParameterName)) 113 112 Parameters.Add(new FixedValueParameter<BoolValue>(UpdateConstantsInTreeParameterName, "Determines if the constants in the tree should be overwritten by the optimized constants.", new BoolValue(true))); 114 if 113 if(!Parameters.ContainsKey(UpdateVariableWeightsParameterName)) 115 114 Parameters.Add(new FixedValueParameter<BoolValue>(UpdateVariableWeightsParameterName, "Determines if the variable weights in the tree should be optimized.", new BoolValue(true))); 116 115 } … … 119 118 var solution = SymbolicExpressionTreeParameter.ActualValue; 120 119 double quality; 121 if 120 if(RandomParameter.ActualValue.NextDouble() < ConstantOptimizationProbability.Value) { 122 121 IEnumerable<int> constantOptimizationRows = GenerateRowsToEvaluate(ConstantOptimizationRowsPercentage.Value); 123 122 quality = OptimizeConstants(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, ProblemDataParameter.ActualValue, 124 123 constantOptimizationRows, ApplyLinearScalingParameter.ActualValue.Value, ConstantOptimizationIterations.Value, updateVariableWeights: UpdateVariableWeights, lowerEstimationLimit: EstimationLimitsParameter.ActualValue.Lower, upperEstimationLimit: EstimationLimitsParameter.ActualValue.Upper, updateConstantsInTree: UpdateConstantsInTree); 125 124 126 if 125 if(ConstantOptimizationRowsPercentage.Value != RelativeNumberOfEvaluatedSamplesParameter.ActualValue.Value) { 127 126 var evaluationRows = GenerateRowsToEvaluate(); 128 127 quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, solution, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper, ProblemDataParameter.ActualValue, evaluationRows, ApplyLinearScalingParameter.ActualValue.Value); … … 177 176 178 177 179 public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, int maxIterations, bool updateVariableWeights = true, double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue, bool updateConstantsInTree = true) { 180 181 List<AutoDiff.Variable> variables = new List<AutoDiff.Variable>(); 182 List<AutoDiff.Variable> parameters = new List<AutoDiff.Variable>(); 183 List<string> variableNames = new List<string>(); 184 List<string> categoricalVariableValues = new List<string>(); 185 List<int> lags = new List<int>(); 178 179 public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, 180 ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, 181 int maxIterations, bool updateVariableWeights = true, 182 double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue, 183 bool updateConstantsInTree = true) { 184 185 // numeric constants in the tree become variables for constant opt 186 // variables in the tree become parameters (fixed values) for constant opt 187 // for each parameter (variable in the original tree) we store the 188 // variable name, variable value (for factor vars) and lag as a DataForVariable object. 189 // A dictionary is used to find parameters 190 var variables = new List<AutoDiff.Variable>(); 191 var parameters = new Dictionary<DataForVariable, AutoDiff.Variable>(); 192 //List<string> variableNames = new List<string>(); 193 //List<string> categoricalVariableValues = new List<string>(); 194 //List<int> lags = new List<int>(); 186 195 187 196 AutoDiff.Term func; 188 if (!TryTransformToAutoDiff(tree.Root.GetSubtree(0), variables, parameters, variableNames, lags, categoricalVariableValues, updateVariableWeights, out func))197 if(!TryTransformToAutoDiff(tree.Root.GetSubtree(0), variables, parameters, updateVariableWeights, out func)) 189 198 throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree."); 190 if (variableNames.Count == 0) return 0.0; // gkronber: constant expressions always have a R² of 0.0 191 192 AutoDiff.IParametricCompiledTerm compiledFunc = func.Compile(variables.ToArray(), parameters.ToArray()); 199 if(parameters.Count == 0) return 0.0; // gkronber: constant expressions always have a R² of 0.0 200 201 var parameterEntries = parameters.ToArray(); // order of entries must be the same for x 202 AutoDiff.IParametricCompiledTerm compiledFunc = func.Compile(variables.ToArray(), parameterEntries.Select(kvp => kvp.Value).ToArray()); 193 203 194 204 List<SymbolicExpressionTreeTerminalNode> terminalNodes = null; // gkronber only used for extraction of initial constants 195 if 205 if(updateVariableWeights) 196 206 terminalNodes = tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>().ToList(); 197 207 else … … 207 217 c[1] = 1.0; 208 218 int i = 2; 209 foreach 219 foreach(var node in terminalNodes) { 210 220 ConstantTreeNode constantTreeNode = node as ConstantTreeNode; 211 221 VariableTreeNode variableTreeNode = node as VariableTreeNode; 212 222 BinaryFactorVariableTreeNode binFactorVarTreeNode = node as BinaryFactorVariableTreeNode; 213 223 FactorVariableTreeNode factorVarTreeNode = node as FactorVariableTreeNode; 214 if 224 if(constantTreeNode != null) 215 225 c[i++] = constantTreeNode.Value; 216 else if 226 else if(updateVariableWeights && variableTreeNode != null) 217 227 c[i++] = variableTreeNode.Weight; 218 else if 228 else if(updateVariableWeights && binFactorVarTreeNode != null) 219 229 c[i++] = binFactorVarTreeNode.Weight; 220 else if 230 else if(factorVarTreeNode != null) { 221 231 // gkronber: a factorVariableTreeNode holds a category-specific constant therefore we can consider factors to be the same as constants 222 foreach 232 foreach(var w in factorVarTreeNode.Weights) c[i++] = w; 223 233 } 224 234 } … … 229 239 alglib.lsfitstate state; 230 240 alglib.lsfitreport rep; 231 int info;241 int retVal; 232 242 233 243 IDataset ds = problemData.Dataset; 234 double[,] x = new double[rows.Count(), variableNames.Count];244 double[,] x = new double[rows.Count(), parameters.Count]; 235 245 int row = 0; 236 foreach (var r in rows) { 237 for (int col = 0; col < variableNames.Count; col++) { 238 int lag = lags[col]; 239 if (ds.VariableHasType<double>(variableNames[col])) { 240 x[row, col] = ds.GetDoubleValue(variableNames[col], r + lag); 241 } else if (ds.VariableHasType<string>(variableNames[col])) { 242 x[row, col] = ds.GetStringValue(variableNames[col], r) == categoricalVariableValues[col] ? 1 : 0; 246 foreach(var r in rows) { 247 int col = 0; 248 foreach(var kvp in parameterEntries) { 249 var info = kvp.Key; 250 int lag = info.lag; 251 if(ds.VariableHasType<double>(info.variableName)) { 252 x[row, col] = ds.GetDoubleValue(info.variableName, r + lag); 253 } else if(ds.VariableHasType<string>(info.variableName)) { 254 x[row, col] = ds.GetStringValue(info.variableName, r) == info.variableValue ? 1 : 0; 243 255 } else throw new InvalidProgramException("found a variable of unknown type"); 256 col++; 244 257 } 245 258 row++; … … 258 271 //alglib.lsfitsetgradientcheck(state, 0.001); 259 272 alglib.lsfitfit(state, function_cx_1_func, function_cx_1_grad, null, null); 260 alglib.lsfitresults(state, out info, out c, out rep);261 } catch 273 alglib.lsfitresults(state, out retVal, out c, out rep); 274 } catch(ArithmeticException) { 262 275 return originalQuality; 263 } catch 276 } catch(alglib.alglibexception) { 264 277 return originalQuality; 265 278 } 266 279 267 // info== -7 => constant optimization failed due to wrong gradient268 if (info!= -7) UpdateConstants(tree, c.Skip(2).ToArray(), updateVariableWeights);280 //retVal == -7 => constant optimization failed due to wrong gradient 281 if(retVal != -7) UpdateConstants(tree, c.Skip(2).ToArray(), updateVariableWeights); 269 282 var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling); 270 283 271 if 272 if 284 if(!updateConstantsInTree) UpdateConstants(tree, originalConstants.Skip(2).ToArray(), updateVariableWeights); 285 if(originalQuality - quality > 0.001 || double.IsNaN(quality)) { 273 286 UpdateConstants(tree, originalConstants.Skip(2).ToArray(), updateVariableWeights); 274 287 return originalQuality; … … 279 292 private static void UpdateConstants(ISymbolicExpressionTree tree, double[] constants, bool updateVariableWeights) { 280 293 int i = 0; 281 foreach 294 foreach(var node in tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>()) { 282 295 ConstantTreeNode constantTreeNode = node as ConstantTreeNode; 283 296 VariableTreeNode variableTreeNode = node as VariableTreeNode; 284 297 BinaryFactorVariableTreeNode binFactorVarTreeNode = node as BinaryFactorVariableTreeNode; 285 298 FactorVariableTreeNode factorVarTreeNode = node as FactorVariableTreeNode; 286 if 299 if(constantTreeNode != null) 287 300 constantTreeNode.Value = constants[i++]; 288 else if 301 else if(updateVariableWeights && variableTreeNode != null) 289 302 variableTreeNode.Weight = constants[i++]; 290 else if 303 else if(updateVariableWeights && binFactorVarTreeNode != null) 291 304 binFactorVarTreeNode.Weight = constants[i++]; 292 else if 293 for 305 else if(factorVarTreeNode != null) { 306 for(int j = 0; j < factorVarTreeNode.Weights.Length; j++) 294 307 factorVarTreeNode.Weights[j] = constants[i++]; 295 308 } … … 311 324 } 312 325 313 private static bool TryTransformToAutoDiff(ISymbolicExpressionTreeNode node, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters, 314 List<string> variableNames, List<int> lags, List<string> categoricalVariableValues, bool updateVariableWeights, out AutoDiff.Term term) { 315 if (node.Symbol is Constant) { 326 private static bool TryTransformToAutoDiff(ISymbolicExpressionTreeNode node, 327 List<AutoDiff.Variable> variables, Dictionary<DataForVariable, AutoDiff.Variable> parameters, 328 bool updateVariableWeights, out AutoDiff.Term term) { 329 if(node.Symbol is Constant) { 316 330 var var = new AutoDiff.Variable(); 317 331 variables.Add(var); … … 319 333 return true; 320 334 } 321 if 335 if(node.Symbol is Variable || node.Symbol is BinaryFactorVariable) { 322 336 var varNode = node as VariableTreeNodeBase; 323 337 var factorVarNode = node as BinaryFactorVariableTreeNode; 324 338 // factor variable values are only 0 or 1 and set in x accordingly 325 339 var varValue = factorVarNode != null ? factorVarNode.VariableValue : string.Empty; 326 var par = FindOrCreateParameter(varNode.VariableName, varValue, parameters, variableNames, categoricalVariableValues); 327 lags.Add(0); 328 329 if (updateVariableWeights) { 340 var par = FindOrCreateParameter(parameters, varNode.VariableName, varValue); 341 342 if(updateVariableWeights) { 330 343 var w = new AutoDiff.Variable(); 331 344 variables.Add(w); … … 336 349 return true; 337 350 } 338 if 351 if(node.Symbol is FactorVariable) { 339 352 var factorVarNode = node as FactorVariableTreeNode; 340 353 var products = new List<Term>(); 341 foreach (var variableValue in factorVarNode.Symbol.GetVariableValues(factorVarNode.VariableName)) { 342 var par = FindOrCreateParameter(factorVarNode.VariableName, variableValue, parameters, variableNames, categoricalVariableValues); 343 lags.Add(0); 354 foreach(var variableValue in factorVarNode.Symbol.GetVariableValues(factorVarNode.VariableName)) { 355 var par = FindOrCreateParameter(parameters, factorVarNode.VariableName, variableValue); 344 356 345 357 var wVar = new AutoDiff.Variable(); … … 351 363 return true; 352 364 } 353 if 365 if(node.Symbol is LaggedVariable) { 354 366 var varNode = node as LaggedVariableTreeNode; 355 var par = new AutoDiff.Variable(); 356 parameters.Add(par); 357 variableNames.Add(varNode.VariableName); 358 lags.Add(varNode.Lag); 359 360 if (updateVariableWeights) { 367 var par = FindOrCreateParameter(parameters, varNode.VariableName, string.Empty, varNode.Lag); 368 369 if(updateVariableWeights) { 361 370 var w = new AutoDiff.Variable(); 362 371 variables.Add(w); … … 367 376 return true; 368 377 } 369 if 378 if(node.Symbol is Addition) { 370 379 List<AutoDiff.Term> terms = new List<Term>(); 371 foreach 380 foreach(var subTree in node.Subtrees) { 372 381 AutoDiff.Term t; 373 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, lags, categoricalVariableValues, updateVariableWeights, out t)) {382 if(!TryTransformToAutoDiff(subTree, variables, parameters, updateVariableWeights, out t)) { 374 383 term = null; 375 384 return false; … … 380 389 return true; 381 390 } 382 if 391 if(node.Symbol is Subtraction) { 383 392 List<AutoDiff.Term> terms = new List<Term>(); 384 for 393 for(int i = 0; i < node.SubtreeCount; i++) { 385 394 AutoDiff.Term t; 386 if (!TryTransformToAutoDiff(node.GetSubtree(i), variables, parameters, variableNames, lags, categoricalVariableValues, updateVariableWeights, out t)) {395 if(!TryTransformToAutoDiff(node.GetSubtree(i), variables, parameters, updateVariableWeights, out t)) { 387 396 term = null; 388 397 return false; 389 398 } 390 if 399 if(i > 0) t = -t; 391 400 terms.Add(t); 392 401 } 393 if 402 if(terms.Count == 1) term = -terms[0]; 394 403 else term = AutoDiff.TermBuilder.Sum(terms); 395 404 return true; 396 405 } 397 if 406 if(node.Symbol is Multiplication) { 398 407 List<AutoDiff.Term> terms = new List<Term>(); 399 foreach 408 foreach(var subTree in node.Subtrees) { 400 409 AutoDiff.Term t; 401 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, lags, categoricalVariableValues, updateVariableWeights, out t)) {410 if(!TryTransformToAutoDiff(subTree, variables, parameters, updateVariableWeights, out t)) { 402 411 term = null; 403 412 return false; … … 405 414 terms.Add(t); 406 415 } 407 if 416 if(terms.Count == 1) term = terms[0]; 408 417 else term = terms.Aggregate((a, b) => new AutoDiff.Product(a, b)); 409 418 return true; 410 419 411 420 } 412 if 421 if(node.Symbol is Division) { 413 422 List<AutoDiff.Term> terms = new List<Term>(); 414 foreach 423 foreach(var subTree in node.Subtrees) { 415 424 AutoDiff.Term t; 416 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, lags, categoricalVariableValues, updateVariableWeights, out t)) {425 if(!TryTransformToAutoDiff(subTree, variables, parameters, updateVariableWeights, out t)) { 417 426 term = null; 418 427 return false; … … 420 429 terms.Add(t); 421 430 } 422 if 431 if(terms.Count == 1) term = 1.0 / terms[0]; 423 432 else term = terms.Aggregate((a, b) => new AutoDiff.Product(a, 1.0 / b)); 424 433 return true; 425 434 } 426 if 427 AutoDiff.Term t; 428 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, lags, categoricalVariableValues, updateVariableWeights, out t)) {435 if(node.Symbol is Logarithm) { 436 AutoDiff.Term t; 437 if(!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, updateVariableWeights, out t)) { 429 438 term = null; 430 439 return false; … … 434 443 } 435 444 } 436 if 437 AutoDiff.Term t; 438 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, lags, categoricalVariableValues, updateVariableWeights, out t)) {445 if(node.Symbol is Exponential) { 446 AutoDiff.Term t; 447 if(!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, updateVariableWeights, out t)) { 439 448 term = null; 440 449 return false; … … 444 453 } 445 454 } 446 if 447 AutoDiff.Term t; 448 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, lags, categoricalVariableValues, updateVariableWeights, out t)) {455 if(node.Symbol is Square) { 456 AutoDiff.Term t; 457 if(!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, updateVariableWeights, out t)) { 449 458 term = null; 450 459 return false; … … 454 463 } 455 464 } 456 if 457 AutoDiff.Term t; 458 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, lags, categoricalVariableValues, updateVariableWeights, out t)) {465 if(node.Symbol is SquareRoot) { 466 AutoDiff.Term t; 467 if(!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, updateVariableWeights, out t)) { 459 468 term = null; 460 469 return false; … … 464 473 } 465 474 } 466 if 467 AutoDiff.Term t; 468 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, lags, categoricalVariableValues, updateVariableWeights, out t)) {475 if(node.Symbol is Sine) { 476 AutoDiff.Term t; 477 if(!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, updateVariableWeights, out t)) { 469 478 term = null; 470 479 return false; … … 474 483 } 475 484 } 476 if 477 AutoDiff.Term t; 478 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, lags, categoricalVariableValues, updateVariableWeights, out t)) {485 if(node.Symbol is Cosine) { 486 AutoDiff.Term t; 487 if(!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, updateVariableWeights, out t)) { 479 488 term = null; 480 489 return false; … … 484 493 } 485 494 } 486 if 487 AutoDiff.Term t; 488 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, lags, categoricalVariableValues, updateVariableWeights, out t)) {495 if(node.Symbol is Tangent) { 496 AutoDiff.Term t; 497 if(!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, updateVariableWeights, out t)) { 489 498 term = null; 490 499 return false; … … 494 503 } 495 504 } 496 if 497 AutoDiff.Term t; 498 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, lags, categoricalVariableValues, updateVariableWeights, out t)) {505 if(node.Symbol is Erf) { 506 AutoDiff.Term t; 507 if(!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, updateVariableWeights, out t)) { 499 508 term = null; 500 509 return false; … … 504 513 } 505 514 } 506 if 507 AutoDiff.Term t; 508 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, lags, categoricalVariableValues, updateVariableWeights, out t)) {515 if(node.Symbol is Norm) { 516 AutoDiff.Term t; 517 if(!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, updateVariableWeights, out t)) { 509 518 term = null; 510 519 return false; … … 514 523 } 515 524 } 516 if 525 if(node.Symbol is StartSymbol) { 517 526 var alpha = new AutoDiff.Variable(); 518 527 var beta = new AutoDiff.Variable(); … … 520 529 variables.Add(alpha); 521 530 AutoDiff.Term branchTerm; 522 if (TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, lags, categoricalVariableValues, updateVariableWeights, out branchTerm)) {531 if(TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, updateVariableWeights, out branchTerm)) { 523 532 term = branchTerm * alpha + beta; 524 533 return true; … … 534 543 // for each factor variable value we need a parameter which represents a binary indicator for that variable & value combination 535 544 // each binary indicator is only necessary once. So we only create a parameter if this combination is not yet available 536 private static Term FindOrCreateParameter(string varName, string varValue, 537 List<AutoDiff.Variable> parameters, List<string> variableNames, List<string> variableValues) { 538 Contract.Assert(variableNames.Count == variableValues.Count); 539 int idx = -1; 540 for (int i = 0; i < variableNames.Count; i++) { 541 if (variableNames[i] == varName && variableValues[i] == varValue) { 542 idx = i; 543 break; 544 } 545 } 545 private static Term FindOrCreateParameter(Dictionary<DataForVariable, AutoDiff.Variable> parameters, 546 string varName, string varValue = "", int lag = 0) { 547 var data = new DataForVariable(varName, varValue, lag); 546 548 547 549 AutoDiff.Variable par = null; 548 if (idx == -1) {550 if(!parameters.TryGetValue(data, out par)) { 549 551 // not found -> create new parameter and entries in names and values lists 550 552 par = new AutoDiff.Variable(); 551 parameters.Add(par); 552 variableNames.Add(varName); 553 variableValues.Add(varValue); 554 } else { 555 par = parameters[idx]; 553 parameters.Add(data, par); 556 554 } 557 555 return par; … … 585 583 return !containsUnknownSymbol; 586 584 } 585 586 587 #region helper class 588 private class DataForVariable { 589 public readonly string variableName; 590 public readonly string variableValue; // for factor vars 591 public readonly int lag; 592 593 public DataForVariable(string varName, string varValue, int lag) { 594 this.variableName = varName; 595 this.variableValue = varValue; 596 this.lag = lag; 597 } 598 599 public override bool Equals(object obj) { 600 var other = obj as DataForVariable; 601 if(other == null) return false; 602 return other.variableName.Equals(this.variableName) && 603 other.variableValue.Equals(this.variableValue) && 604 other.lag == this.lag; 605 } 606 607 public override int GetHashCode() { 608 return variableName.GetHashCode() ^ variableValue.GetHashCode() ^ lag; 609 } 610 } 611 #endregion 587 612 } 588 613 }
Note: See TracChangeset
for help on using the changeset viewer.