Changeset 14232 for branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression
- Timestamp: 08/03/16 18:54:14 (8 years ago)
- Location: branches/symbreg-factors-2650
- Files: 2 edited, 1 copied
Legend:
- Unmodified
- Added
- Removed
-
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/SymbolicRegressionConstantOptimizationEvaluator.cs
r14185 r14232 181 181 List<AutoDiff.Variable> parameters = new List<AutoDiff.Variable>(); 182 182 List<string> variableNames = new List<string>(); 183 List<string> categoricalVariableValues = new List<string>(); 183 184 184 185 AutoDiff.Term func; 185 if (!TryTransformToAutoDiff(tree.Root.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out func))186 if (!TryTransformToAutoDiff(tree.Root.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out func)) 186 187 throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree."); 187 if (variableNames.Count == 0) return 0.0; 188 if (variableNames.Count == 0) return 0.0; // gkronber: constant expressions always have a R² of 0.0 188 189 189 190 AutoDiff.IParametricCompiledTerm compiledFunc = func.Compile(variables.ToArray(), parameters.ToArray()); 190 191 191 List<SymbolicExpressionTreeTerminalNode> terminalNodes = null; 192 List<SymbolicExpressionTreeTerminalNode> terminalNodes = null; // gkronber only used for extraction of initial constants 192 193 if (updateVariableWeights) 193 194 terminalNodes = tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>().ToList(); … … 222 223 foreach (var r in rows) { 223 224 for (int col = 0; col < variableNames.Count; col++) { 224 x[row, col] = ds.GetDoubleValue(variableNames[col], r); 225 if (ds.VariableHasType<double>(variableNames[col])) { 226 x[row, col] = ds.GetDoubleValue(variableNames[col], r); 227 } else if (ds.VariableHasType<string>(variableNames[col])) { 228 x[row, col] = ds.GetStringValue(variableNames[col], r) == categoricalVariableValues[col] ? 
1 : 0; 229 } else throw new InvalidProgramException("found a variable of unknown type"); 225 230 } 226 231 row++; … … 286 291 } 287 292 288 private static bool TryTransformToAutoDiff(ISymbolicExpressionTreeNode node, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters, List<string> variableNames, bool updateVariableWeights, out AutoDiff.Term term) { 293 private static bool TryTransformToAutoDiff(ISymbolicExpressionTreeNode node, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters, 294 List<string> variableNames, List<string> categoricalVariableValues, bool updateVariableWeights, out AutoDiff.Term term) { 289 295 if (node.Symbol is Constant) { 290 296 var var = new AutoDiff.Variable(); … … 298 304 parameters.Add(par); 299 305 variableNames.Add(varNode.VariableName); 306 categoricalVariableValues.Add(string.Empty); // as a value as placeholder (variableNames.Length == catVariableValues.Length) 300 307 301 308 if (updateVariableWeights) { … … 308 315 return true; 309 316 } 317 if (node.Symbol is FactorVariable) { 318 // nothing to update in this case (like a variable without a weight) 319 // values are only 0 or 1 and set in x accordingly 320 var factorNode = node as FactorVariableTreeNode; 321 var par = new AutoDiff.Variable(); 322 parameters.Add(par); 323 variableNames.Add(factorNode.VariableName); 324 categoricalVariableValues.Add(factorNode.VariableValue); 325 term = par; 326 return true; 327 } 310 328 if (node.Symbol is Addition) { 311 329 List<AutoDiff.Term> terms = new List<Term>(); 312 330 foreach (var subTree in node.Subtrees) { 313 331 AutoDiff.Term t; 314 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, updateVariableWeights, out t)) {332 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 315 333 term = null; 316 334 return false; … … 325 343 for (int i = 0; i < node.SubtreeCount; i++) { 326 344 AutoDiff.Term t; 
327 if (!TryTransformToAutoDiff(node.GetSubtree(i), variables, parameters, variableNames, updateVariableWeights, out t)) {345 if (!TryTransformToAutoDiff(node.GetSubtree(i), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 328 346 term = null; 329 347 return false; … … 340 358 foreach (var subTree in node.Subtrees) { 341 359 AutoDiff.Term t; 342 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, updateVariableWeights, out t)) {360 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 343 361 term = null; 344 362 return false; … … 355 373 foreach (var subTree in node.Subtrees) { 356 374 AutoDiff.Term t; 357 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, updateVariableWeights, out t)) {375 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 358 376 term = null; 359 377 return false; … … 367 385 if (node.Symbol is Logarithm) { 368 386 AutoDiff.Term t; 369 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {387 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 370 388 term = null; 371 389 return false; … … 377 395 if (node.Symbol is Exponential) { 378 396 AutoDiff.Term t; 379 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {397 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 380 398 term = null; 381 399 return false; … … 387 405 if (node.Symbol is Square) { 388 406 AutoDiff.Term t; 389 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, 
updateVariableWeights, out t)) {407 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 390 408 term = null; 391 409 return false; … … 397 415 if (node.Symbol is SquareRoot) { 398 416 AutoDiff.Term t; 399 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {417 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 400 418 term = null; 401 419 return false; … … 407 425 if (node.Symbol is Sine) { 408 426 AutoDiff.Term t; 409 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {427 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 410 428 term = null; 411 429 return false; … … 417 435 if (node.Symbol is Cosine) { 418 436 AutoDiff.Term t; 419 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {437 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 420 438 term = null; 421 439 return false; … … 427 445 if (node.Symbol is Tangent) { 428 446 AutoDiff.Term t; 429 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {447 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 430 448 term = null; 431 449 return false; … … 437 455 if (node.Symbol is Erf) { 438 456 AutoDiff.Term t; 439 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {457 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, 
parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 440 458 term = null; 441 459 return false; … … 447 465 if (node.Symbol is Norm) { 448 466 AutoDiff.Term t; 449 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {467 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 450 468 term = null; 451 469 return false; … … 461 479 variables.Add(alpha); 462 480 AutoDiff.Term branchTerm; 463 if (TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out branchTerm)) {481 if (TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out branchTerm)) { 464 482 term = branchTerm * alpha + beta; 465 483 return true; … … 478 496 where 479 497 !(n.Symbol is Variable) && 498 !(n.Symbol is FactorVariable) && 480 499 !(n.Symbol is Constant) && 481 500 !(n.Symbol is Addition) && -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SymbolicRegressionSolutionImpactValuesCalculator.cs
r14185 r14232 41 41 [StorableConstructor] 42 42 protected SymbolicRegressionSolutionImpactValuesCalculator(bool deserializing) : base(deserializing) { } 43 public override double CalculateReplacementValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows) {44 var regressionModel = (ISymbolicRegressionModel)model;45 var regressionProblemData = (IRegressionProblemData)problemData;46 47 return CalculateReplacementValue(node, regressionModel.SymbolicExpressionTree, regressionModel.Interpreter, regressionProblemData.Dataset, rows);48 }49 50 public override double CalculateImpactValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, double qualityForImpactsCalculation = double.NaN) {51 double impactValue, replacementValue, newQualityForImpactsCalculation;52 CalculateImpactAndReplacementValues(model, node, problemData, rows, out impactValue, out replacementValue, out newQualityForImpactsCalculation, qualityForImpactsCalculation);53 return impactValue;54 }55 43 56 44 public override void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, 57 45 IDataAnalysisProblemData problemData, IEnumerable<int> rows, out double impactValue, out double replacementValue, out double newQualityForImpactsCalculation, 58 double qualityForImpactsCalculation = Double.NaN) {46 double qualityForImpactsCalculation = double.NaN) { 59 47 var regressionModel = (ISymbolicRegressionModel)model; 60 48 var regressionProblemData = (IRegressionProblemData)problemData; … … 63 51 var targetValues = dataset.GetDoubleValues(regressionProblemData.TargetVariable, rows); 64 52 65 OnlineCalculatorError errorState;66 53 if (double.IsNaN(qualityForImpactsCalculation)) 67 54 qualityForImpactsCalculation = CalculateQualityForImpacts(regressionModel, regressionProblemData, rows); 68 69 replacementValue = 
CalculateReplacementValue(regressionModel, node, regressionProblemData, rows);70 var constantNode = new ConstantTreeNode(new Constant()) { Value = replacementValue };71 55 72 56 var cloner = new Cloner(); … … 76 60 var tempModelParentNode = tempModelNode.Parent; 77 61 int i = tempModelParentNode.IndexOfSubtree(tempModelNode); 78 tempModelParentNode.RemoveSubtree(i);79 tempModelParentNode.InsertSubtree(i, constantNode);80 62 81 var estimatedValues = tempModel.GetEstimatedValues(dataset, rows); 82 double r = OnlinePearsonsRCalculator.Calculate(targetValues, estimatedValues, out errorState); 83 if (errorState != OnlineCalculatorError.None) r = 0.0; 84 newQualityForImpactsCalculation = r * r; 63 double bestReplacementValue = 0.0; 64 double bestImpactValue = double.NegativeInfinity; 65 newQualityForImpactsCalculation = qualityForImpactsCalculation; // initialize 66 // try the potentially reasonable replacement values and use the best one 67 foreach (var repValue in CalculateReplacementValues(node, regressionModel.SymbolicExpressionTree, regressionModel.Interpreter, regressionProblemData.Dataset, regressionProblemData.TrainingIndices)) { 85 68 86 impactValue = qualityForImpactsCalculation - newQualityForImpactsCalculation; 69 tempModelParentNode.RemoveSubtree(i); 70 71 var constantNode = new ConstantTreeNode(new Constant()) { Value = repValue }; 72 73 tempModelParentNode.InsertSubtree(i, constantNode); 74 75 var estimatedValues = tempModel.GetEstimatedValues(dataset, rows); 76 OnlineCalculatorError errorState; 77 double r = OnlinePearsonsRCalculator.Calculate(targetValues, estimatedValues, out errorState); 78 if (errorState != OnlineCalculatorError.None) r = 0.0; 79 newQualityForImpactsCalculation = r * r; 80 81 impactValue = qualityForImpactsCalculation - newQualityForImpactsCalculation; 82 if (impactValue > bestImpactValue) { 83 bestImpactValue = impactValue; 84 bestReplacementValue = repValue; 85 } 86 } 87 replacementValue = bestReplacementValue; 88 impactValue = 
bestImpactValue; 87 89 } 88 90
Note: See TracChangeset for help on using the changeset viewer.