Changeset 14232
- Timestamp: 08/03/16 18:54:14 (8 years ago)
- Location: branches/symbreg-factors-2650
- Files: 24 edited, 1 copied
Legend:
- Unmodified
- Added
- Removed
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification.Views/3.4/InteractiveSymbolicClassificationSolutionSimplifierViewBase.cs
r14185 r14232 59 59 return model; 60 60 } 61 protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateReplacementValues(ISymbolicExpressionTree tree) {62 return tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix().ToDictionary(63 n => n,64 n => calculator.CalculateReplacementValue(Content.Model, n, Content.ProblemData, Content.ProblemData.TrainingIndices)65 );66 }67 68 protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateImpactValues(ISymbolicExpressionTree tree) {69 var values = CalculateImpactAndReplacementValues(tree);70 return values.ToDictionary(x => x.Key, x => x.Value.Item1);71 }72 61 73 62 protected override Dictionary<ISymbolicExpressionTreeNode, Tuple<double, double>> CalculateImpactAndReplacementValues(ISymbolicExpressionTree tree) { -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Classification/3.4/SymbolicClassificationSolutionImpactValuesCalculator.cs
r14185 r14232 40 40 protected SymbolicClassificationSolutionImpactValuesCalculator(bool deserializing) : base(deserializing) { } 41 41 42 public override double CalculateReplacementValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows) { 43 var classificationModel = (ISymbolicClassificationModel)model; 44 var classificationProblemData = (IClassificationProblemData)problemData; 45 46 return CalculateReplacementValue(node, classificationModel.SymbolicExpressionTree, classificationModel.Interpreter, classificationProblemData.Dataset, rows); 47 } 48 49 public override double CalculateImpactValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, double qualityForImpactsCalculation = double.NaN) { 50 double impactValue, replacementValue; 51 double newQualityForImpactsCalculation; 52 CalculateImpactAndReplacementValues(model, node, problemData, rows, out impactValue, out replacementValue, out newQualityForImpactsCalculation, qualityForImpactsCalculation); 53 return impactValue; 54 } 55 56 public override void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, 57 IDataAnalysisProblemData problemData, IEnumerable<int> rows, out double impactValue, out double replacementValue, out double newQualityForImpactsCalculation, 42 public override void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, 43 ISymbolicExpressionTreeNode node, 44 IDataAnalysisProblemData problemData, IEnumerable<int> rows, out double impactValue, out double replacementValue, 45 out double newQualityForImpactsCalculation, 58 46 double qualityForImpactsCalculation = Double.NaN) { 59 47 var classificationModel = (ISymbolicClassificationModel)model; … … 63 51 qualityForImpactsCalculation = CalculateQualityForImpacts(classificationModel, classificationProblemData, rows); 64 52 65 
replacementValue = CalculateReplacementValue(classificationModel, node, classificationProblemData, rows);66 var constantNode = new ConstantTreeNode(new Constant()) { Value = replacementValue };67 53 68 54 var cloner = new Cloner(); … … 72 58 var tempModelParentNode = tempModelNode.Parent; 73 59 int i = tempModelParentNode.IndexOfSubtree(tempModelNode); 74 tempModelParentNode.RemoveSubtree(i); 75 tempModelParentNode.InsertSubtree(i, constantNode); 60 double bestReplacementValue = 0.0; 61 double bestImpactValue = double.NegativeInfinity; 62 newQualityForImpactsCalculation = qualityForImpactsCalculation; // initialize 63 // try the potentially reasonable replacement values and use the best one 64 foreach (var repValue in CalculateReplacementValues(node, classificationModel.SymbolicExpressionTree, classificationModel.Interpreter, classificationProblemData.Dataset, classificationProblemData.TrainingIndices)) { 65 tempModelParentNode.RemoveSubtree(i); 76 66 77 OnlineCalculatorError errorState; 78 var dataset = classificationProblemData.Dataset; 79 var targetClassValues = dataset.GetDoubleValues(classificationProblemData.TargetVariable, rows); 80 var estimatedClassValues = tempModel.GetEstimatedClassValues(dataset, rows); 81 newQualityForImpactsCalculation = OnlineAccuracyCalculator.Calculate(targetClassValues, estimatedClassValues, out errorState); 82 if (errorState != OnlineCalculatorError.None) newQualityForImpactsCalculation = 0.0; 67 var constantNode = new ConstantTreeNode(new Constant()) { Value = repValue }; 68 tempModelParentNode.InsertSubtree(i, constantNode); 83 69 84 impactValue = qualityForImpactsCalculation - newQualityForImpactsCalculation; 70 var dataset = classificationProblemData.Dataset; 71 var targetClassValues = dataset.GetDoubleValues(classificationProblemData.TargetVariable, rows); 72 var estimatedClassValues = tempModel.GetEstimatedClassValues(dataset, rows); 73 OnlineCalculatorError errorState; 74 newQualityForImpactsCalculation = 
OnlineAccuracyCalculator.Calculate(targetClassValues, estimatedClassValues, 75 out errorState); 76 if (errorState != OnlineCalculatorError.None) newQualityForImpactsCalculation = 0.0; 77 78 impactValue = qualityForImpactsCalculation - newQualityForImpactsCalculation; 79 80 if (impactValue > bestImpactValue) { 81 bestImpactValue = impactValue; 82 bestReplacementValue = repValue; 83 } 84 } 85 replacementValue = bestReplacementValue; 86 impactValue = bestImpactValue; 85 87 } 86 88 -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/InteractiveSymbolicRegressionSolutionSimplifierView.cs
r14185 r14232 48 48 } 49 49 50 protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateReplacementValues(ISymbolicExpressionTree tree) {51 return tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix().ToDictionary(52 n => n,53 n => calculator.CalculateReplacementValue(Content.Model, n, Content.ProblemData, Content.ProblemData.TrainingIndices)54 );55 }56 57 protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateImpactValues(ISymbolicExpressionTree tree) {58 var values = CalculateImpactAndReplacementValues(tree);59 return values.ToDictionary(x => x.Key, x => x.Value.Item1);60 }61 50 62 51 protected override Dictionary<ISymbolicExpressionTreeNode, Tuple<double, double>> CalculateImpactAndReplacementValues(ISymbolicExpressionTree tree) { -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/Evaluators/SymbolicRegressionConstantOptimizationEvaluator.cs
r14185 r14232 181 181 List<AutoDiff.Variable> parameters = new List<AutoDiff.Variable>(); 182 182 List<string> variableNames = new List<string>(); 183 List<string> categoricalVariableValues = new List<string>(); 183 184 184 185 AutoDiff.Term func; 185 if (!TryTransformToAutoDiff(tree.Root.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out func))186 if (!TryTransformToAutoDiff(tree.Root.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out func)) 186 187 throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree."); 187 if (variableNames.Count == 0) return 0.0; 188 if (variableNames.Count == 0) return 0.0; // gkronber: constant expressions always have a R² of 0.0 188 189 189 190 AutoDiff.IParametricCompiledTerm compiledFunc = func.Compile(variables.ToArray(), parameters.ToArray()); 190 191 191 List<SymbolicExpressionTreeTerminalNode> terminalNodes = null; 192 List<SymbolicExpressionTreeTerminalNode> terminalNodes = null; // gkronber only used for extraction of initial constants 192 193 if (updateVariableWeights) 193 194 terminalNodes = tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>().ToList(); … … 222 223 foreach (var r in rows) { 223 224 for (int col = 0; col < variableNames.Count; col++) { 224 x[row, col] = ds.GetDoubleValue(variableNames[col], r); 225 if (ds.VariableHasType<double>(variableNames[col])) { 226 x[row, col] = ds.GetDoubleValue(variableNames[col], r); 227 } else if (ds.VariableHasType<string>(variableNames[col])) { 228 x[row, col] = ds.GetStringValue(variableNames[col], r) == categoricalVariableValues[col] ? 
1 : 0; 229 } else throw new InvalidProgramException("found a variable of unknown type"); 225 230 } 226 231 row++; … … 286 291 } 287 292 288 private static bool TryTransformToAutoDiff(ISymbolicExpressionTreeNode node, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters, List<string> variableNames, bool updateVariableWeights, out AutoDiff.Term term) { 293 private static bool TryTransformToAutoDiff(ISymbolicExpressionTreeNode node, List<AutoDiff.Variable> variables, List<AutoDiff.Variable> parameters, 294 List<string> variableNames, List<string> categoricalVariableValues, bool updateVariableWeights, out AutoDiff.Term term) { 289 295 if (node.Symbol is Constant) { 290 296 var var = new AutoDiff.Variable(); … … 298 304 parameters.Add(par); 299 305 variableNames.Add(varNode.VariableName); 306 categoricalVariableValues.Add(string.Empty); // as a value as placeholder (variableNames.Length == catVariableValues.Length) 300 307 301 308 if (updateVariableWeights) { … … 308 315 return true; 309 316 } 317 if (node.Symbol is FactorVariable) { 318 // nothing to update in this case (like a variable without a weight) 319 // values are only 0 or 1 and set in x accordingly 320 var factorNode = node as FactorVariableTreeNode; 321 var par = new AutoDiff.Variable(); 322 parameters.Add(par); 323 variableNames.Add(factorNode.VariableName); 324 categoricalVariableValues.Add(factorNode.VariableValue); 325 term = par; 326 return true; 327 } 310 328 if (node.Symbol is Addition) { 311 329 List<AutoDiff.Term> terms = new List<Term>(); 312 330 foreach (var subTree in node.Subtrees) { 313 331 AutoDiff.Term t; 314 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, updateVariableWeights, out t)) {332 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 315 333 term = null; 316 334 return false; … … 325 343 for (int i = 0; i < node.SubtreeCount; i++) { 326 344 AutoDiff.Term t; 
327 if (!TryTransformToAutoDiff(node.GetSubtree(i), variables, parameters, variableNames, updateVariableWeights, out t)) {345 if (!TryTransformToAutoDiff(node.GetSubtree(i), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 328 346 term = null; 329 347 return false; … … 340 358 foreach (var subTree in node.Subtrees) { 341 359 AutoDiff.Term t; 342 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, updateVariableWeights, out t)) {360 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 343 361 term = null; 344 362 return false; … … 355 373 foreach (var subTree in node.Subtrees) { 356 374 AutoDiff.Term t; 357 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, updateVariableWeights, out t)) {375 if (!TryTransformToAutoDiff(subTree, variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 358 376 term = null; 359 377 return false; … … 367 385 if (node.Symbol is Logarithm) { 368 386 AutoDiff.Term t; 369 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {387 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 370 388 term = null; 371 389 return false; … … 377 395 if (node.Symbol is Exponential) { 378 396 AutoDiff.Term t; 379 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {397 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 380 398 term = null; 381 399 return false; … … 387 405 if (node.Symbol is Square) { 388 406 AutoDiff.Term t; 389 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, 
updateVariableWeights, out t)) {407 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 390 408 term = null; 391 409 return false; … … 397 415 if (node.Symbol is SquareRoot) { 398 416 AutoDiff.Term t; 399 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {417 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 400 418 term = null; 401 419 return false; … … 407 425 if (node.Symbol is Sine) { 408 426 AutoDiff.Term t; 409 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {427 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 410 428 term = null; 411 429 return false; … … 417 435 if (node.Symbol is Cosine) { 418 436 AutoDiff.Term t; 419 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {437 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 420 438 term = null; 421 439 return false; … … 427 445 if (node.Symbol is Tangent) { 428 446 AutoDiff.Term t; 429 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {447 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 430 448 term = null; 431 449 return false; … … 437 455 if (node.Symbol is Erf) { 438 456 AutoDiff.Term t; 439 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {457 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, 
parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 440 458 term = null; 441 459 return false; … … 447 465 if (node.Symbol is Norm) { 448 466 AutoDiff.Term t; 449 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out t)) {467 if (!TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out t)) { 450 468 term = null; 451 469 return false; … … 461 479 variables.Add(alpha); 462 480 AutoDiff.Term branchTerm; 463 if (TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out branchTerm)) {481 if (TryTransformToAutoDiff(node.GetSubtree(0), variables, parameters, variableNames, categoricalVariableValues, updateVariableWeights, out branchTerm)) { 464 482 term = branchTerm * alpha + beta; 465 483 return true; … … 478 496 where 479 497 !(n.Symbol is Variable) && 498 !(n.Symbol is FactorVariable) && 480 499 !(n.Symbol is Constant) && 481 500 !(n.Symbol is Addition) && -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SymbolicRegressionSolutionImpactValuesCalculator.cs
r14185 r14232 41 41 [StorableConstructor] 42 42 protected SymbolicRegressionSolutionImpactValuesCalculator(bool deserializing) : base(deserializing) { } 43 public override double CalculateReplacementValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows) {44 var regressionModel = (ISymbolicRegressionModel)model;45 var regressionProblemData = (IRegressionProblemData)problemData;46 47 return CalculateReplacementValue(node, regressionModel.SymbolicExpressionTree, regressionModel.Interpreter, regressionProblemData.Dataset, rows);48 }49 50 public override double CalculateImpactValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, double qualityForImpactsCalculation = double.NaN) {51 double impactValue, replacementValue, newQualityForImpactsCalculation;52 CalculateImpactAndReplacementValues(model, node, problemData, rows, out impactValue, out replacementValue, out newQualityForImpactsCalculation, qualityForImpactsCalculation);53 return impactValue;54 }55 43 56 44 public override void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, 57 45 IDataAnalysisProblemData problemData, IEnumerable<int> rows, out double impactValue, out double replacementValue, out double newQualityForImpactsCalculation, 58 double qualityForImpactsCalculation = Double.NaN) {46 double qualityForImpactsCalculation = double.NaN) { 59 47 var regressionModel = (ISymbolicRegressionModel)model; 60 48 var regressionProblemData = (IRegressionProblemData)problemData; … … 63 51 var targetValues = dataset.GetDoubleValues(regressionProblemData.TargetVariable, rows); 64 52 65 OnlineCalculatorError errorState;66 53 if (double.IsNaN(qualityForImpactsCalculation)) 67 54 qualityForImpactsCalculation = CalculateQualityForImpacts(regressionModel, regressionProblemData, rows); 68 69 replacementValue = 
CalculateReplacementValue(regressionModel, node, regressionProblemData, rows);70 var constantNode = new ConstantTreeNode(new Constant()) { Value = replacementValue };71 55 72 56 var cloner = new Cloner(); … … 76 60 var tempModelParentNode = tempModelNode.Parent; 77 61 int i = tempModelParentNode.IndexOfSubtree(tempModelNode); 78 tempModelParentNode.RemoveSubtree(i);79 tempModelParentNode.InsertSubtree(i, constantNode);80 62 81 var estimatedValues = tempModel.GetEstimatedValues(dataset, rows); 82 double r = OnlinePearsonsRCalculator.Calculate(targetValues, estimatedValues, out errorState); 83 if (errorState != OnlineCalculatorError.None) r = 0.0; 84 newQualityForImpactsCalculation = r * r; 63 double bestReplacementValue = 0.0; 64 double bestImpactValue = double.NegativeInfinity; 65 newQualityForImpactsCalculation = qualityForImpactsCalculation; // initialize 66 // try the potentially reasonable replacement values and use the best one 67 foreach (var repValue in CalculateReplacementValues(node, regressionModel.SymbolicExpressionTree, regressionModel.Interpreter, regressionProblemData.Dataset, regressionProblemData.TrainingIndices)) { 85 68 86 impactValue = qualityForImpactsCalculation - newQualityForImpactsCalculation; 69 tempModelParentNode.RemoveSubtree(i); 70 71 var constantNode = new ConstantTreeNode(new Constant()) { Value = repValue }; 72 73 tempModelParentNode.InsertSubtree(i, constantNode); 74 75 var estimatedValues = tempModel.GetEstimatedValues(dataset, rows); 76 OnlineCalculatorError errorState; 77 double r = OnlinePearsonsRCalculator.Calculate(targetValues, estimatedValues, out errorState); 78 if (errorState != OnlineCalculatorError.None) r = 0.0; 79 newQualityForImpactsCalculation = r * r; 80 81 impactValue = qualityForImpactsCalculation - newQualityForImpactsCalculation; 82 if (impactValue > bestImpactValue) { 83 bestImpactValue = impactValue; 84 bestReplacementValue = repValue; 85 } 86 } 87 replacementValue = bestReplacementValue; 88 impactValue = 
bestImpactValue; 87 89 } 88 90 -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.TimeSeriesPrognosis.Views/3.4/InteractiveSymbolicTimeSeriesPrognosisSolutionSimplifierView.cs
r14185 r14232 88 88 } 89 89 90 protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateReplacementValues(ISymbolicExpressionTree tree) {91 var replacementValues = new Dictionary<ISymbolicExpressionTreeNode, double>();92 foreach (var componentBranch in tree.Root.GetSubtree(0).Subtrees)93 foreach (ISymbolicExpressionTreeNode node in componentBranch.IterateNodesPrefix()) {94 replacementValues[node] = CalculateReplacementValue(node, tree);95 }96 return replacementValues;97 }98 99 protected override Dictionary<ISymbolicExpressionTreeNode, double> CalculateImpactValues(ISymbolicExpressionTree tree) {100 var impactAndReplacementValues = CalculateImpactAndReplacementValues(tree);101 return impactAndReplacementValues.ToDictionary(x => x.Key, x => x.Value.Item1); // item1 of the tuple is the impact value102 }103 104 90 private double CalculateReplacementValue(ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree) { 105 91 // remove old ADFs -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic.Views/3.4/InteractiveSymbolicDataAnalysisSolutionSimplifierView.cs
r14185 r14232 174 174 } 175 175 176 protected abstract Dictionary<ISymbolicExpressionTreeNode, double> CalculateReplacementValues(ISymbolicExpressionTree tree);177 protected abstract Dictionary<ISymbolicExpressionTreeNode, double> CalculateImpactValues(ISymbolicExpressionTree tree);178 176 protected abstract Dictionary<ISymbolicExpressionTreeNode, Tuple<double, double>> CalculateImpactAndReplacementValues(ISymbolicExpressionTree tree); 179 177 protected abstract void UpdateModel(ISymbolicExpressionTree tree); -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisVariableFrequencyAnalyzer.cs
r14185 r14232 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Globalization; 24 25 using System.Linq; 25 26 using HeuristicLab.Analysis; … … 41 42 private const string VariableFrequenciesParameterName = "VariableFrequencies"; 42 43 private const string AggregateLaggedVariablesParameterName = "AggregateLaggedVariables"; 44 private const string AggregateFactorVariablesParameterName = "AggregateFactorVariables"; 43 45 private const string VariableImpactsParameterName = "VariableImpacts"; 44 46 … … 52 54 public IValueLookupParameter<BoolValue> AggregateLaggedVariablesParameter { 53 55 get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateLaggedVariablesParameterName]; } 56 } 57 public IValueLookupParameter<BoolValue> AggregateFactorVariablesParameter { 58 get { return (IValueLookupParameter<BoolValue>)Parameters[AggregateFactorVariablesParameterName]; } 54 59 } 55 60 #endregion … … 59 64 set { AggregateLaggedVariablesParameter.Value = value; } 60 65 } 66 public BoolValue AggregateFactorVariables { 67 get { return AggregateFactorVariablesParameter.ActualValue; } 68 set { AggregateFactorVariablesParameter.Value = value; } 69 } 61 70 #endregion 62 71 [StorableConstructor] … … 70 79 Parameters.Add(new LookupParameter<DoubleMatrix>(VariableImpactsParameterName, "The relative variable relevance calculated as the average relative variable frequency over the whole run.")); 71 80 Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateLaggedVariablesParameterName, "Switch that determines whether all references to a variable should be aggregated regardless of time-offsets. Turn off to analyze all variable references with different time offsets separately.", new BoolValue(true))); 81 Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateFactorVariablesParameterName, "Switch that determines whether all references to factor variables should be aggregated regardless of the value. 
Turn off to analyze all factor variable references with different values separately.", new BoolValue(true))); 82 } 83 84 [StorableHook(HookType.AfterDeserialization)] 85 private void AfterDeserialization() { 86 // BackwardsCompatibility3.3 87 #region Backwards compatible code, remove with 3.4 88 if (!Parameters.ContainsKey(AggregateFactorVariablesParameterName)) { 89 Parameters.Add(new ValueLookupParameter<BoolValue>(AggregateFactorVariablesParameterName, "Switch that determines whether all references to factor variables should be aggregated regardless of the value. Turn off to analyze all factor variable references with different values separately.", new BoolValue(true))); 90 } 91 #endregion 72 92 } 73 93 … … 93 113 int numberOfValues = datatable.Rows.Select(r => r.Values.Count).DefaultIfEmpty().First(); 94 114 95 foreach (var pair in SymbolicDataAnalysisVariableFrequencyAnalyzer.CalculateVariableFrequencies(expressions, AggregateLaggedVariables.Value)) {115 foreach (var pair in CalculateVariableFrequencies(expressions, AggregateLaggedVariables.Value, AggregateFactorVariables.Value)) { 96 116 if (!datatable.Rows.ContainsKey(pair.Key)) { 97 117 // initialize a new row for the variable and pad with zeros … … 128 148 } 129 149 130 public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<ISymbolicExpressionTree> trees, bool aggregateLaggedVariables = true) { 150 public static IEnumerable<KeyValuePair<string, double>> CalculateVariableFrequencies(IEnumerable<ISymbolicExpressionTree> trees, 151 bool aggregateLaggedVariables = true, bool aggregateFactorVariables = true) { 131 152 132 153 var variableFrequencies = trees 133 .SelectMany(t => GetVariableReferences(t, aggregateLaggedVariables ))154 .SelectMany(t => GetVariableReferences(t, aggregateLaggedVariables, aggregateFactorVariables)) 134 155 .GroupBy(pair => pair.Key, pair => pair.Value) 135 156 .ToDictionary(g => g.Key, g => (double)g.Sum()); … … 141 162 } 142 163 143 private 
static IEnumerable<KeyValuePair<string, int>> GetVariableReferences(ISymbolicExpressionTree tree, bool aggregateLaggedVariables = true) { 164 private static IEnumerable<KeyValuePair<string, int>> GetVariableReferences(ISymbolicExpressionTree tree, 165 bool aggregateLaggedVariables = true, bool aggregateFactorVariables = true) { 144 166 Dictionary<string, int> references = new Dictionary<string, int>(); 145 167 if (aggregateLaggedVariables) { … … 151 173 var varCondNode = node as VariableConditionTreeNode; 152 174 IncReferenceCount(references, varCondNode.VariableName); 175 } else if (node.Symbol is FactorVariable) { 176 var factorNode = node as FactorVariableTreeNode; 177 if (aggregateFactorVariables) { 178 IncReferenceCount(references, factorNode.VariableName); 179 } else { 180 IncReferenceCount(references, factorNode.ToString()); 181 } 153 182 } 154 183 }); 155 184 } else { 156 GetVariableReferences(references, tree.Root, 0 );185 GetVariableReferences(references, tree.Root, 0, aggregateFactorVariables); 157 186 } 158 187 return references; 159 188 } 160 189 161 private static void GetVariableReferences(Dictionary<string, int> references, ISymbolicExpressionTreeNode node, int currentLag ) {190 private static void GetVariableReferences(Dictionary<string, int> references, ISymbolicExpressionTreeNode node, int currentLag, bool aggregateFactorVariables) { 162 191 if (node.Symbol is LaggedVariable) { 163 192 var laggedVarNode = node as LaggedVariableTreeNode; … … 166 195 var varNode = node as VariableTreeNode; 167 196 IncReferenceCount(references, varNode.VariableName, currentLag); 197 } else if (node.Symbol is FactorVariable) { 198 var factorNode = node as FactorVariableTreeNode; 199 if (aggregateFactorVariables) { 200 IncReferenceCount(references, factorNode.VariableName, currentLag); 201 } else { 202 IncReferenceCount(references, factorNode.ToString(), currentLag); 203 } 168 204 } else if (node.Symbol is VariableCondition) { 169 205 var varCondNode = node as 
VariableConditionTreeNode; 170 206 IncReferenceCount(references, varCondNode.VariableName, currentLag); 171 GetVariableReferences(references, node.GetSubtree(0), currentLag );172 GetVariableReferences(references, node.GetSubtree(1), currentLag );207 GetVariableReferences(references, node.GetSubtree(0), currentLag, aggregateFactorVariables); 208 GetVariableReferences(references, node.GetSubtree(1), currentLag, aggregateFactorVariables); 173 209 } else if (node.Symbol is Integral) { 174 210 var laggedNode = node as LaggedTreeNode; 175 211 for (int l = laggedNode.Lag; l <= 0; l++) { 176 GetVariableReferences(references, node.GetSubtree(0), currentLag + l );212 GetVariableReferences(references, node.GetSubtree(0), currentLag + l, aggregateFactorVariables); 177 213 } 178 214 } else if (node.Symbol is Derivative) { 179 215 for (int l = -4; l <= 0; l++) { 180 GetVariableReferences(references, node.GetSubtree(0), currentLag + l );216 GetVariableReferences(references, node.GetSubtree(0), currentLag + l, aggregateFactorVariables); 181 217 } 182 218 } else if (node.Symbol is TimeLag) { 183 219 var laggedNode = node as LaggedTreeNode; 184 GetVariableReferences(references, node.GetSubtree(0), currentLag + laggedNode.Lag );220 GetVariableReferences(references, node.GetSubtree(0), currentLag + laggedNode.Lag, aggregateFactorVariables); 185 221 } else { 186 222 foreach (var subtree in node.Subtrees) { 187 GetVariableReferences(references, subtree, currentLag );223 GetVariableReferences(references, subtree, currentLag, aggregateFactorVariables); 188 224 } 189 225 } -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Grammars/ArithmeticExpressionGrammar.cs
r14185 r14232 53 53 constant.MaxValue = 20; 54 54 var variableSymbol = new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable(); 55 var factorVariableSymbol = new FactorVariable(); 55 56 56 var allSymbols = new List<Symbol>() { add, sub, mul, div, constant, variableSymbol };57 var allSymbols = new List<Symbol>() { add, sub, mul, div, constant, variableSymbol, factorVariableSymbol }; 57 58 var functionSymbols = new List<Symbol>() { add, sub, mul, div }; 58 59 … … 65 66 SetSubtreeCount(constant, 0, 0); 66 67 SetSubtreeCount(variableSymbol, 0, 0); 68 SetSubtreeCount(factorVariableSymbol, 0, 0); 67 69 68 70 // allow each symbol as child of the start symbol -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Grammars/FullFunctionalExpressionGrammar.cs
r14185 r14232 115 115 constant.MaxValue = 20; 116 116 var variableSymbol = new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable(); 117 var factorVariable = new FactorVariable(); 117 118 var laggedVariable = new LaggedVariable(); 118 119 laggedVariable.InitialFrequency = 0.0; … … 123 124 var allSymbols = new List<Symbol>() { add, sub, mul, div, mean, sin, cos, tan, log, square, pow, sqrt, root, exp, 124 125 airyA, airyB, bessel, cosineIntegral, dawson, erf, expIntegralEi, fresnelCosineIntegral, fresnelSineIntegral, gamma, hypCosineIntegral, hypSineIntegral, norm, psi, sineIntegral, 125 @if, gt, lt, and, or, not,xor, timeLag, integral, derivative, constant, variableSymbol, laggedVariable,autoregressiveVariable, variableCondition };126 @if, gt, lt, and, or, not,xor, timeLag, integral, derivative, constant, variableSymbol, factorVariable, laggedVariable,autoregressiveVariable, variableCondition }; 126 127 var unaryFunctionSymbols = new List<Symbol>() { square, sqrt, sin, cos, tan, log, exp, not, timeLag, integral, derivative, 127 128 airyA, airyB, bessel, cosineIntegral, dawson, erf, expIntegralEi, fresnelCosineIntegral, fresnelSineIntegral, gamma, hypCosineIntegral, hypSineIntegral, norm, psi, sineIntegral … … 130 131 var binaryFunctionSymbols = new List<Symbol>() { pow, root, gt, lt, variableCondition }; 131 132 var ternarySymbols = new List<Symbol>() { add, sub, mul, div, mean, and, or, xor }; 132 var terminalSymbols = new List<Symbol>() { variableSymbol, constant, laggedVariable, autoregressiveVariable };133 var terminalSymbols = new List<Symbol>() { variableSymbol, factorVariable, constant, laggedVariable, autoregressiveVariable }; 133 134 134 135 foreach (var symb in allSymbols) -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Grammars/TypeCoherentExpressionGrammar.cs
r14185 r14232 104 104 constant.MaxValue = 20; 105 105 var variableSymbol = new Variable(); 106 var factorVariable = new FactorVariable(); 106 107 var laggedVariable = new LaggedVariable(); 107 108 var autoregressiveVariable = new AutoregressiveTargetVariable(); … … 114 115 var specialFunctions = new GroupSymbol(SpecialFunctionsName, new List<ISymbol> { airyA, airyB, bessel, cosineIntegral, dawson, erf, expIntegralEi, 115 116 fresnelCosineIntegral,fresnelSineIntegral,gamma,hypCosineIntegral,hypSineIntegral,norm, psi, sineIntegral}); 116 var terminalSymbols = new GroupSymbol(TerminalsName, new List<ISymbol> { constant, variableSymbol });117 var terminalSymbols = new GroupSymbol(TerminalsName, new List<ISymbol> { constant, variableSymbol, factorVariable }); 117 118 var realValuedSymbols = new GroupSymbol(RealValuedSymbolsName, new List<ISymbol>() { arithmeticSymbols, trigonometricSymbols, exponentialAndLogarithmicSymbols, specialFunctions, terminalSymbols }); 118 119 … … 122 123 var comparisonSymbols = new GroupSymbol(ComparisonsName, new List<ISymbol> { gt, lt }); 123 124 var booleanOperationSymbols = new GroupSymbol(BooleanOperatorsName, new List<ISymbol> { and, or, not, xor }); 124 var conditionalSymbols = new GroupSymbol(ConditionalSymbolsName, new List<ISymbol> { conditionSymbols, comparisonSymbols, booleanOperationSymbols }); 125 var conditionalSymbols = new GroupSymbol(ConditionalSymbolsName, new List<ISymbol> { conditionSymbols, comparisonSymbols, booleanOperationSymbols }); // TODO: factorVariableBool? 125 126 126 127 var timeSeriesSymbols = new GroupSymbol(TimeSeriesSymbolsName, new List<ISymbol> { timeLag, integral, derivative, laggedVariable, autoregressiveVariable }); -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj
r14024 r14232 198 198 <Compile Include="Symbols\AiryB.cs" /> 199 199 <Compile Include="Symbols\Bessel.cs" /> 200 <Compile Include="Symbols\FactorVariable.cs" /> 201 <Compile Include="Symbols\FactorVariableTreeNode.cs" /> 200 202 <Compile Include="Symbols\Xor.cs" /> 201 203 <Compile Include="Symbols\Erf.cs" /> -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interfaces/ISymbolicDataAnalysisImpactValuesCalculator.cs
r12720 r14232 5 5 namespace HeuristicLab.Problems.DataAnalysis.Symbolic { 6 6 public interface ISymbolicDataAnalysisSolutionImpactValuesCalculator : IItem { 7 double CalculateReplacementValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows);8 double CalculateImpactValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, double qualityForImpactsCalculation = double.NaN);9 7 void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, 10 8 IEnumerable<int> rows, out double impactValue, out double replacementValue, out double newQualityForImpactsCalculation, double qualityForImpactsCalculation = double.NaN); -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/OpCodes.cs
r14185 r14232 83 83 public const byte Erf = 43; 84 84 public const byte Bessel = 44; 85 public const byte FactorVariable = 46; 85 86 86 87 private static Dictionary<Type, byte> symbolToOpcode = new Dictionary<Type, byte>() { … … 130 131 { typeof(Norm), OpCodes.Norm}, 131 132 { typeof(Erf), OpCodes.Erf}, 132 { typeof(Bessel), OpCodes.Bessel} 133 { typeof(Bessel), OpCodes.Bessel}, 134 { typeof(FactorVariable), OpCodes.FactorVariable } 133 135 }; 134 136 -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/SymbolicDataAnalysisExpressionTreeILEmittingInterpreter.cs
r14185 r14232 66 66 private static MethodInfo erf = thisType.GetMethod("Erf", new Type[] { typeof(double) }); 67 67 private static MethodInfo bessel = thisType.GetMethod("Bessel", new Type[] { typeof(double) }); 68 private static MethodInfo string_eq = typeof(string).GetMethod("Equals", new Type[] {typeof(string)}); 68 69 #endregion 69 70 … … 627 628 return; 628 629 } 630 case OpCodes.FactorVariable: { 631 FactorVariableTreeNode varNode = currentInstr.dynamicNode as FactorVariableTreeNode; 632 il.Emit(System.Reflection.Emit.OpCodes.Ldarg_1); // load columns array 633 il.Emit(System.Reflection.Emit.OpCodes.Ldc_I4, (int)currentInstr.data); 634 // load correct column of the current variable 635 il.Emit(System.Reflection.Emit.OpCodes.Ldelem_Ref); 636 il.Emit(System.Reflection.Emit.OpCodes.Ldarg_0); // rowIndex 637 if (!state.InLaggedContext) { 638 il.Emit(System.Reflection.Emit.OpCodes.Call, listGetValue); 639 il.Emit(System.Reflection.Emit.OpCodes.Ldc_R8, varNode.VariableValue); 640 il.Emit(System.Reflection.Emit.OpCodes.Call, string_eq); 641 // TODO: convert bool to 1 / 0? 642 } else { 643 var nanResult = il.DefineLabel(); 644 var normalResult = il.DefineLabel(); 645 il.Emit(System.Reflection.Emit.OpCodes.Dup); 646 il.Emit(System.Reflection.Emit.OpCodes.Ldc_I4_0); 647 il.Emit(System.Reflection.Emit.OpCodes.Blt, nanResult); 648 il.Emit(System.Reflection.Emit.OpCodes.Dup); 649 il.Emit(System.Reflection.Emit.OpCodes.Ldc_I4, ds.Rows); 650 il.Emit(System.Reflection.Emit.OpCodes.Bge, nanResult); 651 il.Emit(System.Reflection.Emit.OpCodes.Call, listGetValue); 652 il.Emit(System.Reflection.Emit.OpCodes.Ldc_R8, varNode.VariableValue); 653 il.Emit(System.Reflection.Emit.OpCodes.Call, string_eq); 654 // TODO: convert bool to 1 / 0? 
655 il.Emit(System.Reflection.Emit.OpCodes.Br, normalResult); 656 il.MarkLabel(nanResult); 657 il.Emit(System.Reflection.Emit.OpCodes.Pop); // rowIndex 658 il.Emit(System.Reflection.Emit.OpCodes.Pop); // column reference 659 il.Emit(System.Reflection.Emit.OpCodes.Ldc_R8, double.NaN); 660 il.MarkLabel(normalResult); 661 } 662 return; 663 } 629 664 case OpCodes.LagVariable: { 630 665 var nanResult = il.DefineLabel(); -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/SymbolicDataAnalysisExpressionTreeInterpreter.cs
r14185 r14232 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Linq; 24 25 using HeuristicLab.Common; 25 26 using HeuristicLab.Core; … … 143 144 var variableTreeNode = (VariableTreeNode)instr.dynamicNode; 144 145 instr.data = dataset.GetReadOnlyDoubleValues(variableTreeNode.VariableName); 146 } else if (instr.opCode == OpCodes.FactorVariable) { 147 var factorTreeNode = instr.dynamicNode as FactorVariableTreeNode; 148 instr.data = dataset.GetReadOnlyStringValues(factorTreeNode.VariableName); 145 149 } else if (instr.opCode == OpCodes.LagVariable) { 146 150 var laggedVariableTreeNode = (LaggedVariableTreeNode)instr.dynamicNode; … … 455 459 return ((IList<double>)currentInstr.data)[row] * variableTreeNode.Weight; 456 460 } 461 case OpCodes.FactorVariable: { 462 if (row < 0 || row >= dataset.Rows) return double.NaN; 463 var factorVarTreeNode = currentInstr.dynamicNode as FactorVariableTreeNode; 464 return ((IList<string>)currentInstr.data)[row] == factorVarTreeNode.VariableValue ? 1 : 0; 465 } 457 466 case OpCodes.LagVariable: { 458 467 var laggedVariableTreeNode = (LaggedVariableTreeNode)currentInstr.dynamicNode; -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Interpreter/SymbolicDataAnalysisExpressionTreeLinearInterpreter.cs
r14185 r14232 147 147 var variableTreeNode = (VariableTreeNode)instr.dynamicNode; 148 148 instr.value = ((IList<double>)instr.data)[row] * variableTreeNode.Weight; 149 } 150 } else if (instr.opCode == OpCodes.FactorVariable) { 151 if (row < 0 || row >= dataset.Rows) instr.value = double.NaN; 152 else { 153 var factorTreeNode = instr.dynamicNode as FactorVariableTreeNode; 154 instr.value = ((IList<string>)instr.data)[row] == factorTreeNode.VariableValue ? 1 : 0; 149 155 } 150 156 } else if (instr.opCode == OpCodes.LagVariable) { … … 392 398 } 393 399 break; 400 case OpCodes.FactorVariable: { 401 var factorVariableTreeNode = instr.dynamicNode as FactorVariableTreeNode; 402 instr.data = dataset.GetReadOnlyStringValues(factorVariableTreeNode.VariableName); 403 } 404 break; 394 405 case OpCodes.LagVariable: { 395 406 var laggedVariableTreeNode = (LaggedVariableTreeNode)instr.dynamicNode; -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisModelComplexityCalculator.cs
r14185 r14232 39 39 return 1; 40 40 } 41 case OpCodes.Variable: { 41 case OpCodes.Variable: 42 case OpCodes.FactorVariable: { 42 43 return 2; 43 44 } 44 case OpCodes.Add: 45 case OpCodes.Add: 45 46 case OpCodes.Sub: { 46 47 double complexity = 0; … … 50 51 return complexity; 51 52 } 52 case OpCodes.Mul: 53 case OpCodes.Mul: 53 54 case OpCodes.Div: { 54 55 double complexity = 1; … … 60 61 } 61 62 case OpCodes.Sin: 62 case OpCodes.Cos: 63 case OpCodes.Cos: 63 64 case OpCodes.Tan: 64 case OpCodes.Exp: 65 case OpCodes.Exp: 65 66 case OpCodes.Log: { 66 67 double complexity = CalculateComplexity(node.GetSubtree(0)); … … 75 76 return complexity * complexity * complexity; 76 77 } 77 case OpCodes.Power: 78 case OpCodes.Power: 78 79 case OpCodes.Root: { 79 80 double complexity = CalculateComplexity(node.GetSubtree(0)); -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisProblem.cs
r14185 r14232 208 208 209 209 protected virtual void UpdateGrammar() { 210 SymbolicExpressionTreeGrammar.MaximumFunctionArguments = MaximumFunctionArguments.Value; 211 SymbolicExpressionTreeGrammar.MaximumFunctionDefinitions = MaximumFunctionDefinitions.Value; 212 foreach (var varSymbol in SymbolicExpressionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Variable>()) { 210 var problemData = ProblemData; 211 var ds = problemData.Dataset; 212 var grammar = SymbolicExpressionTreeGrammar; 213 grammar.MaximumFunctionArguments = MaximumFunctionArguments.Value; 214 grammar.MaximumFunctionDefinitions = MaximumFunctionDefinitions.Value; 215 foreach (var varSymbol in grammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.Variable>()) { 213 216 if (!varSymbol.Fixed) { 214 varSymbol.AllVariableNames = ProblemData.InputVariables.Select(x => x.Value);215 varSymbol.VariableNames = ProblemData.AllowedInputVariables;217 varSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<double>(x)); 218 varSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType<double>(x)); 216 219 } 217 220 } 218 foreach (var varSymbol in SymbolicExpressionTreeGrammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.VariableCondition>()) { 221 foreach (var factorSymbol in grammar.Symbols.OfType<FactorVariable>()) { 222 if (!factorSymbol.Fixed) { 223 factorSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<string>(x)); 224 factorSymbol.VariableNames = problemData.AllowedInputVariables.Where(x => ds.VariableHasType<string>(x)); 225 factorSymbol.VariableValues = factorSymbol.VariableNames 226 .ToDictionary(varName => varName, varName => ds.GetStringValues(varName).Distinct().ToList()); 227 } 228 } 229 foreach (var varSymbol in grammar.Symbols.OfType<HeuristicLab.Problems.DataAnalysis.Symbolic.VariableCondition>()) { 219 230 if 
(!varSymbol.Fixed) { 220 varSymbol.AllVariableNames = ProblemData.InputVariables.Select(x => x.Value);221 varSymbol.VariableNames = ProblemData.AllowedInputVariables;231 varSymbol.AllVariableNames = problemData.InputVariables.Select(x => x.Value).Where(x => ds.VariableHasType<double>(x)); 232 varSymbol.VariableNames = problemData.AllowedInputVariables; 222 233 } 223 234 } -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisSolutionImpactValuesCalculator.cs
r14185 r14232 21 21 22 22 using System.Collections.Generic; 23 using System.Linq; 23 24 using HeuristicLab.Common; 24 25 using HeuristicLab.Core; … … 36 37 [StorableConstructor] 37 38 protected SymbolicDataAnalysisSolutionImpactValuesCalculator(bool deserializing) : base(deserializing) { } 38 public abstract double CalculateReplacementValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows);39 public abstract double CalculateImpactValue(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, double qualityForImpactsCalculation = double.NaN);40 39 public abstract void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, out double impactValue, out double replacementValue, out double newQualityForImpactsCalculation, double qualityForImpactsCalculation = double.NaN); 41 40 42 protected static double CalculateReplacementValue(ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,41 protected IEnumerable<double> CalculateReplacementValues(ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, 43 42 IDataset dataset, IEnumerable<int> rows) { 44 43 //optimization: constant nodes return always the same value 45 44 ConstantTreeNode constantNode = node as ConstantTreeNode; 46 if (constantNode != null) return constantNode.Value; 45 FactorVariableTreeNode factorNode = node as FactorVariableTreeNode; 46 if (constantNode != null) { 47 yield return constantNode.Value; 48 } else if (factorNode != null) { 49 // valid replacements are either all off or all on 50 yield return 0; 51 yield return 1; 52 } else { 53 var rootSymbol = new ProgramRootSymbol().CreateTreeNode(); 54 var 
startSymbol = new StartSymbol().CreateTreeNode(); 55 rootSymbol.AddSubtree(startSymbol); 56 startSymbol.AddSubtree((ISymbolicExpressionTreeNode)node.Clone()); 47 57 48 var rootSymbol = new ProgramRootSymbol().CreateTreeNode(); 49 var startSymbol = new StartSymbol().CreateTreeNode(); 50 rootSymbol.AddSubtree(startSymbol); 51 startSymbol.AddSubtree((ISymbolicExpressionTreeNode)node.Clone()); 52 53 var tempTree = new SymbolicExpressionTree(rootSymbol); 54 // clone ADFs of source tree 55 for (int i = 1; i < sourceTree.Root.SubtreeCount; i++) { 56 tempTree.Root.AddSubtree((ISymbolicExpressionTreeNode)sourceTree.Root.GetSubtree(i).Clone()); 58 var tempTree = new SymbolicExpressionTree(rootSymbol); 59 // clone ADFs of source tree 60 for (int i = 1; i < sourceTree.Root.SubtreeCount; i++) { 61 tempTree.Root.AddSubtree((ISymbolicExpressionTreeNode)sourceTree.Root.GetSubtree(i).Clone()); 62 } 63 yield return interpreter.GetSymbolicExpressionTreeValues(tempTree, dataset, rows).Median(); 64 yield return interpreter.GetSymbolicExpressionTreeValues(tempTree, dataset, rows).Average(); // TODO perf 57 65 } 58 return interpreter.GetSymbolicExpressionTreeValues(tempTree, dataset, rows).Median();59 66 } 60 67 } -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Dataset.cs
r13761 r14232 168 168 get { return variableValues.Where(p => p.Value is List<double>).Select(p => p.Key); } 169 169 } 170 171 public IEnumerable<string> StringVariables { 172 get { return variableValues.Where(p => p.Value is List<string>).Select(p => p.Key); } 173 } 174 170 175 public IEnumerable<double> GetDoubleValues(string variableName) { 171 176 return GetValues<double>(variableName); … … 189 194 return GetValues<double>(variableName, rows); 190 195 } 196 197 public string GetStringValue(string variableName, int row) { 198 var values = GetValues<string>(variableName); 199 return values[row]; 200 } 201 202 public IEnumerable<string> GetStringValues(string variableName, IEnumerable<int> rows) { 203 return GetValues<string>(variableName, rows); 204 } 205 public ReadOnlyCollection<string> GetReadOnlyStringValues(string variableName) { 206 var values = GetValues<string>(variableName); 207 return values.AsReadOnly(); 208 } 209 191 210 private IEnumerable<T> GetValues<T>(string variableName, IEnumerable<int> rows) { 192 211 var values = GetValues<T>(variableName); -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/DataAnalysisProblemData.cs
r14185 r14232 131 131 protected DataAnalysisProblemData(IDataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<ITransformation> transformations = null) { 132 132 if (dataset == null) throw new ArgumentNullException("The dataset must not be null."); 133 if (allowedInputVariables == null) throw new ArgumentNullException("The allowed InputVariables must not be null.");134 135 if (allowedInputVariables.Except(dataset.DoubleVariables). Any())136 throw new ArgumentException("All allowed input variables must be present in the dataset and of type double .");137 138 var inputVariables = new CheckedItemList<StringValue>(dataset.DoubleVariables. Select(x => new StringValue(x)));133 if (allowedInputVariables == null) throw new ArgumentNullException("The allowed input variables must not be null."); 134 135 if (allowedInputVariables.Except(dataset.DoubleVariables).Except(dataset.StringVariables).Any()) 136 throw new ArgumentException("All allowed input variables must be present in the dataset and of type double or string."); 137 138 var inputVariables = new CheckedItemList<StringValue>(dataset.DoubleVariables.Concat(dataset.StringVariables).Select(x => new StringValue(x))); 139 139 foreach (StringValue x in inputVariables) 140 140 inputVariables.SetItemCheckedState(x, allowedInputVariables.Contains(x.Value)); -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataAnalysisProblemData.cs
r14185 r14232 30 30 31 31 IDataset Dataset { get; } 32 ICheckedItemList<StringValue> InputVariables { get; } 32 ICheckedItemList<StringValue> InputVariables { get; } // TODO: check all usages of InputVariables (distinguish between doubles and strings) 33 33 IEnumerable<string> AllowedInputVariables { get; } 34 34 -
branches/symbreg-factors-2650/HeuristicLab.Problems.DataAnalysis/3.4/Interfaces/IDataset.cs
r14185 r14232 30 30 IEnumerable<string> VariableNames { get; } 31 31 IEnumerable<string> DoubleVariables { get; } 32 IEnumerable<string> StringVariables { get; } 33 34 bool VariableHasType<T>(string variableName); 32 35 33 36 double GetDoubleValue(string variableName, int row); … … 36 39 ReadOnlyCollection<double> GetReadOnlyDoubleValues(string variableName); 37 40 41 string GetStringValue(string variableName, int row); 38 42 IEnumerable<string> GetStringValues(string variableName); 43 IEnumerable<string> GetStringValues(string variableName, IEnumerable<int> rows); 44 ReadOnlyCollection<string> GetReadOnlyStringValues(string VariableName); 45 39 46 IEnumerable<DateTime> GetDateTimeValues(string variableName); 40 47 }
Note: See TracChangeset
for help on using the changeset viewer.