Changeset 13948 for branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis
- Timestamp: 06/29/16 10:36:52
- Location: branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis
- Files: 43 edited
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed: /trunk/sources/HeuristicLab.Algorithms.DataAnalysis (added) merged: 13889,13891,13895,13898,13917,13921-13922,13941
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/OneR.cs
r13092 → r13948
    }

-   var model = new OneRClassificationModel(bestVariable, bestSplits.Select(s => s.thresholdValue).ToArray(), bestSplits.Select(s => s.classValue).ToArray(), bestMissingValuesClass);
+   var model = new OneRClassificationModel(problemData.TargetVariable, bestVariable, bestSplits.Select(s => s.thresholdValue).ToArray(), bestSplits.Select(s => s.classValue).ToArray(), bestMissingValuesClass);
    var solution = new OneRClassificationSolution(model, (IClassificationProblemData)problemData.Clone());
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/BaselineClassifiers/OneRClassificationModel.cs
r13098 → r13948
    [StorableClass]
    [Item("OneR Classification Model", "A model that uses intervals for one variable to determine the class.")]
-   public class OneRClassificationModel : NamedItem, IClassificationModel {
+   public class OneRClassificationModel : ClassificationModel {
+     public override IEnumerable<string> VariablesUsedForPrediction {
+       get { return new[] { Variable }; }
+     }
+
    [Storable]
    protected string variable;
…
    public override IDeepCloneable Clone(Cloner cloner) { return new OneRClassificationModel(this, cloner); }

-   public OneRClassificationModel(string variable, double[] splits, double[] classes, double missingValuesClass = double.NaN)
-     : base() {
+   public OneRClassificationModel(string targetVariable, string variable, double[] splits, double[] classes, double missingValuesClass = double.NaN)
+     : base(targetVariable) {
    if (splits.Length != classes.Length) {
      throw new ArgumentException("Number of splits and classes has to be equal.");
…
    // uses sorting to return the values in the order of rows, instead of using nested for loops
    // to avoid O(n²) runtime
-   public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
+   public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    var values = dataset.GetDoubleValues(Variable, rows).ToArray();
    var rowsArray = rows.ToArray();
…
    }

-   public IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
+   public override IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
    return new OneRClassificationSolution(this, new ClassificationProblemData(problemData));
    }
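Note: the pattern in this hunk — deriving from a ClassificationModel (or RegressionModel) base class, passing the target variable to its constructor, and overriding VariablesUsedForPrediction, GetEstimatedClassValues and CreateClassificationSolution — repeats in most files of this changeset. The base classes themselves are not part of this changeset; the following is only a rough sketch of what they presumably provide, inferred from the overrides above (member names are taken from the diffs, everything else is an assumption):

    // Hypothetical sketch, not code from this changeset: an abstract base that stores the
    // target variable and declares the members the concrete models above override.
    [StorableClass]
    public abstract class ClassificationModel : NamedItem, IClassificationModel {
      [Storable]
      private string targetVariable;
      public string TargetVariable { get { return targetVariable; } }

      protected ClassificationModel(string targetVariable) {
        this.targetVariable = targetVariable;
      }
      protected ClassificationModel(ClassificationModel original, Cloner cloner)
        : base(original, cloner) {
        this.targetVariable = original.targetVariable;
      }

      // which input variables the model actually reads (needed e.g. by the gradient/partial dependence view)
      public abstract IEnumerable<string> VariablesUsedForPrediction { get; }
      public abstract IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows);
      public abstract IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData);
    }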
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/GBM/GradientBoostingRegressionAlgorithm.cs
r13724 r13948 64 64 private const string SetSeedRandomlyParameterName = "SetSeedRandomly"; 65 65 private const string CreateSolutionParameterName = "CreateSolution"; 66 private const string StoreRunsParameterName = "StoreRuns"; 66 67 private const string RegressionAlgorithmSolutionResultParameterName = "RegressionAlgorithmResult"; 67 68 … … 106 107 get { return (IFixedValueParameter<BoolValue>)Parameters[CreateSolutionParameterName]; } 107 108 } 109 public IFixedValueParameter<BoolValue> StoreRunsParameter { 110 get { return (IFixedValueParameter<BoolValue>)Parameters[StoreRunsParameterName]; } 111 } 108 112 109 113 #endregion … … 144 148 get { return CreateSolutionParameter.Value.Value; } 145 149 set { CreateSolutionParameter.Value.Value = value; } 150 } 151 152 public bool StoreRuns { 153 get { return StoreRunsParameter.Value.Value; } 154 set { StoreRunsParameter.Value.Value = value; } 146 155 } 147 156 … … 178 187 var regressionAlgs = new ItemSet<IAlgorithm>(new IAlgorithm[] { 179 188 new RandomForestRegression(), 180 sgp, 189 sgp, 181 190 mctsSymbReg 182 191 }); … … 206 215 "Flag that indicates if a solution should be produced at the end of the run", new BoolValue(true))); 207 216 Parameters[CreateSolutionParameterName].Hidden = true; 217 Parameters.Add(new FixedValueParameter<BoolValue>(StoreRunsParameterName, 218 "Flag that indicates if the results of the individual runs should be stored for detailed analysis", new BoolValue(false))); 219 Parameters[StoreRunsParameterName].Hidden = true; 208 220 } 209 221 … … 218 230 219 231 var table = new DataTable("Qualities"); 220 table.Rows.Add(new DataRow(" Loss(train)"));221 table.Rows.Add(new DataRow(" Loss(test)"));232 table.Rows.Add(new DataRow("R² (train)")); 233 table.Rows.Add(new DataRow("R² (test)")); 222 234 Results.Add(new Result("Qualities", table)); 223 235 var curLoss = new DoubleValue(); 224 236 var curTestLoss = new DoubleValue(); 225 Results.Add(new Result(" Loss(train)", curLoss));226 Results.Add(new Result(" Loss(test)", curTestLoss));237 Results.Add(new Result("R² (train)", curLoss)); 238 Results.Add(new Result("R² (test)", curTestLoss)); 227 239 var runCollection = new RunCollection(); 228 Results.Add(new Result("Runs", runCollection)); 240 if (StoreRuns) 241 Results.Add(new Result("Runs", runCollection)); 229 242 230 243 // init 231 244 var problemData = Problem.ProblemData; 232 var targetVarName = Problem.ProblemData.TargetVariable;245 var targetVarName = problemData.TargetVariable; 233 246 var activeVariables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable }); 234 247 var modifiableDataset = new ModifiableDataset( … … 273 286 IRegressionModel model; 274 287 IRun run; 288 275 289 // try to find a model. The algorithm might fail to produce a model. 
In this case we just retry until the iterations are exhausted 276 if (TryExecute(alg, RegressionAlgorithmResult, out model, out run)) {290 if (TryExecute(alg, rand.Next(), RegressionAlgorithmResult, out model, out run)) { 277 291 int row = 0; 278 292 // update predictions for training and test … … 303 317 } 304 318 305 runCollection.Add(run); 306 table.Rows["Loss (train)"].Values.Add(curLoss.Value); 307 table.Rows["Loss (test)"].Values.Add(curTestLoss.Value); 319 if (StoreRuns) 320 runCollection.Add(run); 321 table.Rows["R² (train)"].Values.Add(curLoss.Value); 322 table.Rows["R² (test)"].Values.Add(curTestLoss.Value); 308 323 iterations.Value = i + 1; 309 324 } … … 317 332 // just produce an ensemble solution for now (TODO: correct scaling or linear regression for ensemble model weights) 318 333 319 var ensembleModel = new RegressionEnsembleModel(models) { AverageModelEstimates = false }; 320 var ensembleSolution = ensembleModel.CreateRegressionSolution((IRegressionProblemData)problemData.Clone()); 334 var ensembleSolution = CreateEnsembleSolution(models, (IRegressionProblemData)problemData.Clone()); 321 335 Results.Add(new Result("EnsembleSolution", ensembleSolution)); 322 336 } … … 326 340 alg.Prepare(true); 327 341 } 342 } 343 344 private static IRegressionEnsembleSolution CreateEnsembleSolution(List<IRegressionModel> models, 345 IRegressionProblemData problemData) { 346 var rows = problemData.TrainingPartition.Size; 347 var features = models.Count; 348 double[,] inputMatrix = new double[rows, features + 1]; 349 //add model estimates 350 for (int m = 0; m < models.Count; m++) { 351 var model = models[m]; 352 var estimates = model.GetEstimatedValues(problemData.Dataset, problemData.TrainingIndices); 353 int estimatesCounter = 0; 354 foreach (var estimate in estimates) { 355 inputMatrix[estimatesCounter, m] = estimate; 356 estimatesCounter++; 357 } 358 } 359 360 //add target 361 var targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices); 362 int targetCounter = 0; 363 foreach (var target in targets) { 364 inputMatrix[targetCounter, models.Count] = target; 365 targetCounter++; 366 } 367 368 alglib.linearmodel lm = new alglib.linearmodel(); 369 alglib.lrreport ar = new alglib.lrreport(); 370 double[] coefficients; 371 int retVal = 1; 372 alglib.lrbuildz(inputMatrix, rows, features, out retVal, out lm, out ar); 373 if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution"); 374 375 alglib.lrunpack(lm, out coefficients, out features); 376 377 var ensembleModel = new RegressionEnsembleModel(models, coefficients.Take(models.Count)) { AverageModelEstimates = false }; 378 var ensembleSolution = (IRegressionEnsembleSolution)ensembleModel.CreateRegressionSolution(problemData); 379 return ensembleSolution; 328 380 } 329 381 … … 391 443 progRootNode.AddSubtree(startNode); 392 444 var t = new SymbolicExpressionTree(progRootNode); 393 var combinedModel = new SymbolicRegressionModel( t, interpreter, lowerLimit, upperLimit);445 var combinedModel = new SymbolicRegressionModel(problemData.TargetVariable, t, interpreter, lowerLimit, upperLimit); 394 446 var sol = new SymbolicRegressionSolution(combinedModel, problemData); 395 447 return sol; … … 405 457 } 406 458 407 private static bool TryExecute(IAlgorithm alg, string regressionAlgorithmResultName, out IRegressionModel model, out IRun run) {459 private static bool TryExecute(IAlgorithm alg, int seed, string regressionAlgorithmResultName, out IRegressionModel model, out 
IRun run) { 408 460 model = null; 461 SetSeed(alg, seed); 409 462 using (var wh = new AutoResetEvent(false)) { 410 EventHandler<EventArgs<Exception>> handler = (sender, args) => wh.Set(); 463 Exception ex = null; 464 EventHandler<EventArgs<Exception>> handler = (sender, args) => { 465 ex = args.Value; 466 wh.Set(); 467 }; 411 468 EventHandler handler2 = (sender, args) => wh.Set(); 412 469 alg.ExceptionOccurred += handler; … … 417 474 wh.WaitOne(); 418 475 476 if (ex != null) throw new AggregateException(ex); 419 477 run = alg.Runs.Last(); 478 alg.Runs.Clear(); 420 479 var sols = alg.Results.Select(r => r.Value).OfType<IRegressionSolution>(); 421 480 if (!sols.Any()) return false; … … 444 503 return model != null; 445 504 } 505 506 private static void SetSeed(IAlgorithm alg, int seed) { 507 // no common interface for algs that use a PRNG -> use naming convention to set seed 508 var paramItem = alg as IParameterizedItem; 509 510 if (paramItem.Parameters.ContainsKey("SetSeedRandomly")) { 511 ((BoolValue)paramItem.Parameters["SetSeedRandomly"].ActualValue).Value = false; 512 ((IntValue)paramItem.Parameters["Seed"].ActualValue).Value = seed; 513 } else { 514 throw new ArgumentException("Base learner does not have a seed parameter (algorithm {0})", alg.Name); 515 } 516 517 } 446 518 } 447 519 } -
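The new CreateEnsembleSolution above fits per-model weights by linear regression on the training partition: each base model's estimates form one column of inputMatrix, the target variable forms the last column, and alglib.lrbuildz fits coefficients without a constant term, which RegressionEnsembleModel then uses as model weights. Conceptually the resulting ensemble predicts a weighted sum of its base models; a minimal sketch (hypothetical helper, not part of the changeset) is:

    // Illustration only: predict one row as the weighted sum of the base-model estimates,
    // using the coefficients unpacked by alglib.lrunpack as weights.
    private static double PredictWeightedEnsemble(IList<IRegressionModel> models, double[] coefficients,
                                                  IDataset dataset, int row) {
      double prediction = 0.0;
      for (int m = 0; m < models.Count; m++) {
        // GetEstimatedValues yields one estimate per requested row; a single row is requested here
        prediction += coefficients[m] * models[m].GetEstimatedValues(dataset, new[] { row }).Single();
      }
      return prediction;
    }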
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/CovarianceFunctions/CovarianceProduct.cs
r13784 → r13948
    public static IList<double> GetGradient(double[,] x, int i, int j, List<ParameterizedCovarianceFunction> factorFunctions) {
      var covariances = factorFunctions.Select(f => f.Covariance(x, i, j)).ToArray();
-     var gr = new List<double>(factorFunctions.Sum(f => f.CovarianceGradient(x, i, j).Count));
+     var gr = new List<double>();
      for (int ii = 0; ii < factorFunctions.Count; ii++) {
        foreach (var g in factorFunctions[ii].CovarianceGradient(x, i, j)) {
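For context on this hunk: the gradient of a product covariance follows the product rule, i.e. the entries contributed by the hyperparameters of factor j are that factor's gradient scaled by the product of the other factors' covariances (the remainder of the loop, not visible in this hunk, presumably performs exactly this scaling):

    \frac{\partial}{\partial \theta_{j,\ell}} \prod_i k_i(x_p, x_q) \;=\; \frac{\partial k_j(x_p, x_q)}{\partial \theta_{j,\ell}} \prod_{i \neq j} k_i(x_p, x_q)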
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/CovarianceFunctions/CovarianceSum.cs
r13784 → r13948
    sum.CrossCovariance = (x, xt, i, j) => functions.Select(e => e.CrossCovariance(x, xt, i, j)).Sum();
    sum.CovarianceGradient = (x, i, j) => {
-     var g = new List<double>(functions.Sum(e => e.CovarianceGradient(x, i, j).Count));
+     var g = new List<double>();
      foreach (var e in functions)
        g.AddRange(e.CovarianceGradient(x, i, j));
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs
r13823 r13948 34 34 [StorableClass] 35 35 [Item("GaussianProcessModel", "Represents a Gaussian process posterior.")] 36 public sealed class GaussianProcessModel : NamedItem, IGaussianProcessModel { 36 public sealed class GaussianProcessModel : RegressionModel, IGaussianProcessModel { 37 public override IEnumerable<string> VariablesUsedForPrediction { 38 get { return allowedInputVariables; } 39 } 40 37 41 [Storable] 38 42 private double negativeLogLikelihood; … … 61 65 get { return meanFunction; } 62 66 } 63 [Storable] 64 private string targetVariable; 65 public string TargetVariable { 66 get { return targetVariable; } 67 } 67 68 68 [Storable] 69 69 private string[] allowedInputVariables; … … 128 128 this.trainingDataset = cloner.Clone(original.trainingDataset); 129 129 this.negativeLogLikelihood = original.negativeLogLikelihood; 130 this.targetVariable = original.targetVariable;131 130 this.sqrSigmaNoise = original.sqrSigmaNoise; 132 131 if (original.meanParameter != null) { … … 147 146 IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction, 148 147 bool scaleInputs = true) 149 : base( ) {148 : base(targetVariable) { 150 149 this.name = ItemName; 151 150 this.description = ItemDescription; 152 151 this.meanFunction = (IMeanFunction)meanFunction.Clone(); 153 152 this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone(); 154 this.targetVariable = targetVariable;155 153 this.allowedInputVariables = allowedInputVariables.ToArray(); 156 154 … … 182 180 183 181 IEnumerable<double> y; 184 y = ds.GetDoubleValues( targetVariable, rows);182 y = ds.GetDoubleValues(TargetVariable, rows); 185 183 186 184 int n = x.GetLength(0); … … 301 299 302 300 #region IRegressionModel Members 303 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {301 public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 304 302 return GetEstimatedValuesHelper(dataset, rows); 305 303 } 306 public GaussianProcessRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {304 public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 307 305 return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData)); 308 }309 IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {310 return CreateRegressionSolution(problemData);311 306 } 312 307 #endregion … … 392 387 } 393 388 } 389 394 390 } 395 391 } -
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/StudentTProcessModel.cs
r13823 r13948 34 34 [StorableClass] 35 35 [Item("StudentTProcessModel", "Represents a Student-t process posterior.")] 36 public sealed class StudentTProcessModel : NamedItem, IGaussianProcessModel { 36 public sealed class StudentTProcessModel : RegressionModel, IGaussianProcessModel { 37 public override IEnumerable<string> VariablesUsedForPrediction { 38 get { return allowedInputVariables; } 39 } 40 37 41 [Storable] 38 42 private double negativeLogLikelihood; … … 61 65 get { return meanFunction; } 62 66 } 63 [Storable] 64 private string targetVariable; 65 public string TargetVariable { 66 get { return targetVariable; } 67 } 67 68 68 [Storable] 69 69 private string[] allowedInputVariables; … … 131 131 this.trainingDataset = cloner.Clone(original.trainingDataset); 132 132 this.negativeLogLikelihood = original.negativeLogLikelihood; 133 this.targetVariable = original.targetVariable;134 133 if (original.meanParameter != null) { 135 134 this.meanParameter = (double[])original.meanParameter.Clone(); … … 151 150 IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction, 152 151 bool scaleInputs = true) 153 : base( ) {152 : base(targetVariable) { 154 153 this.name = ItemName; 155 154 this.description = ItemDescription; 156 155 this.meanFunction = (IMeanFunction)meanFunction.Clone(); 157 156 this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone(); 158 this.targetVariable = targetVariable;159 157 this.allowedInputVariables = allowedInputVariables.ToArray(); 160 158 … … 186 184 187 185 IEnumerable<double> y; 188 y = ds.GetDoubleValues( targetVariable, rows);186 y = ds.GetDoubleValues(TargetVariable, rows); 189 187 190 188 int n = x.GetLength(0); … … 318 316 319 317 #region IRegressionModel Members 320 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {318 public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 321 319 return GetEstimatedValuesHelper(dataset, rows); 322 320 } 323 public GaussianProcessRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {321 public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 324 322 return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData)); 325 }326 IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {327 return CreateRegressionSolution(problemData);328 323 } 329 324 #endregion -
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesModel.cs
r13157 r13948 33 33 [Item("Gradient boosted tree model", "")] 34 34 // this is essentially a collection of weighted regression models 35 public sealed class GradientBoostedTreesModel : NamedItem, IGradientBoostedTreesModel {35 public sealed class GradientBoostedTreesModel : RegressionModel, IGradientBoostedTreesModel { 36 36 // BackwardsCompatibility3.4 for allowing deserialization & serialization of old models 37 37 #region Backwards compatible code, remove with 3.5 … … 58 58 #endregion 59 59 60 public override IEnumerable<string> VariablesUsedForPrediction { 61 get { return models.SelectMany(x => x.VariablesUsedForPrediction).Distinct().OrderBy(x => x); } 62 } 63 60 64 private readonly IList<IRegressionModel> models; 61 65 public IEnumerable<IRegressionModel> Models { get { return models; } } … … 77 81 } 78 82 [Obsolete("The constructor of GBTModel should not be used directly anymore (use GBTModelSurrogate instead)")] 79 publicGradientBoostedTreesModel(IEnumerable<IRegressionModel> models, IEnumerable<double> weights)80 : base( "Gradient boosted tree model", string.Empty) {83 internal GradientBoostedTreesModel(IEnumerable<IRegressionModel> models, IEnumerable<double> weights) 84 : base(string.Empty, "Gradient boosted tree model", string.Empty) { 81 85 this.models = new List<IRegressionModel>(models); 82 86 this.weights = new List<double>(weights); … … 89 93 } 90 94 91 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {95 public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 92 96 // allocate target array go over all models and add up weighted estimation for each row 93 97 if (!rows.Any()) return Enumerable.Empty<double>(); // return immediately if rows is empty. This prevents multiple iteration over lazy rows enumerable. … … 105 109 } 106 110 107 public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {111 public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 108 112 return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone()); 109 113 } 114 110 115 } 111 116 } -
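As a reminder of what this model computes: GetEstimatedValues accumulates, per row, the weighted sum of the stored regression models, i.e. the usual gradient boosting ensemble form

    \hat{f}(x) = \sum_{i=1}^{M} w_i \, h_i(x)

where the h_i are the stored tree models and the w_i their stored weights.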
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesModelSurrogate.cs
r13157 r13948 22 22 23 23 using System.Collections.Generic; 24 using System.Linq; 24 25 using HeuristicLab.Common; 25 26 using HeuristicLab.Core; … … 33 34 // recalculate the actual GBT model on demand 34 35 [Item("Gradient boosted tree model", "")] 35 public sealed class GradientBoostedTreesModelSurrogate : NamedItem, IGradientBoostedTreesModel {36 public sealed class GradientBoostedTreesModelSurrogate : RegressionModel, IGradientBoostedTreesModel { 36 37 // don't store the actual model! 37 38 private IGradientBoostedTreesModel actualModel; // the actual model is only recalculated when necessary … … 55 56 56 57 58 public override IEnumerable<string> VariablesUsedForPrediction { 59 get { return actualModel.Models.SelectMany(x => x.VariablesUsedForPrediction).Distinct().OrderBy(x => x); } 60 } 61 57 62 [StorableConstructor] 58 63 private GradientBoostedTreesModelSurrogate(bool deserializing) : base(deserializing) { } … … 73 78 74 79 // create only the surrogate model without an actual model 75 public GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed, ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu) 76 : base("Gradient boosted tree model", string.Empty) { 80 public GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed, 81 ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu) 82 : base(trainingProblemData.TargetVariable, "Gradient boosted tree model", string.Empty) { 77 83 this.trainingProblemData = trainingProblemData; 78 84 this.seed = seed; … … 86 92 87 93 // wrap an actual model in a surrograte 88 public GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed, ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu, IGradientBoostedTreesModel model) 94 public GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed, 95 ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu, 96 IGradientBoostedTreesModel model) 89 97 : this(trainingProblemData, seed, lossFunction, iterations, maxSize, r, m, nu) { 90 98 this.actualModel = model; … … 96 104 97 105 // forward message to actual model (recalculate model first if necessary) 98 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {106 public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 99 107 if (actualModel == null) actualModel = RecalculateModel(); 100 108 return actualModel.GetEstimatedValues(dataset, rows); 101 109 } 102 110 103 public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {111 public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 104 112 return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone()); 105 113 } 106 107 114 108 115 private IGradientBoostedTreesModel RecalculateModel() { -
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/RegressionTreeBuilder.cs
r13065 → r13948

-   // processes potential splits from the queue as long as splits are left and the maximum size of the tree is not reached
+   // processes potential splits from the queue as long as splits are remaining and the maximum size of the tree is not reached
    private void CreateRegressionTreeFromQueue(int maxNodes, ILossFunction lossFunction) {
      while (queue.Any() && curTreeNodeIdx + 1 < maxNodes) { // two nodes are created in each loop
…
      // overwrite existing leaf node with an internal node
-     tree[f.ParentNodeIdx] = new RegressionTreeModel.TreeNode(f.SplittingVariable, f.SplittingThreshold, leftTreeIdx, rightTreeIdx);
+     tree[f.ParentNodeIdx] = new RegressionTreeModel.TreeNode(f.SplittingVariable, f.SplittingThreshold, leftTreeIdx, rightTreeIdx, weightLeft: (splitIdx - startIdx + 1) / (double)(endIdx - startIdx + 1));
    }
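The new weightLeft argument records the fraction of training samples in the current partition that fall into the left child: with the partition covering indices startIdx..endIdx and the split placed after splitIdx, it is (splitIdx - startIdx + 1) / (endIdx - startIdx + 1). For example, a partition of 100 rows split after its 30th row stores weightLeft = 0.3. This fraction is what RegressionTreeModel (next file) uses to weight the two subtrees when computing partial dependence.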
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/RegressionTreeModel.cs
r13030 r13948 34 34 [StorableClass] 35 35 [Item("RegressionTreeModel", "Represents a decision tree for regression.")] 36 public sealed class RegressionTreeModel : NamedItem, IRegressionModel { 36 public sealed class RegressionTreeModel : RegressionModel { 37 public override IEnumerable<string> VariablesUsedForPrediction { 38 get { return tree.Select(t => t.VarName).Where(v => v != TreeNode.NO_VARIABLE); } 39 } 37 40 38 41 // trees are represented as a flat array … … 40 43 public readonly static string NO_VARIABLE = null; 41 44 42 public TreeNode(string varName, double val, int leftIdx = -1, int rightIdx = -1 )45 public TreeNode(string varName, double val, int leftIdx = -1, int rightIdx = -1, double weightLeft = -1.0) 43 46 : this() { 44 47 VarName = varName; … … 46 49 LeftIdx = leftIdx; 47 50 RightIdx = rightIdx; 48 } 49 50 public string VarName { get; private set; } // name of the variable for splitting or NO_VARIABLE if terminal node 51 public double Val { get; private set; } // threshold 52 public int LeftIdx { get; private set; } 53 public int RightIdx { get; private set; } 51 WeightLeft = weightLeft; 52 } 53 54 public string VarName { get; internal set; } // name of the variable for splitting or NO_VARIABLE if terminal node 55 public double Val { get; internal set; } // threshold 56 public int LeftIdx { get; internal set; } 57 public int RightIdx { get; internal set; } 58 public double WeightLeft { get; internal set; } // for partial dependence plots (value in range [0..1] describes the fraction of training samples for the left sub-tree 59 54 60 55 61 // necessary because the default implementation of GetHashCode for structs in .NET would only return the hashcode of val here … … 64 70 LeftIdx.Equals(other.LeftIdx) && 65 71 RightIdx.Equals(other.RightIdx) && 72 WeightLeft.Equals(other.WeightLeft) && 66 73 EqualStrings(VarName, other.VarName); 67 74 } else { … … 79 86 private TreeNode[] tree; 80 87 81 [Storable] 88 #region old storable format 89 // remove with HL 3.4 90 [Storable(AllowOneWay = true)] 82 91 // to prevent storing the references to data caches in nodes 83 // TODO seeminglyit is bad (performance-wise) to persist tuples (tuples are used as keys in a dictionary)92 // seemingly, it is bad (performance-wise) to persist tuples (tuples are used as keys in a dictionary) 84 93 private Tuple<string, double, int, int>[] SerializedTree { 85 get { return tree.Select(t => Tuple.Create(t.VarName, t.Val, t.LeftIdx, t.RightIdx)).ToArray(); } 86 set { this.tree = value.Select(t => new TreeNode(t.Item1, t.Item2, t.Item3, t.Item4)).ToArray(); } 87 } 94 // get { return tree.Select(t => Tuple.Create(t.VarName, t.Val, t.LeftIdx, t.RightIdx)).ToArray(); } 95 set { this.tree = value.Select(t => new TreeNode(t.Item1, t.Item2, t.Item3, t.Item4, -1.0)).ToArray(); } // use a weight of -1.0 to indicate that partial dependence cannot be calculated for old models 96 } 97 #endregion 98 #region new storable format 99 [Storable] 100 private string[] SerializedTreeVarNames { 101 get { return tree.Select(t => t.VarName).ToArray(); } 102 set { 103 if (tree == null) tree = new TreeNode[value.Length]; 104 for (int i = 0; i < value.Length; i++) { 105 tree[i].VarName = value[i]; 106 } 107 } 108 } 109 [Storable] 110 private double[] SerializedTreeValues { 111 get { return tree.Select(t => t.Val).ToArray(); } 112 set { 113 if (tree == null) tree = new TreeNode[value.Length]; 114 for (int i = 0; i < value.Length; i++) { 115 tree[i].Val = value[i]; 116 } 117 } 118 } 119 [Storable] 120 private int[] 
SerializedTreeLeftIdx { 121 get { return tree.Select(t => t.LeftIdx).ToArray(); } 122 set { 123 if (tree == null) tree = new TreeNode[value.Length]; 124 for (int i = 0; i < value.Length; i++) { 125 tree[i].LeftIdx = value[i]; 126 } 127 } 128 } 129 [Storable] 130 private int[] SerializedTreeRightIdx { 131 get { return tree.Select(t => t.RightIdx).ToArray(); } 132 set { 133 if (tree == null) tree = new TreeNode[value.Length]; 134 for (int i = 0; i < value.Length; i++) { 135 tree[i].RightIdx = value[i]; 136 } 137 } 138 } 139 [Storable] 140 private double[] SerializedTreeWeightLeft { 141 get { return tree.Select(t => t.WeightLeft).ToArray(); } 142 set { 143 if (tree == null) tree = new TreeNode[value.Length]; 144 for (int i = 0; i < value.Length; i++) { 145 tree[i].WeightLeft = value[i]; 146 } 147 } 148 } 149 #endregion 150 151 152 88 153 89 154 [StorableConstructor] … … 98 163 } 99 164 100 internal RegressionTreeModel(TreeNode[] tree )101 : base( "RegressionTreeModel", "Represents a decision tree for regression.") {165 internal RegressionTreeModel(TreeNode[] tree, string target = "Target") 166 : base(target, "RegressionTreeModel", "Represents a decision tree for regression.") { 102 167 this.tree = tree; 103 168 } … … 108 173 if (node.VarName == TreeNode.NO_VARIABLE) 109 174 return node.Val; 110 111 if (columnCache[nodeIdx][row] <= node.Val) 175 if (columnCache[nodeIdx] == null) { 176 if (node.WeightLeft.IsAlmost(-1.0)) throw new InvalidOperationException("Cannot calculate partial dependence for trees loaded from older versions of HeuristicLab."); 177 // weighted average for partial dependence plot (recursive here because we need to calculate both sub-trees) 178 return node.WeightLeft * GetPredictionForRow(t, columnCache, node.LeftIdx, row) + 179 (1.0 - node.WeightLeft) * GetPredictionForRow(t, columnCache, node.RightIdx, row); 180 } else if (columnCache[nodeIdx][row] <= node.Val) 112 181 nodeIdx = node.LeftIdx; 113 182 else … … 121 190 } 122 191 123 public IEnumerable<double> GetEstimatedValues(IDataset ds, IEnumerable<int> rows) {192 public override IEnumerable<double> GetEstimatedValues(IDataset ds, IEnumerable<int> rows) { 124 193 // lookup columns for variableNames in one pass over the tree to speed up evaluation later on 125 194 ReadOnlyCollection<double>[] columnCache = new ReadOnlyCollection<double>[tree.Length]; … … 127 196 for (int i = 0; i < tree.Length; i++) { 128 197 if (tree[i].VarName != TreeNode.NO_VARIABLE) { 129 columnCache[i] = ds.GetReadOnlyDoubleValues(tree[i].VarName); 198 // tree models also support calculating estimations if not all variables used for training are available in the dataset 199 if (ds.ColumnNames.Contains(tree[i].VarName)) 200 columnCache[i] = ds.GetReadOnlyDoubleValues(tree[i].VarName); 130 201 } 131 202 } … … 133 204 } 134 205 135 public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {206 public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 136 207 return new RegressionSolution(this, new RegressionProblemData(problemData)); 137 208 } … … 148 219 } else { 149 220 return 150 TreeToString(n.LeftIdx, string.Format(CultureInfo.InvariantCulture, "{0}{1}{2} <= {3:F}", part, string.IsNullOrEmpty(part) ? "" : " and ", n.VarName, n.Val)) 151 + TreeToString(n.RightIdx, string.Format(CultureInfo.InvariantCulture, "{0}{1}{2} > {3:F}", part, string.IsNullOrEmpty(part) ? 
"" : " and ", n.VarName, n.Val)); 152 } 153 } 221 TreeToString(n.LeftIdx, string.Format(CultureInfo.InvariantCulture, "{0}{1}{2} <= {3:F} ({4:N3})", part, string.IsNullOrEmpty(part) ? "" : " and ", n.VarName, n.Val, n.WeightLeft)) 222 + TreeToString(n.RightIdx, string.Format(CultureInfo.InvariantCulture, "{0}{1}{2} > {3:F} ({4:N3}))", part, string.IsNullOrEmpty(part) ? "" : " and ", n.VarName, n.Val, 1.0 - n.WeightLeft)); 223 } 224 } 225 154 226 } 155 227 } -
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs
r12509 → r13948
      IClassificationProblemData problemData,
      IEnumerable<int> rows) {
-     var model = new SymbolicDiscriminantFunctionClassificationModel(tree, interpreter, new AccuracyMaximizationThresholdCalculator());
+     var model = new SymbolicDiscriminantFunctionClassificationModel(problemData.TargetVariable, tree, interpreter, new AccuracyMaximizationThresholdCalculator());
      model.RecalculateModelParameters(problemData, rows);
      return model;
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
r13238 → r13948
    addition.AddSubtree(cNode);

-   SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), (IRegressionProblemData)problemData.Clone());
+   SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), (IRegressionProblemData)problemData.Clone());
    solution.Model.Name = "Linear Regression Model";
    solution.Name = "Linear Regression Solution";
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs
r13238 → r13948
    relClassError = alglib.mnlrelclserror(lm, inputMatrix, nRows);

-   MultinomialLogitClassificationSolution solution = new MultinomialLogitClassificationSolution((IClassificationProblemData)problemData.Clone(), new MultinomialLogitModel(lm, targetVariable, allowedInputVariables, classValues));
+   MultinomialLogitClassificationSolution solution = new MultinomialLogitClassificationSolution(new MultinomialLogitModel(lm, targetVariable, allowedInputVariables, classValues), (IClassificationProblemData)problemData.Clone());
    return solution;
  }
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassificationSolution.cs
r12012 → r13948
      : base(original, cloner) {
    }
-   public MultinomialLogitClassificationSolution(IClassificationProblemData problemData, MultinomialLogitModel logitModel)
+   public MultinomialLogitClassificationSolution(MultinomialLogitModel logitModel, IClassificationProblemData problemData)
      : base(logitModel, problemData) {
    }
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitModel.cs
r12509 r13948 34 34 [StorableClass] 35 35 [Item("Multinomial Logit Model", "Represents a multinomial logit model for classification.")] 36 public sealed class MultinomialLogitModel : NamedItem, IClassificationModel {36 public sealed class MultinomialLogitModel : ClassificationModel { 37 37 38 38 private alglib.logitmodel logitModel; … … 48 48 } 49 49 50 [Storable] 51 private string targetVariable; 50 public override IEnumerable<string> VariablesUsedForPrediction { 51 get { return allowedInputVariables; } 52 } 53 52 54 [Storable] 53 55 private string[] allowedInputVariables; … … 64 66 logitModel = new alglib.logitmodel(); 65 67 logitModel.innerobj.w = (double[])original.logitModel.innerobj.w.Clone(); 66 targetVariable = original.targetVariable;67 68 allowedInputVariables = (string[])original.allowedInputVariables.Clone(); 68 69 classValues = (double[])original.classValues.Clone(); 69 70 } 70 71 public MultinomialLogitModel(alglib.logitmodel logitModel, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues) 71 : base( ) {72 : base(targetVariable) { 72 73 this.name = ItemName; 73 74 this.description = ItemDescription; 74 75 this.logitModel = logitModel; 75 this.targetVariable = targetVariable;76 76 this.allowedInputVariables = allowedInputVariables.ToArray(); 77 77 this.classValues = (double[])classValues.Clone(); … … 82 82 } 83 83 84 public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {84 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 85 85 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 86 86 … … 108 108 } 109 109 110 public MultinomialLogitClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) { 111 return new MultinomialLogitClassificationSolution(new ClassificationProblemData(problemData), this); 112 } 113 IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) { 114 return CreateClassificationSolution(problemData); 110 public override IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) { 111 return new MultinomialLogitClassificationSolution(this, new ClassificationProblemData(problemData)); 115 112 } 116 113 … … 135 132 } 136 133 #endregion 134 137 135 } 138 136 } -
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/MctsSymbolicRegression/MctsSymbolicRegressionStatic.cs
r13669 → r13948
    using System.Linq;
    using HeuristicLab.Algorithms.DataAnalysis.MctsSymbolicRegression.Policies;
-   using HeuristicLab.Common;
    using HeuristicLab.Core;
    using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
…
    var t = new SymbolicExpressionTree(treeGen.Exec(bestCode, bestConsts, bestNParams, scalingFactor, scalingOffset));
-   var model = new SymbolicRegressionModel(t, interpreter, lowerEstimationLimit, upperEstimationLimit);
+   var model = new SymbolicRegressionModel(problemData.TargetVariable, t, interpreter, lowerEstimationLimit, upperEstimationLimit);

    // model has already been scaled linearly in Eval
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaClassificationSolution.cs
r12012 → r13948
      : base(original, cloner) {
    }
-   public NcaClassificationSolution(IClassificationProblemData problemData, INcaModel ncaModel)
+   public NcaClassificationSolution(INcaModel ncaModel, IClassificationProblemData problemData)
      : base(ncaModel, problemData) {
    }
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs
r12509 r13948 30 30 [Item("NCA Model", "")] 31 31 [StorableClass] 32 public class NcaModel : NamedItem, INcaModel { 32 public class NcaModel : ClassificationModel, INcaModel { 33 public override IEnumerable<string> VariablesUsedForPrediction { 34 get { return allowedInputVariables; } 35 } 33 36 34 37 [Storable] … … 39 42 [Storable] 40 43 private string[] allowedInputVariables; 41 [Storable]42 private string targetVariable;43 44 [Storable] 44 45 private INearestNeighbourModel nnModel; … … 52 53 this.transformationMatrix = (double[,])original.transformationMatrix.Clone(); 53 54 this.allowedInputVariables = (string[])original.allowedInputVariables.Clone(); 54 this.targetVariable = original.targetVariable;55 55 this.nnModel = cloner.Clone(original.nnModel); 56 56 this.classValues = (double[])original.classValues.Clone(); 57 57 } 58 public NcaModel(int k, double[,] transformationMatrix, IDataset dataset, IEnumerable<int> rows, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues) { 58 public NcaModel(int k, double[,] transformationMatrix, IDataset dataset, IEnumerable<int> rows, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues) 59 : base(targetVariable) { 59 60 Name = ItemName; 60 61 Description = ItemDescription; 61 62 this.transformationMatrix = (double[,])transformationMatrix.Clone(); 62 63 this.allowedInputVariables = allowedInputVariables.ToArray(); 63 this.targetVariable = targetVariable;64 64 this.classValues = (double[])classValues.Clone(); 65 65 … … 72 72 } 73 73 74 public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {74 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 75 75 var ds = ReduceDataset(dataset, rows); 76 76 return nnModel.GetEstimatedClassValues(ds, Enumerable.Range(0, ds.Rows)); 77 77 } 78 78 79 public INcaClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {80 return new NcaClassificationSolution( new ClassificationProblemData(problemData), this);79 public override IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) { 80 return new NcaClassificationSolution(this, new ClassificationProblemData(problemData)); 81 81 } 82 82 83 I ClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) {84 return CreateClassificationSolution(problemData);83 INcaClassificationSolution INcaModel.CreateClassificationSolution(IClassificationProblemData problemData) { 84 return new NcaClassificationSolution(this, new ClassificationProblemData(problemData)); 85 85 } 86 86 … … 88 88 var data = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 89 89 90 var targets = dataset.GetDoubleValues( targetVariable, rows).ToArray();90 var targets = dataset.GetDoubleValues(TargetVariable, rows).ToArray(); 91 91 var result = new double[data.GetLength(0), transformationMatrix.GetLength(1) + 1]; 92 92 for (int i = 0; i < data.GetLength(0); i++) … … 104 104 .Range(0, transformationMatrix.GetLength(1)) 105 105 .Select(x => "X" + x.ToString()) 106 .Concat( targetVariable.ToEnumerable()),106 .Concat(TargetVariable.ToEnumerable()), 107 107 Reduce(dataset, rows)); 108 108 } -
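For orientation: the NCA model classifies by first projecting inputs with the learned transformation matrix, roughly z = x A with A the stored transformationMatrix, and then delegating to the stored nearest-neighbour model in the reduced space; ReduceDataset above builds exactly this projected dataset (plus the target column) before handing it to nnModel.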
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs
r13238 → r13948
    public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k) {
      var problemDataClone = (IClassificationProblemData)problemData.Clone();
-     return new NearestNeighbourClassificationSolution(problemDataClone, Train(problemDataClone, k));
+     return new NearestNeighbourClassificationSolution(Train(problemDataClone, k), problemDataClone);
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassificationSolution.cs
r12012 → r13948
      : base(original, cloner) {
    }
-   public NearestNeighbourClassificationSolution(IClassificationProblemData problemData, INearestNeighbourModel nnModel)
+   public NearestNeighbourClassificationSolution(INearestNeighbourModel nnModel, IClassificationProblemData problemData)
      : base(nnModel, problemData) {
    }
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs
r12509 r13948 34 34 [StorableClass] 35 35 [Item("NearestNeighbourModel", "Represents a nearest neighbour model for regression and classification.")] 36 public sealed class NearestNeighbourModel : NamedItem, INearestNeighbourModel {36 public sealed class NearestNeighbourModel : ClassificationModel, INearestNeighbourModel { 37 37 38 38 private alglib.nearestneighbor.kdtree kdTree; … … 48 48 } 49 49 50 [Storable] 51 private string targetVariable; 50 public override IEnumerable<string> VariablesUsedForPrediction { 51 get { return allowedInputVariables; } 52 } 53 52 54 [Storable] 53 55 private string[] allowedInputVariables; … … 91 93 92 94 k = original.k; 93 targetVariable = original.targetVariable;94 95 allowedInputVariables = (string[])original.allowedInputVariables.Clone(); 95 96 if (original.classValues != null) 96 97 this.classValues = (double[])original.classValues.Clone(); 97 98 } 98 public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null) { 99 public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null) 100 : base(targetVariable) { 99 101 Name = ItemName; 100 102 Description = ItemDescription; 101 103 this.k = k; 102 this.targetVariable = targetVariable;103 104 this.allowedInputVariables = allowedInputVariables.ToArray(); 104 105 … … 163 164 } 164 165 165 public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {166 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 166 167 if (classValues == null) throw new InvalidOperationException("No class values are defined."); 167 168 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); … … 201 202 } 202 203 203 public INearestNeighbourRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 204 return new NearestNeighbourRegressionSolution(new RegressionProblemData(problemData), this); 205 } 204 206 205 IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) { 207 return CreateRegressionSolution(problemData); 208 } 209 public INearestNeighbourClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) { 210 return new NearestNeighbourClassificationSolution(new ClassificationProblemData(problemData), this); 211 } 212 IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) { 213 return CreateClassificationSolution(problemData); 206 return new NearestNeighbourRegressionSolution(this, new RegressionProblemData(problemData)); 207 } 208 public override IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) { 209 return new NearestNeighbourClassificationSolution(this, new ClassificationProblemData(problemData)); 214 210 } 215 211 -
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs
r13238 → r13948
    public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k) {
      var clonedProblemData = (IRegressionProblemData)problemData.Clone();
-     return new NearestNeighbourRegressionSolution(clonedProblemData, Train(problemData, k));
+     return new NearestNeighbourRegressionSolution(Train(problemData, k), clonedProblemData);
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegressionSolution.cs
r12012 → r13948
      : base(original, cloner) {
    }
-   public NearestNeighbourRegressionSolution(IRegressionProblemData problemData, INearestNeighbourModel nnModel)
+   public NearestNeighbourRegressionSolution(INearestNeighbourModel nnModel, IRegressionProblemData problemData)
      : base(nnModel, problemData) {
    }
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassification.cs
r13238 → r13948

    var problemDataClone = (IClassificationProblemData)problemData.Clone();
-   return new NeuralNetworkClassificationSolution(problemDataClone, new NeuralNetworkModel(multiLayerPerceptron, targetVariable, allowedInputVariables, problemDataClone.ClassValues.ToArray()));
+   return new NeuralNetworkClassificationSolution(new NeuralNetworkModel(multiLayerPerceptron, targetVariable, allowedInputVariables, problemDataClone.ClassValues.ToArray()), problemDataClone);
  }
  #endregion
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassificationSolution.cs
r12012 → r13948
      : base(original, cloner) {
    }
-   public NeuralNetworkClassificationSolution(IClassificationProblemData problemData, INeuralNetworkModel nnModel)
+   public NeuralNetworkClassificationSolution(INeuralNetworkModel nnModel, IClassificationProblemData problemData)
      : base(nnModel, problemData) {
    }
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleClassification.cs
r13238 → r13948
    relClassError = alglib.mlperelclserror(mlpEnsemble, inputMatrix, nRows);
    var problemDataClone = (IClassificationProblemData)problemData.Clone();
-   return new NeuralNetworkEnsembleClassificationSolution(problemDataClone, new NeuralNetworkEnsembleModel(mlpEnsemble, targetVariable, allowedInputVariables, problemDataClone.ClassValues.ToArray()));
+   return new NeuralNetworkEnsembleClassificationSolution(new NeuralNetworkEnsembleModel(mlpEnsemble, targetVariable, allowedInputVariables, problemDataClone.ClassValues.ToArray()), problemDataClone);
  }
  #endregion
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleClassificationSolution.cs
r12012 → r13948
      : base(original, cloner) {
    }
-   public NeuralNetworkEnsembleClassificationSolution(IClassificationProblemData problemData, INeuralNetworkEnsembleModel nnModel)
+   public NeuralNetworkEnsembleClassificationSolution(INeuralNetworkEnsembleModel nnModel, IClassificationProblemData problemData)
      : base(nnModel, problemData) {
    }
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleModel.cs
r12509 r13948 34 34 [StorableClass] 35 35 [Item("NeuralNetworkEnsembleModel", "Represents a neural network ensemble for regression and classification.")] 36 public sealed class NeuralNetworkEnsembleModel : NamedItem, INeuralNetworkEnsembleModel {36 public sealed class NeuralNetworkEnsembleModel : ClassificationModel, INeuralNetworkEnsembleModel { 37 37 38 38 private alglib.mlpensemble mlpEnsemble; … … 46 46 } 47 47 } 48 } 49 50 public override IEnumerable<string> VariablesUsedForPrediction { 51 get { return allowedInputVariables; } 48 52 } 49 53 … … 72 76 } 73 77 public NeuralNetworkEnsembleModel(alglib.mlpensemble mlpEnsemble, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null) 74 : base( ) {78 : base(targetVariable) { 75 79 this.name = ItemName; 76 80 this.description = ItemDescription; … … 103 107 } 104 108 105 public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {109 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 106 110 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 107 111 … … 129 133 } 130 134 131 public INeuralNetworkEnsembleRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 132 return new NeuralNetworkEnsembleRegressionSolution(new RegressionEnsembleProblemData(problemData), this); 133 } 134 IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) { 135 return CreateRegressionSolution(problemData); 136 } 137 public INeuralNetworkEnsembleClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) { 138 return new NeuralNetworkEnsembleClassificationSolution(new ClassificationEnsembleProblemData(problemData), this); 139 } 140 IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) { 141 return CreateClassificationSolution(problemData); 135 public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 136 return new NeuralNetworkEnsembleRegressionSolution(this, new RegressionEnsembleProblemData(problemData)); 137 } 138 public override IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) { 139 return new NeuralNetworkEnsembleClassificationSolution(this, new ClassificationEnsembleProblemData(problemData)); 142 140 } 143 141 -
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleRegression.cs
r13238 → r13948
    avgRelError = alglib.mlpeavgrelerror(mlpEnsemble, inputMatrix, nRows);

-   return new NeuralNetworkEnsembleRegressionSolution((IRegressionProblemData)problemData.Clone(), new NeuralNetworkEnsembleModel(mlpEnsemble, targetVariable, allowedInputVariables));
+   return new NeuralNetworkEnsembleRegressionSolution(new NeuralNetworkEnsembleModel(mlpEnsemble, targetVariable, allowedInputVariables), (IRegressionProblemData)problemData.Clone());
  }
  #endregion
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleRegressionSolution.cs
r12012 → r13948
      : base(original, cloner) {
    }
-   public NeuralNetworkEnsembleRegressionSolution(IRegressionProblemData problemData, INeuralNetworkEnsembleModel nnModel)
+   public NeuralNetworkEnsembleRegressionSolution(INeuralNetworkEnsembleModel nnModel, IRegressionProblemData problemData)
      : base(nnModel, problemData) {
        RecalculateResults();
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkModel.cs
r12817 r13948 34 34 [StorableClass] 35 35 [Item("NeuralNetworkModel", "Represents a neural network for regression and classification.")] 36 public sealed class NeuralNetworkModel : NamedItem, INeuralNetworkModel {36 public sealed class NeuralNetworkModel : ClassificationModel, INeuralNetworkModel { 37 37 38 38 private alglib.multilayerperceptron multiLayerPerceptron; … … 48 48 } 49 49 50 [Storable] 51 private string targetVariable; 50 public override IEnumerable<string> VariablesUsedForPrediction { 51 get { return allowedInputVariables; } 52 } 53 52 54 [Storable] 53 55 private string[] allowedInputVariables; … … 74 76 multiLayerPerceptron.innerobj.x = (double[])original.multiLayerPerceptron.innerobj.x.Clone(); 75 77 multiLayerPerceptron.innerobj.y = (double[])original.multiLayerPerceptron.innerobj.y.Clone(); 76 targetVariable = original.targetVariable;77 78 allowedInputVariables = (string[])original.allowedInputVariables.Clone(); 78 79 if (original.classValues != null) … … 80 81 } 81 82 public NeuralNetworkModel(alglib.multilayerperceptron multiLayerPerceptron, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null) 82 : base( ) {83 : base(targetVariable) { 83 84 this.name = ItemName; 84 85 this.description = ItemDescription; 85 86 this.multiLayerPerceptron = multiLayerPerceptron; 86 this.targetVariable = targetVariable;87 87 this.allowedInputVariables = allowedInputVariables.ToArray(); 88 88 if (classValues != null) … … 111 111 } 112 112 113 public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {113 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 114 114 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 115 115 … … 137 137 } 138 138 139 public INeuralNetworkRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 140 return new NeuralNetworkRegressionSolution(new RegressionProblemData(problemData), this); 141 } 142 IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) { 143 return CreateRegressionSolution(problemData); 144 } 145 public INeuralNetworkClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) { 146 return new NeuralNetworkClassificationSolution(new ClassificationProblemData(problemData), this); 147 } 148 IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) { 149 return CreateClassificationSolution(problemData); 139 public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 140 return new NeuralNetworkRegressionSolution(this, new RegressionProblemData(problemData)); 141 } 142 public override IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) { 143 return new NeuralNetworkClassificationSolution(this, new ClassificationProblemData(problemData)); 150 144 } 151 145 -
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegression.cs
r13238 → r13948
    avgRelError = alglib.mlpavgrelerror(multiLayerPerceptron, inputMatrix, nRows);

-   return new NeuralNetworkRegressionSolution((IRegressionProblemData)problemData.Clone(), new NeuralNetworkModel(multiLayerPerceptron, targetVariable, allowedInputVariables));
+   return new NeuralNetworkRegressionSolution(new NeuralNetworkModel(multiLayerPerceptron, targetVariable, allowedInputVariables), (IRegressionProblemData)problemData.Clone());
  }
  #endregion
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegressionSolution.cs
r12012 r13948
43 43 : base(original, cloner) {
44 44 }
45 public NeuralNetworkRegressionSolution(IRegressionProblemData problemData, INeuralNetworkModel nnModel)
45 public NeuralNetworkRegressionSolution(INeuralNetworkModel nnModel, IRegressionProblemData problemData)
46 46 : base(nnModel, problemData) {
47 47 }
-
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs
r13238 r13948
143 143
144 144 if (CreateSolution) {
145 var solution = new RandomForestClassificationSolution( (IClassificationProblemData)Problem.ProblemData.Clone(), model);
145 var solution = new RandomForestClassificationSolution(model, (IClassificationProblemData)Problem.ProblemData.Clone());
146 146 Results.Add(new Result(RandomForestClassificationModelResultName, "The random forest classification solution.", solution));
147 147 }
148 148 }
149 149
150 150 // keep for compatibility with old API
151 151 public static RandomForestClassificationSolution CreateRandomForestClassificationSolution(IClassificationProblemData problemData, int nTrees, double r, double m, int seed,
152 152 out double rmsError, out double relClassificationError, out double outOfBagRmsError, out double outOfBagRelClassificationError) {
153 153 var model = CreateRandomForestClassificationModel(problemData, nTrees, r, m, seed, out rmsError, out relClassificationError, out outOfBagRmsError, out outOfBagRelClassificationError);
154 return new RandomForestClassificationSolution( (IClassificationProblemData)problemData.Clone(), model);
154 return new RandomForestClassificationSolution(model, (IClassificationProblemData)problemData.Clone());
155 155 }
156 156
-
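For callers of the old API, the static compatibility method keeps its signature; only the internal solution construction switches to the model-first argument order. A hedged sketch of a call site, assuming a classificationProblemData instance and illustrative hyperparameter values (50 trees, r = 0.3, m = 0.5, seed 42):

  double rmsError, relClassificationError, outOfBagRmsError, outOfBagRelClassificationError;
  // parameters: problemData, nTrees, r, m, seed, followed by the four out error measures
  var solution = RandomForestClassification.CreateRandomForestClassificationSolution(
    classificationProblemData, 50, 0.3, 0.5, 42,
    out rmsError, out relClassificationError, out outOfBagRmsError, out outOfBagRelClassificationError);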
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassificationSolution.cs
r12012 r13948
43 43 : base(original, cloner) {
44 44 }
45 public RandomForestClassificationSolution(IClassificationProblemData problemData, IRandomForestModel randomForestModel)
45 public RandomForestClassificationSolution(IRandomForestModel randomForestModel, IClassificationProblemData problemData)
46 46 : base(randomForestModel, problemData) {
47 47 }
-
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs
r12509 r13948
34 34 [StorableClass]
35 35 [Item("RandomForestModel", "Represents a random forest for regression and classification.")]
36 public sealed class RandomForestModel : NamedItem, IRandomForestModel {
36 public sealed class RandomForestModel : ClassificationModel, IRandomForestModel {
37 37 // not persisted
38 38 private alglib.decisionforest randomForest;
…
44 44 }
45 45 }
46
47 public override IEnumerable<string> VariablesUsedForPrediction {
48 get { return originalTrainingData.AllowedInputVariables; }
49 }
50
46 51
47 52 // instead of storing the data of the model itself
…
91 96
92 97 // random forest models can only be created through the static factory methods CreateRegressionModel and CreateClassificationModel
93 private RandomForestModel( alglib.decisionforest randomForest,
98 private RandomForestModel(string targetVariable, alglib.decisionforest randomForest,
94 99 int seed, IDataAnalysisProblemData originalTrainingData,
95 100 int nTrees, double r, double m, double[] classValues = null)
96 : base( ) {
101 : base(targetVariable) {
97 102 this.name = ItemName;
98 103 this.description = ItemDescription;
…
147 152 }
148 153
149 public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
154 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
150 155 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
151 156 AssertInputMatrix(inputData);
…
174 179 }
175 180
176 public IRandomForestRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
177 return new RandomForestRegressionSolution(new RegressionProblemData(problemData), this);
178 }
179 IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
180 return CreateRegressionSolution(problemData);
181 }
182 public IRandomForestClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
183 return new RandomForestClassificationSolution(new ClassificationProblemData(problemData), this);
184 }
185 IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) {
186 return CreateClassificationSolution(problemData);
181
182 public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
183 return new RandomForestRegressionSolution(this, new RegressionProblemData(problemData));
184 }
185 public override IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
186 return new RandomForestClassificationSolution(this, new ClassificationProblemData(problemData));
187 187 }
188 188
…
205 205 outOfBagRmsError = rep.oobrmserror;
206 206
207 return new RandomForestModel( dForest, seed, problemData, nTrees, r, m);
207 return new RandomForestModel(problemData.TargetVariable, dForest, seed, problemData, nTrees, r, m);
208 208
209 209
…
242 242 outOfBagRelClassificationError = rep.oobrelclserror;
243 243
244 return new RandomForestModel( dForest, seed, problemData, nTrees, r, m, classValues);
244 return new RandomForestModel(problemData.TargetVariable, dForest, seed, problemData, nTrees, r, m, classValues);
245 245
246 246
-
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs
r13238 r13948
143 143
144 144 if (CreateSolution) {
145 var solution = new RandomForestRegressionSolution( (IRegressionProblemData)Problem.ProblemData.Clone(), model);
145 var solution = new RandomForestRegressionSolution(model, (IRegressionProblemData)Problem.ProblemData.Clone());
146 146 Results.Add(new Result(RandomForestRegressionModelResultName, "The random forest regression solution.", solution));
147 147 }
…
153 153 var model = CreateRandomForestRegressionModel(problemData, nTrees, r, m, seed,
154 154 out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError);
155 return new RandomForestRegressionSolution( (IRegressionProblemData)problemData.Clone(), model);
155 return new RandomForestRegressionSolution(model, (IRegressionProblemData)problemData.Clone());
156 156 }
157 157
-
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegressionSolution.cs
r12012 r13948
43 43 : base(original, cloner) {
44 44 }
45 public RandomForestRegressionSolution(IRegressionProblemData problemData, IRandomForestModel randomForestModel)
45 public RandomForestRegressionSolution(IRandomForestModel randomForestModel, IRegressionProblemData problemData)
46 46 : base(randomForestModel, problemData) {
47 47 }
-
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineModel.cs
r12509 r13948
37 37 [StorableClass]
38 38 [Item("SupportVectorMachineModel", "Represents a support vector machine model.")]
39 public sealed class SupportVectorMachineModel : NamedItem, ISupportVectorMachineModel {
39 public sealed class SupportVectorMachineModel : ClassificationModel, ISupportVectorMachineModel {
40 public override IEnumerable<string> VariablesUsedForPrediction {
41 get { return allowedInputVariables; }
42 }
43
40 44
41 45 private svm_model model;
…
83 87
84 88 [Storable]
85 private string targetVariable;
86 [Storable]
87 89 private string[] allowedInputVariables;
88 90 [Storable]
…
96 98 this.model = original.model;
97 99 this.rangeTransform = original.rangeTransform;
98 this.targetVariable = original.targetVariable;
99 100 this.allowedInputVariables = (string[])original.allowedInputVariables.Clone();
100 101 if (original.classValues != null)
…
106 107 }
107 108 public SupportVectorMachineModel(svm_model model, RangeTransform rangeTransform, string targetVariable, IEnumerable<string> allowedInputVariables)
108 : base( ) {
109 : base(targetVariable) {
109 110 this.name = ItemName;
110 111 this.description = ItemDescription;
111 112 this.model = model;
112 113 this.rangeTransform = rangeTransform;
113 this.targetVariable = targetVariable;
114 114 this.allowedInputVariables = allowedInputVariables.ToArray();
115 115 }
…
123 123 return GetEstimatedValuesHelper(dataset, rows);
124 124 }
125 public SupportVectorRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
125 public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
126 126 return new SupportVectorRegressionSolution(this, new RegressionProblemData(problemData));
127 127 }
128 IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
129 return CreateRegressionSolution(problemData);
130 }
131 128 #endregion
132 129
133 130 #region IClassificationModel Members
134 public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
131 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
135 132 if (classValues == null) throw new NotSupportedException();
136 133 // return the original class value instead of the predicted value of the model
…
152 149 }
153 150
154 public SupportVectorClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
151 public override IClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
155 152 return new SupportVectorClassificationSolution(this, new ClassificationProblemData(problemData));
156 }
157 IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) {
158 return CreateClassificationSolution(problemData);
159 153 }
160 154 #endregion
161 155 private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {
162 156 // calculate predictions for the currently requested rows
163 svm_problem problem = SupportVectorMachineUtil.CreateSvmProblem(dataset, targetVariable, allowedInputVariables, rows);
157 svm_problem problem = SupportVectorMachineUtil.CreateSvmProblem(dataset, TargetVariable, allowedInputVariables, rows);
164 158 svm_problem scaledProblem = rangeTransform.Scale(problem);
165 159
-
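Because CreateRegressionSolution and CreateClassificationSolution now return the interface types (IRegressionSolution / IClassificationSolution) instead of the concrete SupportVector* solution classes, call sites that relied on the concrete types need an explicit cast. A sketch, assuming an existing SupportVectorMachineModel instance svmModel and an IRegressionProblemData instance regressionProblemData (both illustrative):

  IRegressionSolution solution = svmModel.CreateRegressionSolution(regressionProblemData);
  // the returned object is still a SupportVectorRegressionSolution, so a cast recovers the concrete type if needed
  var svmSolution = (SupportVectorRegressionSolution)solution;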
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/TimeSeries/AutoregressiveModeling.cs
r13238 r13948
134 134
135 135 var interpreter = new SymbolicTimeSeriesPrognosisExpressionTreeInterpreter(problemData.TargetVariable);
136 var model = new SymbolicTimeSeriesPrognosisModel( tree, interpreter);
136 var model = new SymbolicTimeSeriesPrognosisModel(problemData.TargetVariable, tree, interpreter);
137 137 var solution = model.CreateTimeSeriesPrognosisSolution((ITimeSeriesPrognosisProblemData)problemData.Clone());
138 138 return solution;
-
branches/HeuristicLab.RegressionSolutionGradientView/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClusteringModel.cs
r12509 r13948
37 37 public static new Image StaticItemImage {
38 38 get { return HeuristicLab.Common.Resources.VSImageLibrary.Function; }
39 }
40
41 public IEnumerable<string> VariablesUsedForPrediction {
42 get { return allowedInputVariables; }
39 43 }
40 44
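KMeansClusteringModel only gains the VariablesUsedForPrediction property, which appears to be the common thread of this changeset: every model reports the input variables it actually uses, so a consumer (for example the RegressionSolutionGradientView this branch is named after) can query them uniformly. A sketch of such a generic consumer, where model stands for any of the models touched here and the output statement is illustrative:

  // list the inputs the model depends on, independent of the concrete model type
  Console.WriteLine(string.Join(", ", model.VariablesUsedForPrediction));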