- Timestamp:
- 06/27/19 15:46:20 (5 years ago)
- Location:
- branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files:
-
- 2 added
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithm.cs
r17030 r17045 269 269 270 270 if (ModelCreation == ModelCreation.SurrogateModel) { 271 model = new GradientBoostedTreesModelSurrogate( problemData, (uint)Seed, lossFunction, Iterations, MaxSize, R, M, Nu, (GradientBoostedTreesModel)model);271 model = new GradientBoostedTreesModelSurrogate((GradientBoostedTreesModel)model, problemData, (uint)Seed, lossFunction, Iterations, MaxSize, R, M, Nu); 272 272 } 273 273 -
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesModel.cs
r17030 r17045 34 34 // this is essentially a collection of weighted regression models 35 35 public sealed class GradientBoostedTreesModel : RegressionModel, IGradientBoostedTreesModel { 36 // BackwardsCompatibility3.4 for allowing deserialization & serialization of old models37 #region Backwards compatible code, remove with 3.538 39 36 [Storable(Name = "models")] 40 37 private IList<IRegressionModel> __persistedModels { … … 53 50 get { return weights; } 54 51 } 55 #endregion56 52 57 53 public override IEnumerable<string> VariablesUsedForPrediction { -
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesModelSurrogate.cs
r16565 r17045 90 90 private Func<IGradientBoostedTreesModel> CreateLazyInitFunc(IGradientBoostedTreesModel clonedModel) { 91 91 return () => { 92 return clonedModel == null ? RecalculateModel() : clonedModel;92 return clonedModel ?? RecalculateModel(); 93 93 }; 94 94 } 95 95 96 96 // create only the surrogate model without an actual model 97 p ublicGradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed,97 private GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed, 98 98 ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu) 99 99 : base(trainingProblemData.TargetVariable, "Gradient boosted tree model", string.Empty) { … … 106 106 this.m = m; 107 107 this.nu = nu; 108 109 actualModel = new Lazy<IGradientBoostedTreesModel>(() => RecalculateModel()); 108 110 } 109 111 110 112 // wrap an actual model in a surrograte 111 public GradientBoostedTreesModelSurrogate(IRegressionProblemData trainingProblemData, uint seed, 112 ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu, 113 IGradientBoostedTreesModel model) 113 public GradientBoostedTreesModelSurrogate(IGradientBoostedTreesModel model, IRegressionProblemData trainingProblemData, uint seed, 114 ILossFunction lossFunction, int iterations, int maxSize, double r, double m, double nu) 114 115 : this(trainingProblemData, seed, lossFunction, iterations, maxSize, r, m, nu) { 115 116 actualModel = new Lazy<IGradientBoostedTreesModel>(() => model); -
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r17042 r17045 394 394 <Compile Include="RandomForest\RandomForestClassification.cs" /> 395 395 <Compile Include="RandomForest\RandomForestModel.cs" /> 396 <Compile Include="RandomForest\RandomForestModelFull.cs" /> 396 397 <Compile Include="RandomForest\RandomForestRegression.cs" /> 397 398 <Compile Include="RandomForest\RandomForestRegressionSolution.cs" /> 399 <Compile Include="RandomForest\RandomForestModelSurrogate.cs" /> 398 400 <Compile Include="RandomForest\RandomForestUtil.cs" /> 399 401 <Compile Include="SupportVectorMachine\SupportVectorClassification.cs" /> -
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs
r16565 r17045 21 21 22 22 using System.Threading; 23 using HEAL.Attic; 23 24 using HeuristicLab.Common; 24 25 using HeuristicLab.Core; … … 26 27 using HeuristicLab.Optimization; 27 28 using HeuristicLab.Parameters; 28 using HEAL.Attic;29 29 using HeuristicLab.Problems.DataAnalysis; 30 30 … … 144 144 145 145 if (CreateSolution) { 146 var solution = new RandomForestClassificationSolution(model, (IClassificationProblemData)Problem.ProblemData.Clone());146 var solution = model.CreateClassificationSolution(Problem.ProblemData); 147 147 Results.Add(new Result(RandomForestClassificationModelResultName, "The random forest classification solution.", solution)); 148 148 } -
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs
r16763 r17045 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using HEAL.Attic; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; 27 28 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; 28 using HEAL.Attic;29 29 using HeuristicLab.Problems.DataAnalysis; 30 30 using HeuristicLab.Problems.DataAnalysis.Symbolic; … … 34 34 /// Represents a random forest model for regression and classification 35 35 /// </summary> 36 [StorableType("A4F688CD-1F42-4103-8449-7DE52AEF6C69")] 36 [Obsolete("This class only exists for backwards compatibility reasons. Use RFModelSurrogate or RFModelFull instead.")] 37 [StorableType("9AA4CCC2-CD75-4471-8DF6-949E5B783642")] 37 38 [Item("RandomForestModel", "Represents a random forest for regression and classification.")] 38 39 public sealed class RandomForestModel : ClassificationModel, IRandomForestModel { … … 139 140 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 140 141 double[,] inputData = dataset.ToArray(AllowedInputVariables, rows); 141 AssertInputMatrix(inputData);142 RandomForestUtil.AssertInputMatrix(inputData); 142 143 143 144 int n = inputData.GetLength(0); … … 157 158 public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) { 158 159 double[,] inputData = dataset.ToArray(AllowedInputVariables, rows); 159 AssertInputMatrix(inputData);160 RandomForestUtil.AssertInputMatrix(inputData); 160 161 161 162 int n = inputData.GetLength(0); … … 175 176 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 176 177 double[,] inputData = dataset.ToArray(AllowedInputVariables, rows); 177 AssertInputMatrix(inputData);178 RandomForestUtil.AssertInputMatrix(inputData); 178 179 179 180 int n = inputData.GetLength(0); … … 315 316 316 317 alglib.dfreport rep; 317 var dForest = CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, 1, out rep);318 var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, 1, out rep); 318 319 319 320 rmsError = rep.rmserror; … … 353 354 354 355 alglib.dfreport rep; 355 var dForest = CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out rep);356 var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out rep); 356 357 357 358 rmsError = rep.rmserror; … … 361 362 362 363 return new RandomForestModel(problemData.TargetVariable, dForest, seed, problemData, nTrees, r, m, classValues); 363 }364 365 private static alglib.decisionforest CreateRandomForestModel(int seed, double[,] inputMatrix, int nTrees, double r, double m, int nClasses, out alglib.dfreport rep) {366 AssertParameters(r, m);367 AssertInputMatrix(inputMatrix);368 369 int info = 0;370 alglib.math.rndobject = new System.Random(seed);371 var dForest = new alglib.decisionforest();372 rep = new alglib.dfreport();373 int nRows = inputMatrix.GetLength(0);374 int nColumns = inputMatrix.GetLength(1);375 int sampleSize = Math.Max((int)Math.Round(r * nRows), 1);376 int nFeatures = Math.Max((int)Math.Round(m * (nColumns - 1)), 1);377 378 alglib.dforest.dfbuildinternal(inputMatrix, nRows, nColumns - 1, nClasses, nTrees, sampleSize, nFeatures, alglib.dforest.dfusestrongsplits + alglib.dforest.dfuseevs, ref info, dForest.innerobj, rep.innerobj);379 if (info != 1) throw new ArgumentException("Error in calculation of random forest model");380 return dForest;381 }382 383 private static void AssertParameters(double r, double m) {384 if (r <= 0 || r > 1) throw new ArgumentException("The R parameter for random forest modeling must be between 0 and 1.");385 if (m <= 0 || m > 1) throw new ArgumentException("The M parameter for random forest modeling must be between 0 and 1.");386 }387 388 private static void AssertInputMatrix(double[,] inputMatrix) {389 if (inputMatrix.ContainsNanOrInfinity())390 throw new NotSupportedException("Random forest modeling does not support NaN or infinity values in the input dataset.");391 364 } 392 365 -
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs
r16565 r17045 20 20 #endregion 21 21 22 using System.Collections.Generic; 23 using System.Linq; 22 24 using System.Threading; 25 using HEAL.Attic; 23 26 using HeuristicLab.Common; 24 27 using HeuristicLab.Core; … … 26 29 using HeuristicLab.Optimization; 27 30 using HeuristicLab.Parameters; 28 using HEAL.Attic;29 31 using HeuristicLab.Problems.DataAnalysis; 30 32 … … 144 146 145 147 if (CreateSolution) { 146 var solution = new RandomForestRegressionSolution(model, (IRegressionProblemData)Problem.ProblemData.Clone());148 var solution = model.CreateRegressionSolution(Problem.ProblemData); 147 149 Results.Add(new Result(RandomForestRegressionModelResultName, "The random forest regression solution.", solution)); 148 150 } 149 151 } 152 150 153 151 154 // keep for compatibility with old API … … 157 160 } 158 161 159 public static RandomForestModel CreateRandomForestRegressionModel(IRegressionProblemData problemData, int nTrees, 160 double r, double m, int seed, 161 out double rmsError, out double avgRelError, out double outOfBagRmsError, out double outOfBagAvgRelError) { 162 return RandomForestModel.CreateRegressionModel(problemData, nTrees, r, m, seed, 163 rmsError: out rmsError, avgRelError: out avgRelError, outOfBagRmsError: out outOfBagRmsError, outOfBagAvgRelError: out outOfBagAvgRelError); 162 public static RandomForestModelFull CreateRandomForestRegressionModel(IRegressionProblemData problemData, int nTrees, 163 double r, double m, int seed, 164 out double rmsError, out double avgRelError, out double outOfBagRmsError, out double outOfBagAvgRelError) { 165 return CreateRandomForestRegressionModel(problemData, problemData.TrainingIndices, nTrees, r, m, seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError); 166 } 167 168 public static RandomForestModelFull CreateRandomForestRegressionModel(IRegressionProblemData problemData, IEnumerable<int> trainingIndices, int nTrees, double r, double m, int seed, 169 out double rmsError, out double avgRelError, out double outOfBagRmsError, out double outOfBagAvgRelError) { 170 171 var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable }); 172 double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices); 173 174 alglib.dfreport rep; 175 var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, 1, out rep); 176 177 rmsError = rep.rmserror; 178 outOfBagRmsError = rep.oobrmserror; 179 avgRelError = rep.avgrelerror; 180 outOfBagAvgRelError = rep.oobavgrelerror; 181 182 return new RandomForestModelFull(dForest, problemData.TargetVariable, problemData.AllowedInputVariables); 183 184 //return RandomForestModel.CreateRegressionModel(problemData, nTrees, r, m, seed, 185 //rmsError: out rmsError, avgRelError: out avgRelError, outOfBagRmsError: out outOfBagRmsError, outOfBagAvgRelError: out outOfBagAvgRelError); 164 186 } 165 187 -
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestUtil.cs
r16565 r17045 27 27 using System.Linq.Expressions; 28 28 using System.Threading.Tasks; 29 using HEAL.Attic; 29 30 using HeuristicLab.Common; 30 31 using HeuristicLab.Core; 31 32 using HeuristicLab.Data; 32 33 using HeuristicLab.Parameters; 33 using HEAL.Attic;34 34 using HeuristicLab.Problems.DataAnalysis; 35 35 using HeuristicLab.Random; … … 89 89 90 90 public static class RandomForestUtil { 91 public static void AssertParameters(double r, double m) { 92 if (r <= 0 || r > 1) throw new ArgumentException("The R parameter for random forest modeling must be between 0 and 1."); 93 if (m <= 0 || m > 1) throw new ArgumentException("The M parameter for random forest modeling must be between 0 and 1."); 94 } 95 96 public static void AssertInputMatrix(double[,] inputMatrix) { 97 if (inputMatrix.ContainsNanOrInfinity()) 98 throw new NotSupportedException("Random forest modeling does not support NaN or infinity values in the input dataset."); 99 } 100 101 internal static alglib.decisionforest CreateRandomForestModel(int seed, double[,] inputMatrix, int nTrees, double r, double m, int nClasses, out alglib.dfreport rep) { 102 RandomForestUtil.AssertParameters(r, m); 103 RandomForestUtil.AssertInputMatrix(inputMatrix); 104 105 int info = 0; 106 alglib.math.rndobject = new System.Random(seed); 107 var dForest = new alglib.decisionforest(); 108 rep = new alglib.dfreport(); 109 int nRows = inputMatrix.GetLength(0); 110 int nColumns = inputMatrix.GetLength(1); 111 int sampleSize = Math.Max((int)Math.Round(r * nRows), 1); 112 int nFeatures = Math.Max((int)Math.Round(m * (nColumns - 1)), 1); 113 114 alglib.dforest.dfbuildinternal(inputMatrix, nRows, nColumns - 1, nClasses, nTrees, sampleSize, nFeatures, alglib.dforest.dfusestrongsplits + alglib.dforest.dfuseevs, ref info, dForest.innerobj, rep.innerobj); 115 if (info != 1) throw new ArgumentException("Error in calculation of random forest model"); 116 return dForest; 117 } 118 119 91 120 private static void CrossValidate(IRegressionProblemData problemData, Tuple<IEnumerable<int>, IEnumerable<int>>[] partitions, int nTrees, double r, double m, int seed, out double avgTestMse) { 92 121 avgTestMse = 0;
Note: See TracChangeset
for help on using the changeset viewer.