Timestamp: 09/25/15 14:39:59
Location: branches/gteufl
Files: 7 edited, 1 copied
branches/gteufl

  Property svn:ignore changed:
       FxCopResults.txt
       Google.ProtocolBuffers-0.9.1.dll
     + Google.ProtocolBuffers-2.4.1.473.dll
       HeuristicLab 3.3.5.1.ReSharper.user
       HeuristicLab 3.3.6.0.ReSharper.user
       HeuristicLab.4.5.resharper.user
       HeuristicLab.ExtLibs.6.0.ReSharper.user
     + HeuristicLab.Scripting.Development
       HeuristicLab.resharper.user
       ProtoGen.exe
       …
       _ReSharper.HeuristicLab
       _ReSharper.HeuristicLab 3.3
     + _ReSharper.HeuristicLab 3.3 Tests
       _ReSharper.HeuristicLab.ExtLibs
       bin
       protoc.exe
     - _ReSharper.HeuristicLab 3.3 Tests
     - Google.ProtocolBuffers-2.4.1.473.dll
     + obj

  Property svn:mergeinfo changed

  Property svn:global-ignores set to:
       *.nuget
       packages

branches/gteufl/HeuristicLab.Algorithms.DataAnalysis

  Property svn:mergeinfo changed

branches/gteufl/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs
r9456 → r12969

  #region License Information
  /* HeuristicLab
-  * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+  * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
   *
   * This file is part of HeuristicLab.
  …
  #endregion

- using System;
- using System.Collections.Generic;
- using System.Linq;
  using HeuristicLab.Common;
  using HeuristicLab.Core;
  …
  /// </summary>
  [Item("Random Forest Classification", "Random forest classification data analysis algorithm (wrapper for ALGLIB).")]
- [Creatable("Data Analysis")]
+ [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 120)]
  [StorableClass]
  public sealed class RandomForestClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> {
  …
  public static IClassificationSolution CreateRandomForestClassificationSolution(IClassificationProblemData problemData, int nTrees, double r, double m, int seed,
    out double rmsError, out double relClassificationError, out double outOfBagRmsError, out double outOfBagRelClassificationError) {
-   if (r <= 0 || r > 1) throw new ArgumentException("The R parameter in the random forest regression must be between 0 and 1.");
-   if (m <= 0 || m > 1) throw new ArgumentException("The M parameter in the random forest regression must be between 0 and 1.");
-
-   alglib.math.rndobject = new System.Random(seed);
-
-   Dataset dataset = problemData.Dataset;
-   string targetVariable = problemData.TargetVariable;
-   IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
-   IEnumerable<int> rows = problemData.TrainingIndices;
-   double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
-   if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
-     throw new NotSupportedException("Random forest classification does not support NaN or infinity values in the input dataset.");
-
-   int info = 0;
-   alglib.decisionforest dForest = new alglib.decisionforest();
-   alglib.dfreport rep = new alglib.dfreport();
-   int nRows = inputMatrix.GetLength(0);
-   int nColumns = inputMatrix.GetLength(1);
-   int sampleSize = Math.Max((int)Math.Round(r * nRows), 1);
-   int nFeatures = Math.Max((int)Math.Round(m * (nColumns - 1)), 1);
-
-   double[] classValues = problemData.ClassValues.ToArray();
-   int nClasses = problemData.Classes;
-   // map original class values to values [0..nClasses-1]
-   Dictionary<double, double> classIndices = new Dictionary<double, double>();
-   for (int i = 0; i < nClasses; i++) {
-     classIndices[classValues[i]] = i;
-   }
-   for (int row = 0; row < nRows; row++) {
-     inputMatrix[row, nColumns - 1] = classIndices[inputMatrix[row, nColumns - 1]];
-   }
-   // execute random forest algorithm
-   alglib.dforest.dfbuildinternal(inputMatrix, nRows, nColumns - 1, nClasses, nTrees, sampleSize, nFeatures, alglib.dforest.dfusestrongsplits + alglib.dforest.dfuseevs, ref info, dForest.innerobj, rep.innerobj);
-   if (info != 1) throw new ArgumentException("Error in calculation of random forest classification solution");
-
-   rmsError = rep.rmserror;
-   outOfBagRmsError = rep.oobrmserror;
-   relClassificationError = rep.relclserror;
-   outOfBagRelClassificationError = rep.oobrelclserror;
-   return new RandomForestClassificationSolution((IClassificationProblemData)problemData.Clone(), new RandomForestModel(dForest, targetVariable, allowedInputVariables, classValues));
+   var model = RandomForestModel.CreateClassificationModel(problemData, nTrees, r, m, seed, out rmsError, out relClassificationError, out outOfBagRmsError, out outOfBagRelClassificationError);
+   return new RandomForestClassificationSolution((IClassificationProblemData)problemData.Clone(), model);
  }
  #endregion

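The hunk above replaces the inline ALGLIB call with a delegation to the new static factory on RandomForestModel. A minimal sketch of how that refactored entry point would be exercised from calling code, assuming the HeuristicLab.Algorithms.DataAnalysis and HeuristicLab.Problems.DataAnalysis namespaces, an already loaded problemData instance, and illustrative values for nTrees, r, m and seed (none of which are part of this changeset):

    using HeuristicLab.Algorithms.DataAnalysis;
    using HeuristicLab.Problems.DataAnalysis;

    public static class RandomForestFactorySketch {
      // Hypothetical helper mirroring the rewritten CreateRandomForestClassificationSolution.
      public static IClassificationSolution BuildSolution(IClassificationProblemData problemData) {
        double rmsError, oobRmsError, relClassError, oobRelClassError;

        // nTrees = 50 trees, r = 0.3 (fraction of rows sampled per tree),
        // m = 0.5 (fraction of features considered per split), seed = 1234.
        var model = RandomForestModel.CreateClassificationModel(
          problemData, 50, 0.3, 0.5, 1234,
          out rmsError, out oobRmsError, out relClassError, out oobRelClassError);

        // The algorithm then wraps the model in a solution over a clone of the problem data.
        return new RandomForestClassificationSolution(
          (IClassificationProblemData)problemData.Clone(), model);
      }
    }
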
branches/gteufl/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassificationSolution.cs
r9456 → r12969

  #region License Information
  /* HeuristicLab
-  * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+  * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
   *
   * This file is part of HeuristicLab.

branches/gteufl/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs
r9456 → r12969

  #region License Information
  /* HeuristicLab
-  * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+  * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
   *
   * This file is part of HeuristicLab.
  …
  [Item("RandomForestModel", "Represents a random forest for regression and classification.")]
  public sealed class RandomForestModel : NamedItem, IRandomForestModel {
    // not persisted
    private alglib.decisionforest randomForest;
-   public alglib.decisionforest RandomForest {
-     get { return randomForest; }
-     set {
-       if (value != randomForest) {
-         if (value == null) throw new ArgumentNullException();
-         randomForest = value;
-         OnChanged(EventArgs.Empty);
-       }
-     }
-   }
-
-   [Storable]
-   private string targetVariable;
-   [Storable]
-   private string[] allowedInputVariables;
+   private alglib.decisionforest RandomForest {
+     get {
+       // recalculate lazily
+       if (randomForest.innerobj.trees == null || randomForest.innerobj.trees.Length == 0) RecalculateModel();
+       return randomForest;
+     }
+   }
+
+   // instead of storing the data of the model itself
+   // we instead only store data necessary to recalculate the same model lazily on demand
+   [Storable]
+   private int seed;
+   [Storable]
+   private IDataAnalysisProblemData originalTrainingData;
    [Storable]
    private double[] classValues;
+   [Storable]
+   private int nTrees;
+   [Storable]
+   private double r;
+   [Storable]
+   private double m;
+
+
    [StorableConstructor]
    private RandomForestModel(bool deserializing)
      : base(deserializing) {
-     if (deserializing)
      // for backwards compatibility (loading old solutions)
+     randomForest = new alglib.decisionforest();
    }
    private RandomForestModel(RandomForestModel original, Cloner cloner)
  …
      randomForest.innerobj.ntrees = original.randomForest.innerobj.ntrees;
      randomForest.innerobj.nvars = original.randomForest.innerobj.nvars;
-     randomForest.innerobj.trees = (double[])original.randomForest.innerobj.trees.Clone();
-     targetVariable = original.targetVariable;
-     allowedInputVariables = (string[])original.allowedInputVariables.Clone();
-     if (original.classValues != null)
-       this.classValues = (double[])original.classValues.Clone();
-   }
-   public RandomForestModel(alglib.decisionforest randomForest, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null)
+     // we assume that the trees array (double[]) is immutable in alglib
+     randomForest.innerobj.trees = original.randomForest.innerobj.trees;
+
+     // allowedInputVariables is immutable so we don't need to clone
+     allowedInputVariables = original.allowedInputVariables;
+
+     // clone data which is necessary to rebuild the model
+     this.seed = original.seed;
+     this.originalTrainingData = cloner.Clone(original.originalTrainingData);
+     // classvalues is immutable so we don't need to clone
+     this.classValues = original.classValues;
+     this.nTrees = original.nTrees;
+     this.r = original.r;
+     this.m = original.m;
+   }
+
+   // random forest models can only be created through the static factory methods CreateRegressionModel and CreateClassificationModel
+   private RandomForestModel(alglib.decisionforest randomForest,
+     int seed, IDataAnalysisProblemData originalTrainingData,
+     int nTrees, double r, double m, double[] classValues = null)
      : base() {
      this.name = ItemName;
      this.description = ItemDescription;
+     // the model itself
      this.randomForest = randomForest;
-     this.targetVariable = targetVariable;
-     this.allowedInputVariables = allowedInputVariables.ToArray();
-     if (classValues != null)
-       this.classValues = (double[])classValues.Clone();
+     // data which is necessary for recalculation of the model
+     this.seed = seed;
+     this.originalTrainingData = (IDataAnalysisProblemData)originalTrainingData.Clone();
+     this.classValues = classValues;
+     this.nTrees = nTrees;
+     this.r = r;
+     this.m = m;
    }
  …
    }

-   public IEnumerable<double> GetEstimatedValues(Dataset dataset, IEnumerable<int> rows) {
-     double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
+   private void RecalculateModel() {
+     double rmsError, oobRmsError, relClassError, oobRelClassError;
+     var regressionProblemData = originalTrainingData as IRegressionProblemData;
+     var classificationProblemData = originalTrainingData as IClassificationProblemData;
+     if (regressionProblemData != null) {
+       var model = CreateRegressionModel(regressionProblemData,
+         nTrees, r, m, seed, out rmsError, out oobRmsError,
+         out relClassError, out oobRelClassError);
+       randomForest = model.randomForest;
+     } else if (classificationProblemData != null) {
+       var model = CreateClassificationModel(classificationProblemData,
+         nTrees, r, m, seed, out rmsError, out oobRmsError,
+         out relClassError, out oobRelClassError);
+       randomForest = model.randomForest;
+     }
+   }
+
+   public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
+     double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
+     AssertInputMatrix(inputData);

      int n = inputData.GetLength(0);
  …
          x[column] = inputData[row, column];
        }
-       alglib.dfprocess(randomForest, x, ref y);
+       alglib.dfprocess(RandomForest, x, ref y);
        yield return y[0];
      }
    }

-   public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
-     double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
+   public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
+     double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
+     AssertInputMatrix(inputData);

      int n = inputData.GetLength(0);
      int columns = inputData.GetLength(1);
      double[] x = new double[columns];
-     double[] y = new double[randomForest.innerobj.nclasses];
+     double[] y = new double[RandomForest.innerobj.nclasses];

      for (int row = 0; row < n; row++) {
  …
    }

-   #region events
-   public event EventHandler Changed;
-   private void OnChanged(EventArgs e) {
-     var handlers = Changed;
-     if (handlers != null)
-       handlers(this, e);
-   }
-   #endregion
-
-   #region persistence
+   public static RandomForestModel CreateRegressionModel(IRegressionProblemData problemData, int nTrees, double r, double m, int seed,
+     out double rmsError, out double outOfBagRmsError, out double avgRelError, out double outOfBagAvgRelError) {
+     return CreateRegressionModel(problemData, problemData.TrainingIndices, nTrees, r, m, seed, out rmsError, out avgRelError, out outOfBagAvgRelError, out outOfBagRmsError);
+   }
+
+   public static RandomForestModel CreateRegressionModel(IRegressionProblemData problemData, IEnumerable<int> trainingIndices, int nTrees, double r, double m, int seed,
+     out double rmsError, out double outOfBagRmsError, out double avgRelError, out double outOfBagAvgRelError) {
+     var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
+     double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset, variables, trainingIndices);
+
+     alglib.dfreport rep;
+     var dForest = CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, 1, out rep);
+
+     rmsError = rep.rmserror;
+     avgRelError = rep.avgrelerror;
+     outOfBagAvgRelError = rep.oobavgrelerror;
+     outOfBagRmsError = rep.oobrmserror;
+
+     return new RandomForestModel(dForest, seed, problemData, nTrees, r, m);
+   }
+
+   public static RandomForestModel CreateClassificationModel(IClassificationProblemData problemData, int nTrees, double r, double m, int seed,
+     out double rmsError, out double outOfBagRmsError, out double relClassificationError, out double outOfBagRelClassificationError) {
+     return CreateClassificationModel(problemData, problemData.TrainingIndices, nTrees, r, m, seed, out rmsError, out outOfBagRmsError, out relClassificationError, out outOfBagRelClassificationError);
+   }
+
+   public static RandomForestModel CreateClassificationModel(IClassificationProblemData problemData, IEnumerable<int> trainingIndices, int nTrees, double r, double m, int seed,
+     out double rmsError, out double outOfBagRmsError, out double relClassificationError, out double outOfBagRelClassificationError) {
+
+     var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
+     double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset, variables, trainingIndices);
+
+     var classValues = problemData.ClassValues.ToArray();
+     int nClasses = classValues.Length;
+
+     // map original class values to values [0..nClasses-1]
+     var classIndices = new Dictionary<double, double>();
+     for (int i = 0; i < nClasses; i++) {
+       classIndices[classValues[i]] = i;
+     }
+
+     int nRows = inputMatrix.GetLength(0);
+     int nColumns = inputMatrix.GetLength(1);
+     for (int row = 0; row < nRows; row++) {
+       inputMatrix[row, nColumns - 1] = classIndices[inputMatrix[row, nColumns - 1]];
+     }
+
+     alglib.dfreport rep;
+     var dForest = CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out rep);
+
+     rmsError = rep.rmserror;
+     outOfBagRmsError = rep.oobrmserror;
+     relClassificationError = rep.relclserror;
+     outOfBagRelClassificationError = rep.oobrelclserror;
+
+     return new RandomForestModel(dForest, seed, problemData, nTrees, r, m, classValues);
+   }
+
+   private static alglib.decisionforest CreateRandomForestModel(int seed, double[,] inputMatrix, int nTrees, double r, double m, int nClasses, out alglib.dfreport rep) {
+     AssertParameters(r, m);
+     AssertInputMatrix(inputMatrix);
+
+     int info = 0;
+     alglib.math.rndobject = new System.Random(seed);
+     var dForest = new alglib.decisionforest();
+     rep = new alglib.dfreport();
+     int nRows = inputMatrix.GetLength(0);
+     int nColumns = inputMatrix.GetLength(1);
+     int sampleSize = Math.Max((int)Math.Round(r * nRows), 1);
+     int nFeatures = Math.Max((int)Math.Round(m * (nColumns - 1)), 1);
+
+     alglib.dforest.dfbuildinternal(inputMatrix, nRows, nColumns - 1, nClasses, nTrees, sampleSize, nFeatures, alglib.dforest.dfusestrongsplits + alglib.dforest.dfuseevs, ref info, dForest.innerobj, rep.innerobj);
+     if (info != 1) throw new ArgumentException("Error in calculation of random forest model");
+     return dForest;
+   }
+
+   private static void AssertParameters(double r, double m) {
+     if (r <= 0 || r > 1) throw new ArgumentException("The R parameter for random forest modeling must be between 0 and 1.");
+     if (m <= 0 || m > 1) throw new ArgumentException("The M parameter for random forest modeling must be between 0 and 1.");
+   }
+
+   private static void AssertInputMatrix(double[,] inputMatrix) {
+     if (inputMatrix.Cast<double>().Any(x => Double.IsNaN(x) || Double.IsInfinity(x)))
+       throw new NotSupportedException("Random forest modeling does not support NaN or infinity values in the input dataset.");
+   }
+
+   #region persistence for backwards compatibility
+   // when the originalTrainingData is null this means the model was loaded from an old file
+   // therefore, we cannot use the new persistence mechanism because the original data is not available anymore
+   // in such cases we still store the compete model
+   private bool IsCompatibilityLoaded { get { return originalTrainingData == null; } }
+
+   private string[] allowedInputVariables;
+   [Storable(Name = "allowedInputVariables")]
+   private string[] AllowedInputVariables {
+     get {
+       if (IsCompatibilityLoaded) return allowedInputVariables;
+       else return originalTrainingData.AllowedInputVariables.ToArray();
+     }
+     set { allowedInputVariables = value; }
+   }
    [Storable]
    private int RandomForestBufSize {
      get {
-       return randomForest.innerobj.bufsize;
+       if (IsCompatibilityLoaded) return randomForest.innerobj.bufsize;
+       else return 0;
      }
      set {
  …
    private int RandomForestNClasses {
      get {
-       return randomForest.innerobj.nclasses;
+       if (IsCompatibilityLoaded) return randomForest.innerobj.nclasses;
+       else return 0;
      }
      set {
  …
    private int RandomForestNTrees {
      get {
-       return randomForest.innerobj.ntrees;
+       if (IsCompatibilityLoaded) return randomForest.innerobj.ntrees;
+       else return 0;
      }
      set {
  …
    private int RandomForestNVars {
      get {
-       return randomForest.innerobj.nvars;
+       if (IsCompatibilityLoaded) return randomForest.innerobj.nvars;
+       else return 0;
      }
      set {
  …
    private double[] RandomForestTrees {
      get {
-       return randomForest.innerobj.trees;
+       if (IsCompatibilityLoaded) return randomForest.innerobj.trees;
+       else return new double[] { };
      }
      set {

branches/gteufl/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs
r9456 → r12969

  #region License Information
  /* HeuristicLab
-  * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+  * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
   *
   * This file is part of HeuristicLab.
  …
  #endregion

- using System;
- using System.Collections.Generic;
- using System.Linq;
  using HeuristicLab.Common;
  using HeuristicLab.Core;
  …
  /// </summary>
  [Item("Random Forest Regression", "Random forest regression data analysis algorithm (wrapper for ALGLIB).")]
- [Creatable("Data Analysis")]
+ [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 120)]
  [StorableClass]
  public sealed class RandomForestRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> {
  …
  public static IRegressionSolution CreateRandomForestRegressionSolution(IRegressionProblemData problemData, int nTrees, double r, double m, int seed,
    out double rmsError, out double avgRelError, out double outOfBagRmsError, out double outOfBagAvgRelError) {
-   if (r <= 0 || r > 1) throw new ArgumentException("The R parameter in the random forest regression must be between 0 and 1.");
-   if (m <= 0 || m > 1) throw new ArgumentException("The M parameter in the random forest regression must be between 0 and 1.");
-
-   alglib.math.rndobject = new System.Random(seed);
-
-   Dataset dataset = problemData.Dataset;
-   string targetVariable = problemData.TargetVariable;
-   IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
-   IEnumerable<int> rows = problemData.TrainingIndices;
-   double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
-   if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
-     throw new NotSupportedException("Random forest regression does not support NaN or infinity values in the input dataset.");
-
-   int info = 0;
-   alglib.decisionforest dForest = new alglib.decisionforest();
-   alglib.dfreport rep = new alglib.dfreport();
-   int nRows = inputMatrix.GetLength(0);
-   int nColumns = inputMatrix.GetLength(1);
-   int sampleSize = Math.Max((int)Math.Round(r * nRows), 1);
-   int nFeatures = Math.Max((int)Math.Round(m * (nColumns - 1)), 1);
-
-   alglib.dforest.dfbuildinternal(inputMatrix, nRows, nColumns - 1, 1, nTrees, sampleSize, nFeatures, alglib.dforest.dfusestrongsplits + alglib.dforest.dfuseevs, ref info, dForest.innerobj, rep.innerobj);
-   if (info != 1) throw new ArgumentException("Error in calculation of random forest regression solution");
-
-   rmsError = rep.rmserror;
-   avgRelError = rep.avgrelerror;
-   outOfBagAvgRelError = rep.oobavgrelerror;
-   outOfBagRmsError = rep.oobrmserror;
-
-   return new RandomForestRegressionSolution((IRegressionProblemData)problemData.Clone(), new RandomForestModel(dForest, targetVariable, allowedInputVariables));
+   var model = RandomForestModel.CreateRegressionModel(problemData, nTrees, r, m, seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError);
+   return new RandomForestRegressionSolution((IRegressionProblemData)problemData.Clone(), model);
  }
  #endregion

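Both the removed inline implementation here and the shared CreateRandomForestModel helper introduced in RandomForestModel.cs translate the user-facing R and M ratios into the absolute counts that alglib.dforest.dfbuildinternal expects in the same way. A small standalone sketch of just that mapping, assuming nothing beyond the arithmetic visible in the diff (the class and method names are illustrative):

    using System;

    public static class RandomForestParameterMapping {
      // Translates the fraction-of-rows parameter r and the fraction-of-features
      // parameter m into the absolute counts handed to the ALGLIB forest builder.
      public static void ToAlglibCounts(double r, double m, int nRows, int nColumns,
                                        out int sampleSize, out int nFeatures) {
        // Same validation as AssertParameters in the new RandomForestModel.
        if (r <= 0 || r > 1) throw new ArgumentException("r must be in (0, 1].");
        if (m <= 0 || m > 1) throw new ArgumentException("m must be in (0, 1].");

        // The last column of the input matrix is the target, so only
        // nColumns - 1 columns are candidate features.
        sampleSize = Math.Max((int)Math.Round(r * nRows), 1);
        nFeatures = Math.Max((int)Math.Round(m * (nColumns - 1)), 1);
      }
    }

For example, r = 0.3 and m = 0.5 on a 100-row matrix with 11 columns (10 inputs plus the target) give sampleSize = 30 and nFeatures = 5.
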
branches/gteufl/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegressionSolution.cs
r9456 → r12969

  #region License Information
  /* HeuristicLab
-  * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
+  * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
   *
   * This file is part of HeuristicLab.