- Timestamp:
- 04/09/21 19:41:33 (3 years ago)
- Location:
- trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest
- Files:
-
- 5 edited
- 1 copied
Legend:
- Unmodified
- Added
- Removed
-
trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs
r17180 r17931 207 207 inputMatrix[row, nColumns - 1] = classIndices[inputMatrix[row, nColumns - 1]]; 208 208 } 209 210 alglib.dfreport rep; 211 var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out rep); 209 210 var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out var rep); 212 211 213 212 rmsError = rep.rmserror; … … 216 215 outOfBagRelClassificationError = rep.oobrelclserror; 217 216 218 return new RandomForestModelFull(dForest, problemData.TargetVariable, problemData.AllowedInputVariables, classValues);217 return new RandomForestModelFull(dForest, nTrees, problemData.TargetVariable, problemData.AllowedInputVariables, classValues); 219 218 } 220 219 #endregion -
trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs
r17180 r17931 20 20 #endregion 21 21 22 extern alias alglib_3_7; 23 22 24 using System; 23 25 using System.Collections.Generic; … … 39 41 public sealed class RandomForestModel : ClassificationModel, IRandomForestModel { 40 42 // not persisted 41 private alglib .decisionforest randomForest;42 private alglib .decisionforest RandomForest {43 private alglib_3_7.alglib.decisionforest randomForest; 44 private alglib_3_7.alglib.decisionforest RandomForest { 43 45 get { 44 46 // recalculate lazily … … 74 76 private RandomForestModel(StorableConstructorFlag _) : base(_) { 75 77 // for backwards compatibility (loading old solutions) 76 randomForest = new alglib .decisionforest();78 randomForest = new alglib_3_7.alglib.decisionforest(); 77 79 } 78 80 private RandomForestModel(RandomForestModel original, Cloner cloner) 79 81 : base(original, cloner) { 80 randomForest = new alglib .decisionforest();82 randomForest = new alglib_3_7.alglib.decisionforest(); 81 83 randomForest.innerobj.bufsize = original.randomForest.innerobj.bufsize; 82 84 randomForest.innerobj.nclasses = original.randomForest.innerobj.nclasses; … … 100 102 101 103 // random forest models can only be created through the static factory methods CreateRegressionModel and CreateClassificationModel 102 private RandomForestModel(string targetVariable, alglib .decisionforest randomForest,104 private RandomForestModel(string targetVariable, alglib_3_7.alglib.decisionforest randomForest, 103 105 int seed, IDataAnalysisProblemData originalTrainingData, 104 106 int nTrees, double r, double m, double[] classValues = null) … … 151 153 x[column] = inputData[row, column]; 152 154 } 153 alglib .dfprocess(RandomForest, x, ref y);155 alglib_3_7.alglib.dfprocess(RandomForest, x, ref y); 154 156 yield return y[0]; 155 157 } … … 169 171 x[column] = inputData[row, column]; 170 172 } 171 alglib .dforest.dfprocessraw(RandomForest.innerobj, x, ref ys);173 alglib_3_7.alglib.dforest.dfprocessraw(RandomForest.innerobj, x, ref ys); 172 174 yield return ys.VariancePop(); 173 175 } … … 187 189 x[column] = inputData[row, column]; 188 190 } 189 alglib .dfprocess(randomForest, x, ref y);191 alglib_3_7.alglib.dfprocess(randomForest, x, ref y); 190 192 // find class for with the largest probability value 191 193 int maxProbClassIndex = 0; … … 315 317 double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices); 316 318 317 alglib.dfreport rep; 318 var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, 1, out rep); 319 var dForest = RandomForestUtil.CreateRandomForestModelAlglib_3_7(seed, inputMatrix, nTrees, r, m, 1, out var rep); 319 320 320 321 rmsError = rep.rmserror; … … 353 354 } 354 355 355 alglib.dfreport rep; 356 var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out rep); 356 var dForest = RandomForestUtil.CreateRandomForestModelAlglib_3_7(seed, inputMatrix, nTrees, r, m, nClasses, out var rep); 357 357 358 358 rmsError = rep.rmserror; -
trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModelAlglib_3_7.cs
r17926 r17931 20 20 #endregion 21 21 22 extern alias alglib_3_7; 22 23 using System; 23 24 using System.Collections.Generic; … … 31 32 32 33 namespace HeuristicLab.Algorithms.DataAnalysis { 34 33 35 [StorableType("9C797DF0-1169-4381-A732-6DAB90802839")] 34 [Item("RandomForestModelFull", "Represents a random forest for regression and classification.")] 35 public sealed class RandomForestModelFull : ClassificationModel, IRandomForestModel { 36 [Item("RandomForestModel (alglib 3.7)", "Represents a random forest for regression and classification.")] 37 [Obsolete("This version uses alglib version 3.7. Use RandomForestModelFull instead.")] 38 public sealed class RandomForestModelAlglib_3_7 : ClassificationModel, IRandomForestModel { 36 39 37 40 public override IEnumerable<string> VariablesUsedForPrediction { … … 50 53 51 54 // not persisted 52 private alglib .decisionforest randomForest;55 private alglib_3_7.alglib.decisionforest randomForest; 53 56 54 57 [Storable] … … 79 82 80 83 [StorableConstructor] 81 private RandomForestModel Full(StorableConstructorFlag _) : base(_) {82 randomForest = new alglib .decisionforest();83 } 84 85 private RandomForestModel Full(RandomForestModelFulloriginal, Cloner cloner) : base(original, cloner) {86 randomForest = new alglib .decisionforest();84 private RandomForestModelAlglib_3_7(StorableConstructorFlag _) : base(_) { 85 randomForest = new alglib_3_7.alglib.decisionforest(); 86 } 87 88 private RandomForestModelAlglib_3_7(RandomForestModelAlglib_3_7 original, Cloner cloner) : base(original, cloner) { 89 randomForest = new alglib_3_7.alglib.decisionforest(); 87 90 randomForest.innerobj.bufsize = original.randomForest.innerobj.bufsize; 88 91 randomForest.innerobj.nclasses = original.randomForest.innerobj.nclasses; … … 96 99 } 97 100 public override IDeepCloneable Clone(Cloner cloner) { 98 return new RandomForestModel Full(this, cloner);99 } 100 101 public RandomForestModel Full(alglib.decisionforest decisionForest, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<double> classValues = null) : base(targetVariable) {101 return new RandomForestModelAlglib_3_7(this, cloner); 102 } 103 104 public RandomForestModelAlglib_3_7(alglib_3_7.alglib.decisionforest decisionForest, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<double> classValues = null) : base(targetVariable) { 102 105 this.name = ItemName; 103 106 this.description = ItemDescription; … … 151 154 x[column] = inputData[row, column]; 152 155 } 153 alglib .dfprocess(randomForest, x, ref y);156 alglib_3_7.alglib.dfprocess(randomForest, x, ref y); 154 157 yield return y[0]; 155 158 } … … 168 171 x[column] = inputData[row, column]; 169 172 } 170 alglib .dforest.dfprocessraw(randomForest.innerobj, x, ref ys);173 alglib_3_7.alglib.dforest.dfprocessraw(randomForest.innerobj, x, ref ys); 171 174 yield return ys.VariancePop(); 172 175 } … … 186 189 x[column] = inputData[row, column]; 187 190 } 188 alglib .dfprocess(randomForest, x, ref y);191 alglib_3_7::alglib.dfprocess(randomForest, x, ref y); 189 192 // find class for with the largest probability value 190 193 int maxProbClassIndex = 0; -
trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModelFull.cs
r17180 r17931 31 31 32 32 namespace HeuristicLab.Algorithms.DataAnalysis { 33 [StorableType(" 9C797DF0-1169-4381-A732-6DAB90802839")]33 [StorableType("55412E08-DAD4-4C2E-9181-C142E7EA9474")] 34 34 [Item("RandomForestModelFull", "Represents a random forest for regression and classification.")] 35 35 public sealed class RandomForestModelFull : ClassificationModel, IRandomForestModel { … … 42 42 private double[] classValues; 43 43 44 public int NumClasses => classValues == null ? 0 : classValues.Length; 45 44 46 [Storable] 45 47 private string[] inputVariables; 46 48 49 [Storable] 47 50 public int NumberOfTrees { 48 get { return RandomForestNTrees; }51 get; private set; 49 52 } 50 53 … … 53 56 54 57 [Storable] 55 private int RandomForestBufSize { 56 get { return randomForest.innerobj.bufsize; } 57 set { randomForest.innerobj.bufsize = value; } 58 } 59 [Storable] 60 private int RandomForestNClasses { 61 get { return randomForest.innerobj.nclasses; } 62 set { randomForest.innerobj.nclasses = value; } 63 } 64 [Storable] 65 private int RandomForestNTrees { 66 get { return randomForest.innerobj.ntrees; } 67 set { randomForest.innerobj.ntrees = value; } 68 } 69 [Storable] 70 private int RandomForestNVars { 71 get { return randomForest.innerobj.nvars; } 72 set { randomForest.innerobj.nvars = value; } 73 } 74 [Storable] 75 private double[] RandomForestTrees { 76 get { return randomForest.innerobj.trees; } 77 set { randomForest.innerobj.trees = value; } 58 private string RandomForestSerialized { 59 get { alglib.dfserialize(randomForest, out var serialized); return serialized; } 60 set { if (value != null) alglib.dfunserialize(value, out randomForest); } 78 61 } 79 62 80 63 [StorableConstructor] 81 private RandomForestModelFull(StorableConstructorFlag _) : base(_) { 82 randomForest = new alglib.decisionforest(); 83 } 64 private RandomForestModelFull(StorableConstructorFlag _) : base(_) { } 84 65 85 66 private RandomForestModelFull(RandomForestModelFull original, Cloner cloner) : base(original, cloner) { 86 randomForest = new alglib.decisionforest(); 87 randomForest.innerobj.bufsize = original.randomForest.innerobj.bufsize; 88 randomForest.innerobj.nclasses = original.randomForest.innerobj.nclasses; 89 randomForest.innerobj.ntrees = original.randomForest.innerobj.ntrees; 90 randomForest.innerobj.nvars = original.randomForest.innerobj.nvars; 91 randomForest.innerobj.trees = (double[])original.randomForest.innerobj.trees.Clone(); 67 if (original.randomForest != null) 68 randomForest = (alglib.decisionforest)original.randomForest.make_copy(); 69 NumberOfTrees = original.NumberOfTrees; 92 70 93 71 // following fields are immutable so we don't need to clone them … … 99 77 } 100 78 101 public RandomForestModelFull(alglib.decisionforest decisionForest, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<double> classValues = null) : base(targetVariable) {79 public RandomForestModelFull(alglib.decisionforest decisionForest, int nTrees, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<double> classValues = null) : base(targetVariable) { 102 80 this.name = ItemName; 103 81 this.description = ItemDescription; 104 82 105 randomForest = decisionForest; 83 randomForest = (alglib.decisionforest)decisionForest.make_copy(); 84 NumberOfTrees = nTrees; 106 85 107 86 this.inputVariables = inputVariables.ToArray(); … … 147 126 double[] y = new double[1]; 148 127 128 alglib.dfcreatebuffer(randomForest, out var buf); 149 129 for (int row = 0; row < n; row++) { 150 130 for (int column = 0; column < columns; column++) { 151 131 x[column] = inputData[row, column]; 152 132 } 153 alglib.df process(randomForest, x, ref y);133 alglib.dftsprocess(randomForest, buf, x, ref y); // thread-safe process (as long as separate buffers are used) 154 134 yield return y[0]; 155 135 } … … 168 148 x[column] = inputData[row, column]; 169 149 } 170 alglib.dforest.dfprocessraw(randomForest.innerobj, x, ref ys); 150 lock (randomForest) 151 alglib.dforest.dfprocessraw(randomForest.innerobj, x, ref ys, null); 171 152 yield return ys.VariancePop(); 172 153 } … … 180 161 int columns = inputData.GetLength(1); 181 162 double[] x = new double[columns]; 182 double[] y = new double[randomForest.innerobj.nclasses]; 183 163 double[] y = new double[NumClasses]; 164 165 alglib.dfcreatebuffer(randomForest, out var buf); 184 166 for (int row = 0; row < n; row++) { 185 167 for (int column = 0; column < columns; column++) { 186 168 x[column] = inputData[row, column]; 187 169 } 188 alglib.df process(randomForest, x, ref y);170 alglib.dftsprocess(randomForest, buf, x, ref y); 189 171 // find class for with the largest probability value 190 172 int maxProbClassIndex = 0; -
trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs
r17180 r17931 20 20 #endregion 21 21 22 extern alias alglib_3_7; 23 using alglib_3_7; 22 24 using System.Collections.Generic; 23 25 using System.Linq; … … 200 202 outOfBagAvgRelError = rep.oobavgrelerror; 201 203 202 return new RandomForestModelFull(dForest, problemData.TargetVariable, problemData.AllowedInputVariables);204 return new RandomForestModelFull(dForest, nTrees, problemData.TargetVariable, problemData.AllowedInputVariables); 203 205 } 204 206 -
trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestUtil.cs
r17180 r17931 22 22 #endregion 23 23 24 extern alias alglib_3_7; 25 24 26 using System; 25 27 using System.Collections.Generic; … … 103 105 RandomForestUtil.AssertInputMatrix(inputMatrix); 104 106 107 int nRows = inputMatrix.GetLength(0); 108 int nColumns = inputMatrix.GetLength(1); 109 110 alglib.dfbuildercreate(out var dfbuilder); 111 alglib.dfbuildersetdataset(dfbuilder, inputMatrix, nRows, nColumns - 1, nClasses); 112 alglib.dfbuildersetimportancenone(dfbuilder); // do not calculate importance (TODO add this feature) 113 alglib.dfbuildersetrdfalgo(dfbuilder, 0); // only one algorithm supported in version 3.17 114 alglib.dfbuildersetrdfsplitstrength(dfbuilder, 2); // 0 = split at the random position, fastest one 115 // 1 = split at the middle of the range 116 // 2 = strong split at the best point of the range (default) 117 alglib.dfbuildersetrndvarsratio(dfbuilder, m); 118 alglib.dfbuildersetsubsampleratio(dfbuilder, r); 119 alglib.dfbuildersetseed(dfbuilder, seed); 120 alglib.dfbuilderbuildrandomforest(dfbuilder, nTrees, out var dForest, out rep); 121 return dForest; 122 } 123 internal static alglib_3_7.alglib.decisionforest CreateRandomForestModelAlglib_3_7(int seed, double[,] inputMatrix, int nTrees, double r, double m, int nClasses, out alglib_3_7.alglib.dfreport rep) { 124 RandomForestUtil.AssertParameters(r, m); 125 RandomForestUtil.AssertInputMatrix(inputMatrix); 126 105 127 int info = 0; 106 alglib .math.rndobject = new System.Random(seed);107 var dForest = new alglib .decisionforest();108 rep = new alglib .dfreport();128 alglib_3_7.alglib.math.rndobject = new System.Random(seed); 129 var dForest = new alglib_3_7.alglib.decisionforest(); 130 rep = new alglib_3_7.alglib.dfreport(); 109 131 int nRows = inputMatrix.GetLength(0); 110 132 int nColumns = inputMatrix.GetLength(1); … … 112 134 int nFeatures = Math.Max((int)Math.Round(m * (nColumns - 1)), 1); 113 135 114 alglib .dforest.dfbuildinternal(inputMatrix, nRows, nColumns - 1, nClasses, nTrees, sampleSize, nFeatures, alglib.dforest.dfusestrongsplits +alglib.dforest.dfuseevs, ref info, dForest.innerobj, rep.innerobj);136 alglib_3_7.alglib.dforest.dfbuildinternal(inputMatrix, nRows, nColumns - 1, nClasses, nTrees, sampleSize, nFeatures, alglib_3_7.alglib.dforest.dfusestrongsplits + alglib_3_7.alglib.dforest.dfuseevs, ref info, dForest.innerobj, rep.innerobj); 115 137 if (info != 1) throw new ArgumentException("Error in calculation of random forest model"); 116 138 return dForest; … … 123 145 var targetVariable = GetTargetVariableName(problemData); 124 146 foreach (var tuple in partitions) { 125 double rmsError, avgRelError, outOfBagAvgRelError, outOfBagRmsError;126 147 var trainingRandomForestPartition = tuple.Item1; 127 148 var testRandomForestPartition = tuple.Item2; 128 var model = RandomForestModel.CreateRegressionModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError); 149 var model = RandomForestRegression.CreateRandomForestRegressionModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed, 150 out var rmsError, out var avgRelError, out var outOfBagRmsError, out var outOfBagAvgRelError); 129 151 var estimatedValues = model.GetEstimatedValues(ds, testRandomForestPartition); 130 152 var targetValues = ds.GetDoubleValues(targetVariable, testRandomForestPartition); … … 143 165 var targetVariable = GetTargetVariableName(problemData); 144 166 foreach (var tuple in partitions) { 145 double rmsError, avgRelError, outOfBagAvgRelError, outOfBagRmsError;146 167 var trainingRandomForestPartition = tuple.Item1; 147 168 var testRandomForestPartition = tuple.Item2; 148 var model = RandomForestModel.CreateClassificationModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError); 169 var model = RandomForestClassification.CreateRandomForestClassificationModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed, 170 out var rmsError, out var avgRelError, out var outOfBagRmsError, out var outOfBagAvgRelError); 149 171 var estimatedValues = model.GetEstimatedClassValues(ds, testRandomForestPartition); 150 172 var targetValues = ds.GetDoubleValues(targetVariable, testRandomForestPartition); … … 176 198 var parameters = new RFParameter(); 177 199 for (int i = 0; i < setters.Count; ++i) { setters[i](parameters, parameterValues[i]); } 178 double rmsError, outOfBagRmsError, avgRelError, outOfBagAvgRelError;179 RandomForestModel.CreateRegressionModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed, out rmsError, out outOfBagRmsError, out avgRelError, outoutOfBagAvgRelError);200 RandomForestRegression.CreateRandomForestRegressionModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed, 201 out var rmsError, out var outOfBagRmsError, out var avgRelError, out var outOfBagAvgRelError); 180 202 181 203 lock (locker) { … … 208 230 var parameters = new RFParameter(); 209 231 for (int i = 0; i < setters.Count; ++i) { setters[i](parameters, parameterValues[i]); } 210 double rmsError, outOfBagRmsError, avgRelError, outOfBagAvgRelError; 211 RandomForestModel.CreateClassificationModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed, 212 out rmsError, out outOfBagRmsError, out avgRelError, out outOfBagAvgRelError); 232 RandomForestClassification.CreateRandomForestClassificationModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed, 233 out var rmsError, out var outOfBagRmsError, out var avgRelError, out var outOfBagAvgRelError); 213 234 214 235 lock (locker) {
Note: See TracChangeset
for help on using the changeset viewer.