Changeset 17050
- Timestamp:
- 06/28/19 13:58:06 (5 years ago)
- Location:
- branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis
- Files:
-
- 8 edited
- 1 copied
Legend:
- Unmodified
- Added
- Removed
-
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
/trunk/HeuristicLab.Algorithms.DataAnalysis (added) merged: 17043-17044
- Property svn:mergeinfo changed
-
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4
- Property svn:mergeinfo changed
/trunk/HeuristicLab.Algorithms.DataAnalysis/3.4 (added) merged: 17043-17044
- Property svn:mergeinfo changed
-
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r17045 r17050 391 391 <Compile Include="Plugin.cs" /> 392 392 <Compile Include="Properties\AssemblyInfo.cs" /> 393 <Compile Include="RandomForest\ModelCreation.cs" /> 393 394 <Compile Include="RandomForest\RandomForestClassificationSolution.cs" /> 394 395 <Compile Include="RandomForest\RandomForestClassification.cs" /> -
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/ModelCreation.cs
r17049 r17050 23 23 using HEAL.Attic; 24 24 25 namespace HeuristicLab.Algorithms.DataAnalysis. GradientBoostedTrees{25 namespace HeuristicLab.Algorithms.DataAnalysis.RandomForest { 26 26 27 27 /// <summary> … … 31 31 /// Model - the complete model will be stored (consider the amount of memory needed) 32 32 /// </summary> 33 [StorableType(" EE55C357-C4B3-4662-B40B-D1D06A851809")]33 [StorableType("3869899B-1848-4628-AF27-3B6FDE5840D6")] 34 34 public enum ModelCreation { 35 35 QualityOnly, -
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs
r17045 r17050 20 20 #endregion 21 21 22 using System.Collections.Generic; 23 using System.Linq; 22 24 using System.Threading; 23 25 using HEAL.Attic; 26 using HeuristicLab.Algorithms.DataAnalysis.RandomForest; 24 27 using HeuristicLab.Common; 25 28 using HeuristicLab.Core; … … 43 46 private const string SeedParameterName = "Seed"; 44 47 private const string SetSeedRandomlyParameterName = "SetSeedRandomly"; 45 private const string CreateSolutionParameterName = "CreateSolution";48 private const string ModelCreationParameterName = "ModelCreation"; 46 49 47 50 #region parameter properties … … 61 64 get { return (IFixedValueParameter<BoolValue>)Parameters[SetSeedRandomlyParameterName]; } 62 65 } 63 p ublic IFixedValueParameter<BoolValue> CreateSolutionParameter {64 get { return (IFixedValueParameter< BoolValue>)Parameters[CreateSolutionParameterName]; }66 private IFixedValueParameter<EnumValue<ModelCreation>> ModelCreationParameter { 67 get { return (IFixedValueParameter<EnumValue<ModelCreation>>)Parameters[ModelCreationParameterName]; } 65 68 } 66 69 #endregion … … 86 89 set { SetSeedRandomlyParameter.Value.Value = value; } 87 90 } 88 public bool CreateSolution {89 get { return CreateSolutionParameter.Value.Value; }90 set { CreateSolutionParameter.Value.Value = value; }91 public ModelCreation ModelCreation { 92 get { return ModelCreationParameter.Value.Value; } 93 set { ModelCreationParameter.Value.Value = value; } 91 94 } 92 95 #endregion … … 105 108 Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0))); 106 109 Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true))); 107 Parameters.Add(new FixedValueParameter< BoolValue>(CreateSolutionParameterName, "Flag that indicates if a solution should be produced at the end of the run", new BoolValue(true)));108 Parameters[ CreateSolutionParameterName].Hidden = true;110 Parameters.Add(new FixedValueParameter<EnumValue<ModelCreation>>(ModelCreationParameterName, "Defines the results produced at the end of the run (Surrogate => Less disk space, lazy recalculation of model)", new EnumValue<ModelCreation>(ModelCreation.Model))); 111 Parameters[ModelCreationParameterName].Hidden = true; 109 112 110 113 Problem = new ClassificationProblem(); … … 121 124 if (!Parameters.ContainsKey((SetSeedRandomlyParameterName))) 122 125 Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true))); 123 if (!Parameters.ContainsKey(CreateSolutionParameterName)) { 124 Parameters.Add(new FixedValueParameter<BoolValue>(CreateSolutionParameterName, "Flag that indicates if a solution should be produced at the end of the run", new BoolValue(true))); 125 Parameters[CreateSolutionParameterName].Hidden = true; 126 127 // parameter type has been changed 128 if (Parameters.ContainsKey("CreateSolution")) { 129 var createSolutionParam = Parameters["CreateSolution"] as FixedValueParameter<BoolValue>; 130 Parameters.Remove(createSolutionParam); 131 132 ModelCreation value = createSolutionParam.Value.Value ? ModelCreation.Model : ModelCreation.QualityOnly; 133 Parameters.Add(new FixedValueParameter<EnumValue<ModelCreation>>(ModelCreationParameterName, "Defines the results produced at the end of the run (Surrogate => Less disk space, lazy recalculation of model)", new EnumValue<ModelCreation>(value))); 134 Parameters[ModelCreationParameterName].Hidden = true; 126 135 } 127 136 #endregion … … 138 147 139 148 var model = CreateRandomForestClassificationModel(Problem.ProblemData, NumberOfTrees, R, M, Seed, out rmsError, out relClassificationError, out outOfBagRmsError, out outOfBagRelClassificationError); 149 140 150 Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the random forest regression solution on the training set.", new DoubleValue(rmsError))); 141 151 Results.Add(new Result("Relative classification error", "Relative classification error of the random forest regression solution on the training set.", new PercentValue(relClassificationError))); … … 143 153 Results.Add(new Result("Relative classification error (out-of-bag)", "The out-of-bag relative classification error of the random forest regression solution.", new PercentValue(outOfBagRelClassificationError))); 144 154 145 if (CreateSolution) { 146 var solution = model.CreateClassificationSolution(Problem.ProblemData); 155 156 IClassificationSolution solution = null; 157 if (ModelCreation == ModelCreation.Model) { 158 solution = model.CreateClassificationSolution(Problem.ProblemData); 159 } else if (ModelCreation == ModelCreation.SurrogateModel) { 160 var problemData = Problem.ProblemData; 161 var surrogateModel = new RandomForestModelSurrogate(model, problemData.TargetVariable, problemData, Seed, NumberOfTrees, R, M, problemData.ClassValues.ToArray()); 162 163 solution = surrogateModel.CreateClassificationSolution(problemData); 164 } 165 166 if (solution != null) { 147 167 Results.Add(new Result(RandomForestClassificationModelResultName, "The random forest classification solution.", solution)); 148 168 } … … 157 177 } 158 178 159 public static RandomForestModel CreateRandomForestClassificationModel(IClassificationProblemData problemData, int nTrees, double r, double m, int seed, 179 public static RandomForestModelFull CreateRandomForestClassificationModel(IClassificationProblemData problemData, int nTrees, double r, double m, int seed, 180 out double rmsError, out double avgRelError, out double outOfBagRmsError, out double outOfBagAvgRelError) { 181 var model = CreateRandomForestClassificationModel(problemData, problemData.TrainingIndices, nTrees, r, m, seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError); 182 return model; 183 } 184 185 public static RandomForestModelFull CreateRandomForestClassificationModel(IClassificationProblemData problemData, IEnumerable<int> trainingIndices, int nTrees, double r, double m, int seed, 160 186 out double rmsError, out double relClassificationError, out double outOfBagRmsError, out double outOfBagRelClassificationError) { 161 return RandomForestModel.CreateClassificationModel(problemData, nTrees, r, m, seed, 162 rmsError: out rmsError, relClassificationError: out relClassificationError, outOfBagRmsError: out outOfBagRmsError, outOfBagRelClassificationError: out outOfBagRelClassificationError); 187 188 var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable }); 189 double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices); 190 191 var classValues = problemData.ClassValues.ToArray(); 192 int nClasses = classValues.Length; 193 194 // map original class values to values [0..nClasses-1] 195 var classIndices = new Dictionary<double, double>(); 196 for (int i = 0; i < nClasses; i++) { 197 classIndices[classValues[i]] = i; 198 } 199 200 int nRows = inputMatrix.GetLength(0); 201 int nColumns = inputMatrix.GetLength(1); 202 for (int row = 0; row < nRows; row++) { 203 inputMatrix[row, nColumns - 1] = classIndices[inputMatrix[row, nColumns - 1]]; 204 } 205 206 alglib.dfreport rep; 207 var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out rep); 208 209 rmsError = rep.rmserror; 210 outOfBagRmsError = rep.oobrmserror; 211 relClassificationError = rep.relclserror; 212 outOfBagRelClassificationError = rep.oobrelclserror; 213 214 return new RandomForestModelFull(dForest, problemData.TargetVariable, problemData.AllowedInputVariables, classValues); 163 215 } 164 216 #endregion -
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs
r17045 r17050 34 34 /// Represents a random forest model for regression and classification 35 35 /// </summary> 36 [Obsolete("This class only exists for backwards compatibility reasons . Use RFModelSurrogate or RFModelFull instead.")]36 [Obsolete("This class only exists for backwards compatibility reasons for stored models with the XML Persistence. Use RFModelSurrogate or RFModelFull instead.")] 37 37 [StorableType("9AA4CCC2-CD75-4471-8DF6-949E5B783642")] 38 38 [Item("RandomForestModel", "Represents a random forest for regression and classification.")] -
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModelFull.cs
r17049 r17050 100 100 101 101 public RandomForestModelFull(alglib.decisionforest decisionForest, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<double> classValues = null) : base(targetVariable) { 102 this.name = ItemName; 103 this.description = ItemDescription; 104 102 105 randomForest = decisionForest; 103 106 -
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModelSurrogate.cs
r17049 r17050 115 115 IRandomForestModel randomForestModel = null; 116 116 117 //TODO Refactor to new methods118 117 double rmsError, oobRmsError, relClassError, oobRelClassError; 119 118 var classificationProblemData = originalTrainingData as IClassificationProblemData; 120 119 121 120 if (originalTrainingData is IRegressionProblemData regressionProblemData) { 122 randomForestModel = RandomForest Model.CreateRegressionModel(regressionProblemData,121 randomForestModel = RandomForestRegression.CreateRandomForestRegressionModel(regressionProblemData, 123 122 nTrees, r, m, seed, out rmsError, out oobRmsError, 124 123 out relClassError, out oobRelClassError); 125 124 } else if (classificationProblemData != null) { 126 randomForestModel = RandomForest Model.CreateClassificationModel(classificationProblemData,125 randomForestModel = RandomForestClassification.CreateRandomForestClassificationModel(classificationProblemData, 127 126 nTrees, r, m, seed, out rmsError, out oobRmsError, 128 127 out relClassError, out oobRelClassError); -
branches/2952_RF-ModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs
r17045 r17050 24 24 using System.Threading; 25 25 using HEAL.Attic; 26 using HeuristicLab.Algorithms.DataAnalysis.RandomForest; 26 27 using HeuristicLab.Common; 27 28 using HeuristicLab.Core; … … 45 46 private const string SeedParameterName = "Seed"; 46 47 private const string SetSeedRandomlyParameterName = "SetSeedRandomly"; 47 private const string CreateSolutionParameterName = "CreateSolution";48 private const string ModelCreationParameterName = "ModelCreation"; 48 49 49 50 #region parameter properties … … 63 64 get { return (IFixedValueParameter<BoolValue>)Parameters[SetSeedRandomlyParameterName]; } 64 65 } 65 p ublic IFixedValueParameter<BoolValue> CreateSolutionParameter {66 get { return (IFixedValueParameter< BoolValue>)Parameters[CreateSolutionParameterName]; }66 private IFixedValueParameter<EnumValue<ModelCreation>> ModelCreationParameter { 67 get { return (IFixedValueParameter<EnumValue<ModelCreation>>)Parameters[ModelCreationParameterName]; } 67 68 } 68 69 #endregion … … 88 89 set { SetSeedRandomlyParameter.Value.Value = value; } 89 90 } 90 public bool CreateSolution {91 get { return CreateSolutionParameter.Value.Value; }92 set { CreateSolutionParameter.Value.Value = value; }91 public ModelCreation ModelCreation { 92 get { return ModelCreationParameter.Value.Value; } 93 set { ModelCreationParameter.Value.Value = value; } 93 94 } 94 95 #endregion … … 106 107 Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0))); 107 108 Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true))); 108 Parameters.Add(new FixedValueParameter< BoolValue>(CreateSolutionParameterName, "Flag that indicates if a solution should be produced at the end of the run", new BoolValue(true)));109 Parameters[ CreateSolutionParameterName].Hidden = true;109 Parameters.Add(new FixedValueParameter<EnumValue<ModelCreation>>(ModelCreationParameterName, "Defines the results produced at the end of the run (Surrogate => Less disk space, lazy recalculation of model)", new EnumValue<ModelCreation>(ModelCreation.Model))); 110 Parameters[ModelCreationParameterName].Hidden = true; 110 111 111 112 Problem = new RegressionProblem(); … … 122 123 if (!Parameters.ContainsKey((SetSeedRandomlyParameterName))) 123 124 Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true))); 124 if (!Parameters.ContainsKey(CreateSolutionParameterName)) { 125 Parameters.Add(new FixedValueParameter<BoolValue>(CreateSolutionParameterName, "Flag that indicates if a solution should be produced at the end of the run", new BoolValue(true))); 126 Parameters[CreateSolutionParameterName].Hidden = true; 125 126 // parameter type has been changed 127 if (Parameters.ContainsKey("CreateSolution")) { 128 var createSolutionParam = Parameters["CreateSolution"] as FixedValueParameter<BoolValue>; 129 Parameters.Remove(createSolutionParam); 130 131 ModelCreation value = createSolutionParam.Value.Value ? ModelCreation.Model : ModelCreation.QualityOnly; 132 Parameters.Add(new FixedValueParameter<EnumValue<ModelCreation>>(ModelCreationParameterName, "Defines the results produced at the end of the run (Surrogate => Less disk space, lazy recalculation of model)", new EnumValue<ModelCreation>(value))); 133 Parameters[ModelCreationParameterName].Hidden = true; 127 134 } 128 135 #endregion … … 145 152 Results.Add(new Result("Average relative error (out-of-bag)", "The out-of-bag average of relative errors of the random forest regression solution.", new PercentValue(outOfBagAvgRelError))); 146 153 147 if (CreateSolution) { 148 var solution = model.CreateRegressionSolution(Problem.ProblemData); 154 IRegressionSolution solution = null; 155 if (ModelCreation == ModelCreation.Model) { 156 solution = model.CreateRegressionSolution(Problem.ProblemData); 157 } else if (ModelCreation == ModelCreation.SurrogateModel) { 158 var problemData = Problem.ProblemData; 159 var surrogateModel = new RandomForestModelSurrogate(model, problemData.TargetVariable, problemData, Seed, NumberOfTrees, R, M); 160 solution = surrogateModel.CreateRegressionSolution(problemData); 161 } 162 163 if (solution != null) { 149 164 Results.Add(new Result(RandomForestRegressionModelResultName, "The random forest regression solution.", solution)); 150 165 } … … 163 178 double r, double m, int seed, 164 179 out double rmsError, out double avgRelError, out double outOfBagRmsError, out double outOfBagAvgRelError) { 165 return CreateRandomForestRegressionModel(problemData, problemData.TrainingIndices, nTrees, r, m, seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError); 180 var model = CreateRandomForestRegressionModel(problemData, problemData.TrainingIndices, nTrees, r, m, seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError); 181 return model; 166 182 } 167 183 … … 181 197 182 198 return new RandomForestModelFull(dForest, problemData.TargetVariable, problemData.AllowedInputVariables); 183 184 //return RandomForestModel.CreateRegressionModel(problemData, nTrees, r, m, seed,185 //rmsError: out rmsError, avgRelError: out avgRelError, outOfBagRmsError: out outOfBagRmsError, outOfBagAvgRelError: out outOfBagAvgRelError);186 199 } 187 200
Note: See TracChangeset
for help on using the changeset viewer.