- Timestamp:
- 04/16/13 13:13:41 (12 years ago)
- Location:
- branches/OaaS
- Files:
-
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/OaaS
- Property svn:ignore
-
old new 21 21 protoc.exe 22 22 _ReSharper.HeuristicLab 3.3 Tests 23 Google.ProtocolBuffers-2.4.1.473.dll 23 24 packages
-
- Property svn:mergeinfo changed
- Property svn:ignore
-
branches/OaaS/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
-
branches/OaaS/HeuristicLab.Algorithms.DataAnalysis/3.4
- Property svn:ignore
-
old new 5 5 *.vs10x 6 6 Plugin.cs 7 *.user
-
- Property svn:ignore
-
branches/OaaS/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs
r8139 r9363 26 26 using HeuristicLab.Core; 27 27 using HeuristicLab.Data; 28 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;29 28 using HeuristicLab.Optimization; 29 using HeuristicLab.Parameters; 30 30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 31 31 using HeuristicLab.Problems.DataAnalysis; 32 using HeuristicLab.Problems.DataAnalysis.Symbolic;33 using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;34 using HeuristicLab.Parameters;35 32 36 33 namespace HeuristicLab.Algorithms.DataAnalysis { … … 45 42 private const string NumberOfTreesParameterName = "Number of trees"; 46 43 private const string RParameterName = "R"; 44 private const string MParameterName = "M"; 45 private const string SeedParameterName = "Seed"; 46 private const string SetSeedRandomlyParameterName = "SetSeedRandomly"; 47 47 48 #region parameter properties 48 public I ValueParameter<IntValue> NumberOfTreesParameter {49 get { return (I ValueParameter<IntValue>)Parameters[NumberOfTreesParameterName]; }49 public IFixedValueParameter<IntValue> NumberOfTreesParameter { 50 get { return (IFixedValueParameter<IntValue>)Parameters[NumberOfTreesParameterName]; } 50 51 } 51 public IValueParameter<DoubleValue> RParameter { 52 get { return (IValueParameter<DoubleValue>)Parameters[RParameterName]; } 52 public IFixedValueParameter<DoubleValue> RParameter { 53 get { return (IFixedValueParameter<DoubleValue>)Parameters[RParameterName]; } 54 } 55 public IFixedValueParameter<DoubleValue> MParameter { 56 get { return (IFixedValueParameter<DoubleValue>)Parameters[MParameterName]; } 57 } 58 public IFixedValueParameter<IntValue> SeedParameter { 59 get { return (IFixedValueParameter<IntValue>)Parameters[SeedParameterName]; } 60 } 61 public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter { 62 get { return (IFixedValueParameter<BoolValue>)Parameters[SetSeedRandomlyParameterName]; } 53 63 } 54 64 #endregion … … 62 72 set { RParameter.Value.Value = value; } 63 73 } 74 
public double M { 75 get { return MParameter.Value.Value; } 76 set { MParameter.Value.Value = value; } 77 } 78 public int Seed { 79 get { return SeedParameter.Value.Value; } 80 set { SeedParameter.Value.Value = value; } 81 } 82 public bool SetSeedRandomly { 83 get { return SetSeedRandomlyParameter.Value.Value; } 84 set { SetSeedRandomlyParameter.Value.Value = value; } 85 } 64 86 #endregion 87 65 88 [StorableConstructor] 66 89 private RandomForestClassification(bool deserializing) : base(deserializing) { } … … 68 91 : base(original, cloner) { 69 92 } 93 70 94 public RandomForestClassification() 71 95 : base() { 72 96 Parameters.Add(new FixedValueParameter<IntValue>(NumberOfTreesParameterName, "The number of trees in the forest. Should be between 50 and 100", new IntValue(50))); 73 97 Parameters.Add(new FixedValueParameter<DoubleValue>(RParameterName, "The ratio of the training set that will be used in the construction of individual trees (0<r<=1). Should be adjusted depending on the noise level in the dataset in the range from 0.66 (low noise) to 0.05 (high noise). 
This parameter should be adjusted to achieve good generalization error.", new DoubleValue(0.3))); 98 Parameters.Add(new FixedValueParameter<DoubleValue>(MParameterName, "The ratio of features that will be used in the construction of individual trees (0<m<=1)", new DoubleValue(0.5))); 99 Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0))); 100 Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true))); 74 101 Problem = new ClassificationProblem(); 75 102 } 103 76 104 [StorableHook(HookType.AfterDeserialization)] 77 private void AfterDeserialization() { } 105 private void AfterDeserialization() { 106 if (!Parameters.ContainsKey(MParameterName)) 107 Parameters.Add(new FixedValueParameter<DoubleValue>(MParameterName, "The ratio of features that will be used in the construction of individual trees (0<m<=1)", new DoubleValue(0.5))); 108 if (!Parameters.ContainsKey(SeedParameterName)) 109 Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0))); 110 if (!Parameters.ContainsKey((SetSeedRandomlyParameterName))) 111 Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true))); 112 } 78 113 79 114 public override IDeepCloneable Clone(Cloner cloner) { … … 84 119 protected override void Run() { 85 120 double rmsError, relClassificationError, outOfBagRmsError, outOfBagRelClassificationError; 86 var solution = CreateRandomForestClassificationSolution(Problem.ProblemData, NumberOfTrees, R, out rmsError, out relClassificationError, out outOfBagRmsError, out outOfBagRelClassificationError); 121 if (SetSeedRandomly) Seed = new 
System.Random().Next(); 122 123 var solution = CreateRandomForestClassificationSolution(Problem.ProblemData, NumberOfTrees, R, M, Seed, out rmsError, out relClassificationError, out outOfBagRmsError, out outOfBagRelClassificationError); 87 124 Results.Add(new Result(RandomForestClassificationModelResultName, "The random forest classification solution.", solution)); 88 125 Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the random forest regression solution on the training set.", new DoubleValue(rmsError))); … … 92 129 } 93 130 94 public static IClassificationSolution CreateRandomForestClassificationSolution(IClassificationProblemData problemData, int nTrees, double r, 131 public static IClassificationSolution CreateRandomForestClassificationSolution(IClassificationProblemData problemData, int nTrees, double r, double m, int seed, 95 132 out double rmsError, out double relClassificationError, out double outOfBagRmsError, out double outOfBagRelClassificationError) { 133 if (r <= 0 || r > 1) throw new ArgumentException("The R parameter in the random forest regression must be between 0 and 1."); 134 if (m <= 0 || m > 1) throw new ArgumentException("The M parameter in the random forest regression must be between 0 and 1."); 135 136 alglib.math.rndobject = new System.Random(seed); 137 96 138 Dataset dataset = problemData.Dataset; 97 139 string targetVariable = problemData.TargetVariable; … … 102 144 throw new NotSupportedException("Random forest classification does not support NaN or infinity values in the input dataset."); 103 145 146 int info = 0; 147 alglib.decisionforest dForest = new alglib.decisionforest(); 148 alglib.dfreport rep = new alglib.dfreport(); ; 149 int nRows = inputMatrix.GetLength(0); 150 int nColumns = inputMatrix.GetLength(1); 151 int sampleSize = Math.Max((int)Math.Round(r * nRows), 1); 152 int nFeatures = Math.Max((int)Math.Round(m * (nColumns - 1)), 1); 104 153 105 alglib.decisionforest dforest; 106 
alglib.dfreport rep; 107 int nRows = inputMatrix.GetLength(0); 108 int nCols = inputMatrix.GetLength(1); 109 int info; 110 double[] classValues = dataset.GetDoubleValues(targetVariable).Distinct().OrderBy(x => x).ToArray(); 111 int nClasses = classValues.Count(); 154 155 double[] classValues = problemData.ClassValues.ToArray(); 156 int nClasses = problemData.Classes; 112 157 // map original class values to values [0..nClasses-1] 113 158 Dictionary<double, double> classIndices = new Dictionary<double, double>(); … … 116 161 } 117 162 for (int row = 0; row < nRows; row++) { 118 inputMatrix[row, nCol s - 1] = classIndices[inputMatrix[row, nCols - 1]];163 inputMatrix[row, nColumns - 1] = classIndices[inputMatrix[row, nColumns - 1]]; 119 164 } 120 // execute random forest algorithm 121 alglib.df buildrandomdecisionforest(inputMatrix, nRows, nCols - 1, nClasses, nTrees, r, out info, out dforest, out rep);165 // execute random forest algorithm 166 alglib.dforest.dfbuildinternal(inputMatrix, nRows, nColumns - 1, nClasses, nTrees, sampleSize, nFeatures, alglib.dforest.dfusestrongsplits + alglib.dforest.dfuseevs, ref info, dForest.innerobj, rep.innerobj); 122 167 if (info != 1) throw new ArgumentException("Error in calculation of random forest classification solution"); 123 168 … … 126 171 relClassificationError = rep.relclserror; 127 172 outOfBagRelClassificationError = rep.oobrelclserror; 128 return new RandomForestClassificationSolution((IClassificationProblemData)problemData.Clone(), new RandomForestModel(d forest, targetVariable, allowedInputVariables, classValues));173 return new RandomForestClassificationSolution((IClassificationProblemData)problemData.Clone(), new RandomForestModel(dForest, targetVariable, allowedInputVariables, classValues)); 129 174 } 130 175 #endregion -
branches/OaaS/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassificationSolution.cs
r7259 r9363 45 45 public RandomForestClassificationSolution(IClassificationProblemData problemData, IRandomForestModel randomForestModel) 46 46 : base(randomForestModel, problemData) { 47 RecalculateResults(); 48 47 } 49 48 … … 51 50 return new RandomForestClassificationSolution(this, cloner); 52 51 } 53 54 protected override void RecalculateResults() { 55 CalculateResults(); 56 } 57 52 } 58 53 } -
branches/OaaS/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs
r7259 r9363 132 132 133 133 public IRandomForestRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 134 return new RandomForestRegressionSolution(problemData, this); 134 return new RandomForestRegressionSolution(new RegressionProblemData(problemData), this); 135 135 } 136 136 IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) { … … 138 138 } 139 139 public IRandomForestClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) { 140 return new RandomForestClassificationSolution(problemData, this); 140 return new RandomForestClassificationSolution(new ClassificationProblemData(problemData), this); 141 141 } 142 142 IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) { -
branches/OaaS/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs
r8139 r9363 26 26 using HeuristicLab.Core; 27 27 using HeuristicLab.Data; 28 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;29 28 using HeuristicLab.Optimization; 29 using HeuristicLab.Parameters; 30 30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 31 31 using HeuristicLab.Problems.DataAnalysis; 32 using HeuristicLab.Problems.DataAnalysis.Symbolic;33 using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;34 using HeuristicLab.Parameters;35 32 36 33 namespace HeuristicLab.Algorithms.DataAnalysis { … … 45 42 private const string NumberOfTreesParameterName = "Number of trees"; 46 43 private const string RParameterName = "R"; 44 private const string MParameterName = "M"; 45 private const string SeedParameterName = "Seed"; 46 private const string SetSeedRandomlyParameterName = "SetSeedRandomly"; 47 47 48 #region parameter properties 48 public I ValueParameter<IntValue> NumberOfTreesParameter {49 get { return (I ValueParameter<IntValue>)Parameters[NumberOfTreesParameterName]; }49 public IFixedValueParameter<IntValue> NumberOfTreesParameter { 50 get { return (IFixedValueParameter<IntValue>)Parameters[NumberOfTreesParameterName]; } 50 51 } 51 public IValueParameter<DoubleValue> RParameter { 52 get { return (IValueParameter<DoubleValue>)Parameters[RParameterName]; } 52 public IFixedValueParameter<DoubleValue> RParameter { 53 get { return (IFixedValueParameter<DoubleValue>)Parameters[RParameterName]; } 54 } 55 public IFixedValueParameter<DoubleValue> MParameter { 56 get { return (IFixedValueParameter<DoubleValue>)Parameters[MParameterName]; } 57 } 58 public IFixedValueParameter<IntValue> SeedParameter { 59 get { return (IFixedValueParameter<IntValue>)Parameters[SeedParameterName]; } 60 } 61 public IFixedValueParameter<BoolValue> SetSeedRandomlyParameter { 62 get { return (IFixedValueParameter<BoolValue>)Parameters[SetSeedRandomlyParameterName]; } 53 63 } 54 64 #endregion … … 62 72 set { RParameter.Value.Value = value; } 63 73 } 74 
public double M { 75 get { return MParameter.Value.Value; } 76 set { MParameter.Value.Value = value; } 77 } 78 public int Seed { 79 get { return SeedParameter.Value.Value; } 80 set { SeedParameter.Value.Value = value; } 81 } 82 public bool SetSeedRandomly { 83 get { return SetSeedRandomlyParameter.Value.Value; } 84 set { SetSeedRandomlyParameter.Value.Value = value; } 85 } 64 86 #endregion 65 87 [StorableConstructor] … … 68 90 : base(original, cloner) { 69 91 } 92 70 93 public RandomForestRegression() 71 94 : base() { 72 95 Parameters.Add(new FixedValueParameter<IntValue>(NumberOfTreesParameterName, "The number of trees in the forest. Should be between 50 and 100", new IntValue(50))); 73 96 Parameters.Add(new FixedValueParameter<DoubleValue>(RParameterName, "The ratio of the training set that will be used in the construction of individual trees (0<r<=1). Should be adjusted depending on the noise level in the dataset in the range from 0.66 (low noise) to 0.05 (high noise). This parameter should be adjusted to achieve good generalization error.", new DoubleValue(0.3))); 97 Parameters.Add(new FixedValueParameter<DoubleValue>(MParameterName, "The ratio of features that will be used in the construction of individual trees (0<m<=1)", new DoubleValue(0.5))); 98 Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0))); 99 Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true))); 74 100 Problem = new RegressionProblem(); 75 101 } 102 76 103 [StorableHook(HookType.AfterDeserialization)] 77 private void AfterDeserialization() { } 104 private void AfterDeserialization() { 105 if (!Parameters.ContainsKey(MParameterName)) 106 Parameters.Add(new FixedValueParameter<DoubleValue>(MParameterName, "The ratio of features that will be used in the construction 
of individual trees (0<m<=1)", new DoubleValue(0.5))); 107 if (!Parameters.ContainsKey(SeedParameterName)) 108 Parameters.Add(new FixedValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0))); 109 if (!Parameters.ContainsKey((SetSeedRandomlyParameterName))) 110 Parameters.Add(new FixedValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true))); 111 } 78 112 79 113 public override IDeepCloneable Clone(Cloner cloner) { … … 84 118 protected override void Run() { 85 119 double rmsError, avgRelError, outOfBagRmsError, outOfBagAvgRelError; 86 var solution = CreateRandomForestRegressionSolution(Problem.ProblemData, NumberOfTrees, R, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError); 120 if (SetSeedRandomly) Seed = new System.Random().Next(); 121 122 var solution = CreateRandomForestRegressionSolution(Problem.ProblemData, NumberOfTrees, R, M, Seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError); 87 123 Results.Add(new Result(RandomForestRegressionModelResultName, "The random forest regression solution.", solution)); 88 124 Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the random forest regression solution on the training set.", new DoubleValue(rmsError))); … … 92 128 } 93 129 94 public static IRegressionSolution CreateRandomForestRegressionSolution(IRegressionProblemData problemData, int nTrees, double r, 130 public static IRegressionSolution CreateRandomForestRegressionSolution(IRegressionProblemData problemData, int nTrees, double r, double m, int seed, 95 131 out double rmsError, out double avgRelError, out double outOfBagRmsError, out double outOfBagAvgRelError) { 132 if (r <= 0 || r > 1) throw new ArgumentException("The R parameter in the random forest regression must be between 0 and 1."); 
133 if (m <= 0 || m > 1) throw new ArgumentException("The M parameter in the random forest regression must be between 0 and 1."); 134 135 alglib.math.rndobject = new System.Random(seed); 136 96 137 Dataset dataset = problemData.Dataset; 97 138 string targetVariable = problemData.TargetVariable; … … 102 143 throw new NotSupportedException("Random forest regression does not support NaN or infinity values in the input dataset."); 103 144 145 int info = 0; 146 alglib.decisionforest dForest = new alglib.decisionforest(); 147 alglib.dfreport rep = new alglib.dfreport(); ; 148 int nRows = inputMatrix.GetLength(0); 149 int nColumns = inputMatrix.GetLength(1); 150 int sampleSize = Math.Max((int)Math.Round(r * nRows), 1); 151 int nFeatures = Math.Max((int)Math.Round(m * (nColumns - 1)), 1); 104 152 105 alglib.decisionforest dforest; 106 alglib.dfreport rep; 107 int nRows = inputMatrix.GetLength(0); 108 109 int info; 110 alglib.dfbuildrandomdecisionforest(inputMatrix, nRows, allowedInputVariables.Count(), 1, nTrees, r, out info, out dforest, out rep); 153 alglib.dforest.dfbuildinternal(inputMatrix, nRows, nColumns - 1, 1, nTrees, sampleSize, nFeatures, alglib.dforest.dfusestrongsplits + alglib.dforest.dfuseevs, ref info, dForest.innerobj, rep.innerobj); 111 154 if (info != 1) throw new ArgumentException("Error in calculation of random forest regression solution"); 112 155 … … 116 159 outOfBagRmsError = rep.oobrmserror; 117 160 118 return new RandomForestRegressionSolution((IRegressionProblemData)problemData.Clone(), new RandomForestModel(d forest, targetVariable, allowedInputVariables));161 return new RandomForestRegressionSolution((IRegressionProblemData)problemData.Clone(), new RandomForestModel(dForest, targetVariable, allowedInputVariables)); 119 162 } 120 163 #endregion -
branches/OaaS/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegressionSolution.cs
r7259 r9363 45 45 public RandomForestRegressionSolution(IRegressionProblemData problemData, IRandomForestModel randomForestModel) 46 46 : base(randomForestModel, problemData) { 47 RecalculateResults(); 48 47 } 49 48
Note: See TracChangeset for help on using the changeset viewer.