Changeset 6583 for trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs
- Timestamp:
- 07/21/11 16:28:00 (13 years ago)
- Location:
- trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour
- Files:
-
- 1 added
- 1 copied
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs
r6580 r6583 36 36 namespace HeuristicLab.Algorithms.DataAnalysis { 37 37 /// <summary> 38 /// Ne ural network regression data analysis algorithm.38 /// Nearest neighbour classification data analysis algorithm. 39 39 /// </summary> 40 [Item("Ne ural Network Regression", "Neural network regression data analysis algorithm (wrapper for ALGLIB). Further documentation: http://www.alglib.net/dataanalysis/neuralnetworks.php")]40 [Item("Nearest Neighbour Classification", "Nearest neighbour classification data analysis algorithm (wrapper for ALGLIB).")] 41 41 [Creatable("Data Analysis")] 42 42 [StorableClass] 43 public sealed class NeuralNetworkRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> { 44 private const string DecayParameterName = "Decay"; 45 private const string HiddenLayersParameterName = "HiddenLayers"; 46 private const string NodesInFirstHiddenLayerParameterName = "NodesInFirstHiddenLayer"; 47 private const string NodesInSecondHiddenLayerParameterName = "NodesInSecondHiddenLayer"; 48 private const string RestartsParameterName = "Restarts"; 49 private const string NeuralNetworkRegressionModelResultName = "Neural network regression solution"; 43 public sealed class NearestNeighbourClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> { 44 private const string KParameterName = "K"; 45 private const string NearestNeighbourClassificationModelResultName = "Nearest neighbour classification solution"; 50 46 51 47 #region parameter properties 52 public IFixedValueParameter<DoubleValue> DecayParameter { 53 get { return (IFixedValueParameter<DoubleValue>)Parameters[DecayParameterName]; } 54 } 55 public ConstrainedValueParameter<IntValue> HiddenLayersParameter { 56 get { return (ConstrainedValueParameter<IntValue>)Parameters[HiddenLayersParameterName]; } 57 } 58 public IFixedValueParameter<IntValue> NodesInFirstHiddenLayerParameter { 59 get { return (IFixedValueParameter<IntValue>)Parameters[NodesInFirstHiddenLayerParameterName]; } 60 } 61 public IFixedValueParameter<IntValue> NodesInSecondHiddenLayerParameter { 62 get { return (IFixedValueParameter<IntValue>)Parameters[NodesInSecondHiddenLayerParameterName]; } 63 } 64 public IFixedValueParameter<IntValue> RestartsParameter { 65 get { return (IFixedValueParameter<IntValue>)Parameters[RestartsParameterName]; } 48 public IFixedValueParameter<IntValue> KParameter { 49 get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; } 66 50 } 67 51 #endregion 68 69 52 #region properties 70 public double Decay{71 get { return DecayParameter.Value.Value; }53 public int K { 54 get { return KParameter.Value.Value; } 72 55 set { 73 if (value < 0.001 || value > 100) throw new ArgumentException("The decay parameter should be set to a value between 0.001 and 100.", "Decay"); 74 DecayParameter.Value.Value = value; 75 } 76 } 77 public int HiddenLayers { 78 get { return HiddenLayersParameter.Value.Value; } 79 set { 80 if (value < 0 || value > 2) throw new ArgumentException("The number of hidden layers should be set to 0, 1, or 2.", "HiddenLayers"); 81 HiddenLayersParameter.Value = (from v in HiddenLayersParameter.ValidValues 82 where v.Value == value 83 select v) 84 .Single(); 85 } 86 } 87 public int NodesInFirstHiddenLayer { 88 get { return NodesInFirstHiddenLayerParameter.Value.Value; } 89 set { 90 if (value < 1) throw new ArgumentException("The number of nodes in the first hidden layer must be at least one.", "NodesInFirstHiddenLayer"); 91 NodesInFirstHiddenLayerParameter.Value.Value = value; 92 } 93 } 94 public int NodesInSecondHiddenLayer { 95 get { return NodesInSecondHiddenLayerParameter.Value.Value; } 96 set { 97 if (value < 1) throw new ArgumentException("The number of nodes in the first second layer must be at least one.", "NodesInSecondHiddenLayer"); 98 NodesInSecondHiddenLayerParameter.Value.Value = value; 99 } 100 } 101 public int Restarts { 102 get { return RestartsParameter.Value.Value; } 103 set { 104 if (value < 0) throw new ArgumentException("The number of restarts must be positive.", "Restarts"); 105 RestartsParameter.Value.Value = value; 56 if (value <= 0) throw new ArgumentException("K must be larger than zero.", "K"); 57 else KParameter.Value.Value = value; 106 58 } 107 59 } 108 60 #endregion 109 61 110 111 62 [StorableConstructor] 112 private Ne uralNetworkRegression(bool deserializing) : base(deserializing) { }113 private Ne uralNetworkRegression(NeuralNetworkRegression original, Cloner cloner)63 private NearestNeighbourClassification(bool deserializing) : base(deserializing) { } 64 private NearestNeighbourClassification(NearestNeighbourClassification original, Cloner cloner) 114 65 : base(original, cloner) { 115 66 } 116 public Ne uralNetworkRegression()67 public NearestNeighbourClassification() 117 68 : base() { 118 var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { new IntValue(0), new IntValue(1), new IntValue(2) }); 119 var selectedHiddenLayerValue = (from v in validHiddenLayerValues 120 where v.Value == 1 121 select v) 122 .Single(); 123 Parameters.Add(new FixedValueParameter<DoubleValue>(DecayParameterName, "The decay parameter for the training phase of the neural network. This parameter determines the strengh of regularization and should be set to a value between 0.001 (weak regularization) to 100 (very strong regularization). The correct value should be determined via cross-validation.", new DoubleValue(1))); 124 Parameters.Add(new ConstrainedValueParameter<IntValue>(HiddenLayersParameterName, "The number of hidden layers for the neural network (0, 1, or 2)", validHiddenLayerValues, selectedHiddenLayerValue)); 125 Parameters.Add(new FixedValueParameter<IntValue>(NodesInFirstHiddenLayerParameterName, "The number of nodes in the first hidden layer. This value is not used if the number of hidden layers is zero.", new IntValue(10))); 126 Parameters.Add(new FixedValueParameter<IntValue>(NodesInSecondHiddenLayerParameterName, "The number of nodes in the second hidden layer. This value is not used if the number of hidden layers is zero or one.", new IntValue(10))); 127 Parameters.Add(new FixedValueParameter<IntValue>(RestartsParameterName, "The number of restarts for learning.", new IntValue(2))); 128 129 Problem = new RegressionProblem(); 69 Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3))); 70 Problem = new ClassificationProblem(); 130 71 } 131 72 [StorableHook(HookType.AfterDeserialization)] … … 133 74 134 75 public override IDeepCloneable Clone(Cloner cloner) { 135 return new Ne uralNetworkRegression(this, cloner);76 return new NearestNeighbourClassification(this, cloner); 136 77 } 137 78 138 #region ne ural network79 #region nearest neighbour 139 80 protected override void Run() { 140 double rmsError, avgRelError; 141 var solution = CreateNeuralNetworkRegressionSolution(Problem.ProblemData, HiddenLayers, NodesInFirstHiddenLayer, NodesInSecondHiddenLayer, Decay, Restarts, out rmsError, out avgRelError); 142 Results.Add(new Result(NeuralNetworkRegressionModelResultName, "The neural network regression solution.", solution)); 143 Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the neural network regression solution on the training set.", new DoubleValue(rmsError))); 144 Results.Add(new Result("Average relative error", "The average of relative errors of the neural network regression solution on the training set.", new PercentValue(avgRelError))); 81 var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K); 82 Results.Add(new Result(NearestNeighbourClassificationModelResultName, "The nearest neighbour classification solution.", solution)); 145 83 } 146 84 147 public static IRegressionSolution CreateNeuralNetworkRegressionSolution(IRegressionProblemData problemData, int nLayers, int nHiddenNodes1, int nHiddenNodes2, double decay, int restarts, 148 out double rmsError, out double avgRelError) { 85 public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k) { 149 86 Dataset dataset = problemData.Dataset; 150 87 string targetVariable = problemData.TargetVariable; … … 153 90 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); 154 91 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 155 throw new NotSupportedException("Ne ural network regression does not support NaN or infinity values in the input dataset.");92 throw new NotSupportedException("Nearest neighbour classification does not support NaN or infinity values in the input dataset."); 156 93 157 double targetMin = problemData.Dataset.GetEnumeratedVariableValues(targetVariable).Min(); 158 targetMin = targetMin - targetMin * 0.1; // -10% 159 double targetMax = problemData.Dataset.GetEnumeratedVariableValues(targetVariable).Max(); 160 targetMax = targetMax + targetMax * 0.1; // + 10% 94 alglib.nearestneighbor.kdtree kdtree = new alglib.nearestneighbor.kdtree(); 161 95 162 alglib.multilayerperceptron multiLayerPerceptron = null;163 if (nLayers == 0) {164 alglib.mlpcreater0(allowedInputVariables.Count(), 1, targetMin, targetMax, out multiLayerPerceptron);165 } else if (nLayers == 1) {166 alglib.mlpcreater1(allowedInputVariables.Count(), nHiddenNodes1, 1, targetMin, targetMax, out multiLayerPerceptron);167 } else if (nLayers == 2) {168 alglib.mlpcreater2(allowedInputVariables.Count(), nHiddenNodes1, nHiddenNodes2, 1, targetMin, targetMax, out multiLayerPerceptron);169 } else throw new ArgumentException("Number of layers must be zero, one, or two.", "nLayers");170 alglib.mlpreport rep;171 96 int nRows = inputMatrix.GetLength(0); 97 int nFeatures = inputMatrix.GetLength(1) - 1; 98 double[] classValues = dataset.GetVariableValues(targetVariable).Distinct().OrderBy(x => x).ToArray(); 99 int nClasses = classValues.Count(); 100 // map original class values to values [0..nClasses-1] 101 Dictionary<double, double> classIndizes = new Dictionary<double, double>(); 102 for (int i = 0; i < nClasses; i++) { 103 classIndizes[classValues[i]] = i; 104 } 105 for (int row = 0; row < nRows; row++) { 106 inputMatrix[row, nFeatures] = classIndizes[inputMatrix[row, nFeatures]]; 107 } 108 alglib.nearestneighbor.kdtreebuild(inputMatrix, nRows, inputMatrix.GetLength(1) - 1, 1, 2, kdtree); 172 109 173 int info; 174 // using mlptrainlm instead of mlptraines or mlptrainbfgs because only one parameter is necessary 175 alglib.mlptrainlm(multiLayerPerceptron, inputMatrix, nRows, decay, restarts, out info, out rep); 176 if (info != 2) throw new ArgumentException("Error in calculation of neural network regression solution"); 177 178 rmsError = alglib.mlprmserror(multiLayerPerceptron, inputMatrix, nRows); 179 avgRelError = alglib.mlpavgrelerror(multiLayerPerceptron, inputMatrix, nRows); 180 181 return new NeuralNetworkRegressionSolution(problemData, new NeuralNetworkModel(multiLayerPerceptron, targetVariable, allowedInputVariables)); 110 return new NearestNeighbourClassificationSolution(problemData, new NearestNeighbourModel(kdtree, k, targetVariable, allowedInputVariables, problemData.ClassValues.ToArray())); 182 111 } 183 112 #endregion
Note: See TracChangeset
for help on using the changeset viewer.