Changeset 9363 for branches/OaaS/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs
- Timestamp:
- 04/16/13 13:13:41 (11 years ago)
- Location:
- branches/OaaS
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/OaaS
- Property svn:ignore
-
old new 21 21 protoc.exe 22 22 _ReSharper.HeuristicLab 3.3 Tests 23 Google.ProtocolBuffers-2.4.1.473.dll 23 24 packages
-
- Property svn:mergeinfo changed
- Property svn:ignore
-
branches/OaaS/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
-
branches/OaaS/HeuristicLab.Algorithms.DataAnalysis/3.4
- Property svn:ignore
-
old new 5 5 *.vs10x 6 6 Plugin.cs 7 *.user
-
- Property svn:ignore
-
branches/OaaS/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs
r8139 r9363
@@ -21,16 +21,12 @@
 
 using System;
-using System.Collections.Generic;
 using System.Linq;
 using HeuristicLab.Common;
 using HeuristicLab.Core;
 using HeuristicLab.Data;
-using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
 using HeuristicLab.Optimization;
+using HeuristicLab.Parameters;
 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
 using HeuristicLab.Problems.DataAnalysis;
-using HeuristicLab.Problems.DataAnalysis.Symbolic;
-using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
-using HeuristicLab.Parameters;
 
 namespace HeuristicLab.Algorithms.DataAnalysis {
@@ -84,29 +80,15 @@
 
     public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k) {
-      Dataset dataset = problemData.Dataset;
-      string targetVariable = problemData.TargetVariable;
-      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
-      IEnumerable<int> rows = problemData.TrainingIndices;
-      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
-      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
-        throw new NotSupportedException("Nearest neighbour classification does not support NaN or infinity values in the input dataset.");
+      var problemDataClone = (IClassificationProblemData)problemData.Clone();
+      return new NearestNeighbourClassificationSolution(problemDataClone, Train(problemDataClone, k));
+    }
 
-      alglib.nearestneighbor.kdtree kdtree = new alglib.nearestneighbor.kdtree();
-
-      int nRows = inputMatrix.GetLength(0);
-      int nFeatures = inputMatrix.GetLength(1) - 1;
-      double[] classValues = dataset.GetDoubleValues(targetVariable).Distinct().OrderBy(x => x).ToArray();
-      int nClasses = classValues.Count();
-      // map original class values to values [0..nClasses-1]
-      Dictionary<double, double> classIndices = new Dictionary<double, double>();
-      for (int i = 0; i < nClasses; i++) {
-        classIndices[classValues[i]] = i;
-      }
-      for (int row = 0; row < nRows; row++) {
-        inputMatrix[row, nFeatures] = classIndices[inputMatrix[row, nFeatures]];
-      }
-      alglib.nearestneighbor.kdtreebuild(inputMatrix, nRows, inputMatrix.GetLength(1) - 1, 1, 2, kdtree);
-      var problemDataClone = (IClassificationProblemData) problemData.Clone();
-      return new NearestNeighbourClassificationSolution(problemDataClone, new NearestNeighbourModel(kdtree, k, targetVariable, allowedInputVariables, problemDataClone.ClassValues.ToArray()));
+    public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k) {
+      return new NearestNeighbourModel(problemData.Dataset,
+        problemData.TrainingIndices,
+        k,
+        problemData.TargetVariable,
+        problemData.AllowedInputVariables,
+        problemData.ClassValues.ToArray());
     }
     #endregion
Note: See TracChangeset for help on using the changeset viewer.