- Timestamp:
- 01/03/19 18:11:05 (6 years ago)
- Location:
- trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs
r15583 r16491 1 #region License Information1 #region License Information 2 2 /* HeuristicLab 3 3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) … … 42 42 private const string NearestNeighbourClassificationModelResultName = "Nearest neighbour classification solution"; 43 43 private const string WeightsParameterName = "Weights"; 44 44 private const string SelfMatchParameterName = "SelfMatch"; 45 45 46 46 #region parameter properties 47 47 public IFixedValueParameter<IntValue> KParameter { 48 48 get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; } 49 } 50 public IFixedValueParameter<BoolValue> SelfMatchParameter { 51 get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; } 49 52 } 50 53 public IValueParameter<DoubleArray> WeightsParameter { … … 53 56 #endregion 54 57 #region properties 58 public bool SelfMatch { 59 get { return SelfMatchParameter.Value.Value; } 60 set { SelfMatchParameter.Value.Value = value; } 61 } 55 62 public int K { 56 63 get { return KParameter.Value.Value; } … … 73 80 public NearestNeighbourClassification() 74 81 : base() { 82 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 75 83 Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3))); 76 84 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)")); … … 83 91 if (!Parameters.ContainsKey(WeightsParameterName)) { 84 92 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 93 } 94 if (!Parameters.ContainsKey(SelfMatchParameterName)) { 95 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 85 96 } 86 97 #endregion … … 95 106 double[] weights = null; 96 107 if (Weights != null) weights = Weights.CloneAsArray(); 97 var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, weights);108 var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, SelfMatch, weights); 98 109 Results.Add(new Result(NearestNeighbourClassificationModelResultName, "The nearest neighbour classification solution.", solution)); 99 110 } 100 111 101 public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, double[] weights = null) {112 public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) { 102 113 var problemDataClone = (IClassificationProblemData)problemData.Clone(); 103 return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, weights), problemDataClone);114 return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, selfMatch, weights), problemDataClone); 104 115 } 105 116 106 public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, double[] weights = null) {117 public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) { 107 118 return new NearestNeighbourModel(problemData.Dataset, 108 119 problemData.TrainingIndices, 109 120 k, 121 selfMatch, 110 122 problemData.TargetVariable, 111 123 problemData.AllowedInputVariables, -
trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs
r16243 r16491 1 #region License Information1 #region License Information 2 2 /* HeuristicLab 3 3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) … … 37 37 38 38 private readonly object kdTreeLockObject = new object(); 39 39 40 private alglib.nearestneighbor.kdtree kdTree; 40 41 public alglib.nearestneighbor.kdtree KDTree { … … 49 50 } 50 51 51 52 52 public override IEnumerable<string> VariablesUsedForPrediction { 53 53 get { return allowedInputVariables; } … … 60 60 [Storable] 61 61 private int k; 62 [Storable(DefaultValue = false)] 63 private bool selfMatch; 62 64 [Storable(DefaultValue = null)] 63 65 private double[] weights; // not set for old versions loaded from disk … … 97 99 kdTree.x = (double[])original.kdTree.x.Clone(); 98 100 kdTree.xy = (double[,])original.kdTree.xy.Clone(); 99 101 selfMatch = original.selfMatch; 100 102 k = original.k; 101 103 isCompatibilityLoaded = original.IsCompatibilityLoaded; … … 110 112 this.classValues = (double[])original.classValues.Clone(); 111 113 } 112 public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)114 public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, bool selfMatch, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null) 113 115 : base(targetVariable) { 114 116 Name = ItemName; 115 117 Description = ItemDescription; 118 this.selfMatch = selfMatch; 116 119 this.k = k; 117 120 this.allowedInputVariables = allowedInputVariables.ToArray(); … … 132 135 .Select(name => { 133 136 var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop(); 134 return pop.IsAlmost(0) ? 1.0 : 1.0/pop;137 return pop.IsAlmost(0) ? 1.0 : 1.0 / pop; 135 138 }) 136 139 .Concat(new double[] { 1.0 }) // no scaling for target variable … … 201 204 int numNeighbours; 202 205 lock (kdTreeLockObject) { // gkronber: the following calls change the kdTree data structure 203 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);206 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch); 204 207 alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists); 205 208 alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours); 206 209 } 207 210 if (selfMatch) { 211 // weights for neighbours are 1/d. 212 // override distances (=0) of exact matches using 1% of the distance of the next closest non-self-match neighbour -> selfmatches weight 100x more than the next closest neighbor. 213 // if all k neighbours are selfmatches then they all have weight 0.01. 214 double minDist = dists[0] + 1; 215 for (int i = 0; i < numNeighbours; i++) { 216 if ((minDist > dists[i]) && (dists[i] != 0)) { 217 minDist = dists[i]; 218 } 219 } 220 minDist /= 100.0; 221 for (int i = 0; i < numNeighbours; i++) { 222 if (dists[i] == 0) { 223 dists[i] = minDist; 224 } 225 } 226 } 208 227 double distanceWeightedValue = 0.0; 209 228 double distsSum = 0.0; … … 238 257 lock (kdTreeLockObject) { 239 258 // gkronber: the following calls change the kdTree data structure 240 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);259 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch); 241 260 alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists); 242 261 alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours); -
trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs
r15583 r16491 41 41 private const string NearestNeighbourRegressionModelResultName = "Nearest neighbour regression solution"; 42 42 private const string WeightsParameterName = "Weights"; 43 private const string SelfMatchParameterName = "SelfMatch"; 43 44 44 45 #region parameter properties … … 46 47 get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; } 47 48 } 48 49 public IFixedValueParameter<BoolValue> SelfMatchParameter { 50 get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; } 51 } 49 52 public IValueParameter<DoubleArray> WeightsParameter { 50 53 get { return (IValueParameter<DoubleArray>)Parameters[WeightsParameterName]; } … … 59 62 } 60 63 } 61 64 public bool SelfMatch { 65 get { return SelfMatchParameter.Value.Value; } 66 set { SelfMatchParameter.Value.Value = value; } 67 } 62 68 public DoubleArray Weights { 63 69 get { return WeightsParameter.Value; } … … 75 81 Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3))); 76 82 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 83 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 77 84 Problem = new RegressionProblem(); 78 85 } … … 84 91 if (!Parameters.ContainsKey(WeightsParameterName)) { 85 92 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 93 } 94 if (!Parameters.ContainsKey(SelfMatchParameterName)) { 95 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 86 96 } 87 97 #endregion … … 96 106 double[] weights = null; 97 107 if (Weights != null) weights = Weights.CloneAsArray(); 98 var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, weights);108 var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, SelfMatch, weights); 99 109 Results.Add(new Result(NearestNeighbourRegressionModelResultName, "The nearest neighbour regression solution.", solution)); 100 110 } 101 111 102 public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, double[] weights = null) {112 public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) { 103 113 var clonedProblemData = (IRegressionProblemData)problemData.Clone(); 104 return new NearestNeighbourRegressionSolution(Train(problemData, k, weights), clonedProblemData);114 return new NearestNeighbourRegressionSolution(Train(problemData, k, selfMatch, weights), clonedProblemData); 105 115 } 106 116 107 public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, double[] weights = null) {117 public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) { 108 118 return new NearestNeighbourModel(problemData.Dataset, 109 119 problemData.TrainingIndices, 110 120 k, 121 selfMatch, 111 122 problemData.TargetVariable, 112 123 problemData.AllowedInputVariables,
Note: See TracChangeset
for help on using the changeset viewer.