Changeset 16538 for branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour
- Timestamp: 01/18/19 14:39:01 (6 years ago)
- Location: branches/2847_M5Regression
- Files: 7 edited, 1 moved
Legend:
- Unmodified
- Added
- Removed
-
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
-
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4
- Property svn:mergeinfo changed
-
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs
r14523 r16538 1 #region License Information1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 6Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 42 42 private const string NearestNeighbourClassificationModelResultName = "Nearest neighbour classification solution"; 43 43 private const string WeightsParameterName = "Weights"; 44 44 private const string SelfMatchParameterName = "SelfMatch"; 45 45 46 46 #region parameter properties 47 47 public IFixedValueParameter<IntValue> KParameter { 48 48 get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; } 49 } 50 public IFixedValueParameter<BoolValue> SelfMatchParameter { 51 get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; } 49 52 } 50 53 public IValueParameter<DoubleArray> WeightsParameter { … … 53 56 #endregion 54 57 #region properties 58 public bool SelfMatch { 59 get { return SelfMatchParameter.Value.Value; } 60 set { SelfMatchParameter.Value.Value = value; } 61 } 55 62 public int K { 56 63 get { return KParameter.Value.Value; } … … 73 80 public NearestNeighbourClassification() 74 81 : base() { 82 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 75 83 Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3))); 76 84 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. 
If not set the weights are calculated automatically (each feature is scaled to unit variance)")); … … 83 91 if (!Parameters.ContainsKey(WeightsParameterName)) { 84 92 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 93 } 94 if (!Parameters.ContainsKey(SelfMatchParameterName)) { 95 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 85 96 } 86 97 #endregion … … 95 106 double[] weights = null; 96 107 if (Weights != null) weights = Weights.CloneAsArray(); 97 var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, weights);108 var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, SelfMatch, weights); 98 109 Results.Add(new Result(NearestNeighbourClassificationModelResultName, "The nearest neighbour classification solution.", solution)); 99 110 } 100 111 101 public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, double[] weights = null) {112 public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) { 102 113 var problemDataClone = (IClassificationProblemData)problemData.Clone(); 103 return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, weights), problemDataClone);114 return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, selfMatch, weights), problemDataClone); 104 115 } 105 116 106 public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, double[] weights = null) {117 public static INearestNeighbourModel Train(IClassificationProblemData problemData, 
int k, bool selfMatch = false, double[] weights = null) { 107 118 return new NearestNeighbourModel(problemData.Dataset, 108 119 problemData.TrainingIndices, 109 120 k, 121 selfMatch, 110 122 problemData.TargetVariable, 111 123 problemData.AllowedInputVariables, -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassificationSolution.cs
r14185 → r16538 (columns: old line, new line, content)
1 1   #region License Information
2 2   /* HeuristicLab
3   - * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
  3 + * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 4    *
5 5    * This file is part of HeuristicLab.
-
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs
r14843 r16538 1 #region License Information1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 6Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 37 37 38 38 private readonly object kdTreeLockObject = new object(); 39 39 40 private alglib.nearestneighbor.kdtree kdTree; 40 41 public alglib.nearestneighbor.kdtree KDTree { … … 49 50 } 50 51 51 52 52 public override IEnumerable<string> VariablesUsedForPrediction { 53 53 get { return allowedInputVariables; } … … 60 60 [Storable] 61 61 private int k; 62 [Storable(DefaultValue = false)] 63 private bool selfMatch; 62 64 [Storable(DefaultValue = null)] 63 65 private double[] weights; // not set for old versions loaded from disk … … 97 99 kdTree.x = (double[])original.kdTree.x.Clone(); 98 100 kdTree.xy = (double[,])original.kdTree.xy.Clone(); 99 101 selfMatch = original.selfMatch; 100 102 k = original.k; 101 103 isCompatibilityLoaded = original.IsCompatibilityLoaded; … … 110 112 this.classValues = (double[])original.classValues.Clone(); 111 113 } 112 public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)114 public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, bool selfMatch, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null) 113 115 : base(targetVariable) { 114 116 Name = ItemName; 115 117 Description = ItemDescription; 118 this.selfMatch = selfMatch; 116 119 this.k = k; 117 120 this.allowedInputVariables = allowedInputVariables.ToArray(); … … 130 133 // automatic determination of weights (all features should have variance = 1) 131 134 this.weights = this.allowedInputVariables 132 .Select(name => 1.0 / 
dataset.GetDoubleValues(name, rows).StandardDeviationPop()) 135 .Select(name => { 136 var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop(); 137 return pop.IsAlmost(0) ? 1.0 : 1.0 / pop; 138 }) 133 139 .Concat(new double[] { 1.0 }) // no scaling for target variable 134 140 .ToArray(); … … 142 148 } 143 149 144 if (inputMatrix.C ast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))150 if (inputMatrix.ContainsNanOrInfinity()) 145 151 throw new NotSupportedException( 146 152 "Nearest neighbour model does not support NaN or infinity values in the input dataset."); … … 198 204 int numNeighbours; 199 205 lock (kdTreeLockObject) { // gkronber: the following calls change the kdTree data structure 200 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);206 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch); 201 207 alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists); 202 208 alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours); 203 209 } 204 210 if (selfMatch) { 211 // weights for neighbours are 1/d. 212 // override distances (=0) of exact matches using 1% of the distance of the next closest non-self-match neighbour -> selfmatches weight 100x more than the next closest neighbor. 213 // if all k neighbours are selfmatches then they all have weight 0.01. 
214 double minDist = dists[0] + 1; 215 for (int i = 0; i < numNeighbours; i++) { 216 if ((minDist > dists[i]) && (dists[i] != 0)) { 217 minDist = dists[i]; 218 } 219 } 220 minDist /= 100.0; 221 for (int i = 0; i < numNeighbours; i++) { 222 if (dists[i] == 0) { 223 dists[i] = minDist; 224 } 225 } 226 } 205 227 double distanceWeightedValue = 0.0; 206 228 double distsSum = 0.0; … … 235 257 lock (kdTreeLockObject) { 236 258 // gkronber: the following calls change the kdTree data structure 237 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);259 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch); 238 260 alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists); 239 261 alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours); … … 259 281 260 282 283 public bool IsProblemDataCompatible(IRegressionProblemData problemData, out string errorMessage) { 284 return RegressionModel.IsProblemDataCompatible(this, problemData, out errorMessage); 285 } 286 287 public override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) { 288 if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null."); 289 290 var regressionProblemData = problemData as IRegressionProblemData; 291 if (regressionProblemData != null) 292 return IsProblemDataCompatible(regressionProblemData, out errorMessage); 293 294 var classificationProblemData = problemData as IClassificationProblemData; 295 if (classificationProblemData != null) 296 return IsProblemDataCompatible(classificationProblemData, out errorMessage); 297 298 throw new ArgumentException("The problem data is not a regression nor a classification problem data. 
Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData"); 299 } 300 261 301 IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) { 262 302 return new NearestNeighbourRegressionSolution(this, new RegressionProblemData(problemData)); -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs
r14523 r16538 1 1 #region License Information 2 2 /* HeuristicLab 3 * Copyright (C) 2002-201 6Heuristic and Evolutionary Algorithms Laboratory (HEAL)3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 4 * 5 5 * This file is part of HeuristicLab. … … 41 41 private const string NearestNeighbourRegressionModelResultName = "Nearest neighbour regression solution"; 42 42 private const string WeightsParameterName = "Weights"; 43 private const string SelfMatchParameterName = "SelfMatch"; 43 44 44 45 #region parameter properties … … 46 47 get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; } 47 48 } 48 49 public IFixedValueParameter<BoolValue> SelfMatchParameter { 50 get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; } 51 } 49 52 public IValueParameter<DoubleArray> WeightsParameter { 50 53 get { return (IValueParameter<DoubleArray>)Parameters[WeightsParameterName]; } … … 59 62 } 60 63 } 61 64 public bool SelfMatch { 65 get { return SelfMatchParameter.Value.Value; } 66 set { SelfMatchParameter.Value.Value = value; } 67 } 62 68 public DoubleArray Weights { 63 69 get { return WeightsParameter.Value; } … … 75 81 Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3))); 76 82 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. 
If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 83 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 77 84 Problem = new RegressionProblem(); 78 85 } … … 84 91 if (!Parameters.ContainsKey(WeightsParameterName)) { 85 92 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 93 } 94 if (!Parameters.ContainsKey(SelfMatchParameterName)) { 95 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 86 96 } 87 97 #endregion … … 96 106 double[] weights = null; 97 107 if (Weights != null) weights = Weights.CloneAsArray(); 98 var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, weights);108 var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, SelfMatch, weights); 99 109 Results.Add(new Result(NearestNeighbourRegressionModelResultName, "The nearest neighbour regression solution.", solution)); 100 110 } 101 111 102 public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, double[] weights = null) {112 public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) { 103 113 var clonedProblemData = (IRegressionProblemData)problemData.Clone(); 104 return new NearestNeighbourRegressionSolution(Train(problemData, k, weights), clonedProblemData);114 return new NearestNeighbourRegressionSolution(Train(problemData, k, selfMatch, weights), clonedProblemData); 105 115 } 106 116 107 public static INearestNeighbourModel 
Train(IRegressionProblemData problemData, int k, double[] weights = null) {117 public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) { 108 118 return new NearestNeighbourModel(problemData.Dataset, 109 119 problemData.TrainingIndices, 110 120 k, 121 selfMatch, 111 122 problemData.TargetVariable, 112 123 problemData.AllowedInputVariables, -
branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegressionSolution.cs
r14185 → r16538 (columns: old line, new line, content)
1 1   #region License Information
2 2   /* HeuristicLab
3   - * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
  3 + * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 4    *
5 5    * This file is part of HeuristicLab.
Note: See TracChangeset for help on using the changeset viewer.