Changeset 17164 for stable/HeuristicLab.Algorithms.DataAnalysis/3.4
- Timestamp: 07/23/19 20:56:15 (5 years ago)
- Location: stable
- Files: 7 edited
Legend:
- Unmodified
- Added
- Removed
-
stable
-
stable/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
/trunk/HeuristicLab.Algorithms.DataAnalysis merged: 16491
- Property svn:mergeinfo changed
-
stable/HeuristicLab.Algorithms.DataAnalysis/3.4
- Property svn:mergeinfo changed
/trunk/HeuristicLab.Algorithms.DataAnalysis/3.4 merged: 16491
- Property svn:mergeinfo changed
-
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs
r17097 r17164
65 65
66 66     var ds = ReduceDataset(dataset, rows);
67      - nnModel = new NearestNeighbourModel(ds, Enumerable.Range(0, ds.Rows), k, ds.VariableNames.Last(), ds.VariableNames.Take(transformationMatrix.GetLength(1)), classValues: classValues);
   67   + nnModel = new NearestNeighbourModel(ds, Enumerable.Range(0, ds.Rows), k, false, ds.VariableNames.Last(), ds.VariableNames.Take(transformationMatrix.GetLength(1)), classValues: classValues);
68 68     }
69 69
-
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs
r17097 r17164 1 #region License Information1 #region License Information 2 2 /* HeuristicLab 3 3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL) … … 42 42 private const string NearestNeighbourClassificationModelResultName = "Nearest neighbour classification solution"; 43 43 private const string WeightsParameterName = "Weights"; 44 44 private const string SelfMatchParameterName = "SelfMatch"; 45 45 46 46 #region parameter properties 47 47 public IFixedValueParameter<IntValue> KParameter { 48 48 get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; } 49 } 50 public IFixedValueParameter<BoolValue> SelfMatchParameter { 51 get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; } 49 52 } 50 53 public IValueParameter<DoubleArray> WeightsParameter { … … 53 56 #endregion 54 57 #region properties 58 public bool SelfMatch { 59 get { return SelfMatchParameter.Value.Value; } 60 set { SelfMatchParameter.Value.Value = value; } 61 } 55 62 public int K { 56 63 get { return KParameter.Value.Value; } … … 73 80 public NearestNeighbourClassification() 74 81 : base() { 82 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 75 83 Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3))); 76 84 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)")); … … 83 91 if (!Parameters.ContainsKey(WeightsParameterName)) { 84 92 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. 
If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 93 } 94 if (!Parameters.ContainsKey(SelfMatchParameterName)) { 95 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 85 96 } 86 97 #endregion … … 95 106 double[] weights = null; 96 107 if (Weights != null) weights = Weights.CloneAsArray(); 97 var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, weights);108 var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, SelfMatch, weights); 98 109 Results.Add(new Result(NearestNeighbourClassificationModelResultName, "The nearest neighbour classification solution.", solution)); 99 110 } 100 111 101 public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, double[] weights = null) {112 public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) { 102 113 var problemDataClone = (IClassificationProblemData)problemData.Clone(); 103 return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, weights), problemDataClone);114 return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, selfMatch, weights), problemDataClone); 104 115 } 105 116 106 public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, double[] weights = null) {117 public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) { 107 118 return new NearestNeighbourModel(problemData.Dataset, 108 119 problemData.TrainingIndices, 109 120 k, 121 selfMatch, 110 122 problemData.TargetVariable, 111 123 problemData.AllowedInputVariables, -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs
r17097 r17164 1 #region License Information1 #region License Information 2 2 /* HeuristicLab 3 3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL) … … 37 37 38 38 private readonly object kdTreeLockObject = new object(); 39 39 40 private alglib.nearestneighbor.kdtree kdTree; 40 41 public alglib.nearestneighbor.kdtree KDTree { … … 49 50 } 50 51 51 52 52 public override IEnumerable<string> VariablesUsedForPrediction { 53 53 get { return allowedInputVariables; } … … 60 60 [Storable] 61 61 private int k; 62 [Storable(DefaultValue = false)] 63 private bool selfMatch; 62 64 [Storable(DefaultValue = null)] 63 65 private double[] weights; // not set for old versions loaded from disk … … 95 97 kdTree.x = (double[])original.kdTree.x.Clone(); 96 98 kdTree.xy = (double[,])original.kdTree.xy.Clone(); 97 99 selfMatch = original.selfMatch; 98 100 k = original.k; 99 101 isCompatibilityLoaded = original.IsCompatibilityLoaded; … … 108 110 this.classValues = (double[])original.classValues.Clone(); 109 111 } 110 public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)112 public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, bool selfMatch, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null) 111 113 : base(targetVariable) { 112 114 Name = ItemName; 113 115 Description = ItemDescription; 116 this.selfMatch = selfMatch; 114 117 this.k = k; 115 118 this.allowedInputVariables = allowedInputVariables.ToArray(); … … 130 133 .Select(name => { 131 134 var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop(); 132 return pop.IsAlmost(0) ? 1.0 : 1.0/pop;135 return pop.IsAlmost(0) ? 
1.0 : 1.0 / pop; 133 136 }) 134 137 .Concat(new double[] { 1.0 }) // no scaling for target variable … … 199 202 int numNeighbours; 200 203 lock (kdTreeLockObject) { // gkronber: the following calls change the kdTree data structure 201 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);204 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch); 202 205 alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists); 203 206 alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours); 204 207 } 205 208 if (selfMatch) { 209 // weights for neighbours are 1/d. 210 // override distances (=0) of exact matches using 1% of the distance of the next closest non-self-match neighbour -> selfmatches weight 100x more than the next closest neighbor. 211 // if all k neighbours are selfmatches then they all have weight 0.01. 212 double minDist = dists[0] + 1; 213 for (int i = 0; i < numNeighbours; i++) { 214 if ((minDist > dists[i]) && (dists[i] != 0)) { 215 minDist = dists[i]; 216 } 217 } 218 minDist /= 100.0; 219 for (int i = 0; i < numNeighbours; i++) { 220 if (dists[i] == 0) { 221 dists[i] = minDist; 222 } 223 } 224 } 206 225 double distanceWeightedValue = 0.0; 207 226 double distsSum = 0.0; … … 236 255 lock (kdTreeLockObject) { 237 256 // gkronber: the following calls change the kdTree data structure 238 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false);257 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch); 239 258 alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists); 240 259 alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours); -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs
r17097 r17164 41 41 private const string NearestNeighbourRegressionModelResultName = "Nearest neighbour regression solution"; 42 42 private const string WeightsParameterName = "Weights"; 43 private const string SelfMatchParameterName = "SelfMatch"; 43 44 44 45 #region parameter properties … … 46 47 get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; } 47 48 } 48 49 public IFixedValueParameter<BoolValue> SelfMatchParameter { 50 get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; } 51 } 49 52 public IValueParameter<DoubleArray> WeightsParameter { 50 53 get { return (IValueParameter<DoubleArray>)Parameters[WeightsParameterName]; } … … 59 62 } 60 63 } 61 64 public bool SelfMatch { 65 get { return SelfMatchParameter.Value.Value; } 66 set { SelfMatchParameter.Value.Value = value; } 67 } 62 68 public DoubleArray Weights { 63 69 get { return WeightsParameter.Value; } … … 75 81 Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3))); 76 82 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 83 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 77 84 Problem = new RegressionProblem(); 78 85 } … … 84 91 if (!Parameters.ContainsKey(WeightsParameterName)) { 85 92 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. 
If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 93 } 94 if (!Parameters.ContainsKey(SelfMatchParameterName)) { 95 Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false))); 86 96 } 87 97 #endregion … … 96 106 double[] weights = null; 97 107 if (Weights != null) weights = Weights.CloneAsArray(); 98 var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, weights);108 var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, SelfMatch, weights); 99 109 Results.Add(new Result(NearestNeighbourRegressionModelResultName, "The nearest neighbour regression solution.", solution)); 100 110 } 101 111 102 public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, double[] weights = null) {112 public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) { 103 113 var clonedProblemData = (IRegressionProblemData)problemData.Clone(); 104 return new NearestNeighbourRegressionSolution(Train(problemData, k, weights), clonedProblemData);114 return new NearestNeighbourRegressionSolution(Train(problemData, k, selfMatch, weights), clonedProblemData); 105 115 } 106 116 107 public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, double[] weights = null) {117 public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) { 108 118 return new NearestNeighbourModel(problemData.Dataset, 109 119 problemData.TrainingIndices, 110 120 k, 121 selfMatch, 111 122 problemData.TargetVariable, 112 123 problemData.AllowedInputVariables,
Note: See TracChangeset for help on using the changeset viewer.