Changeset 14239 for branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour
- Timestamp: 08/05/16 17:40:11 (8 years ago)
- Location: branches/symbreg-factors-2650
- Files: 5 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/symbreg-factors-2650
- Property svn:mergeinfo changed
/trunk/sources (added) merged: 14234-14236
- Property svn:mergeinfo changed
-
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
/trunk/sources/HeuristicLab.Algorithms.DataAnalysis (added) merged: 14235-14236
- Property svn:mergeinfo changed
-
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs
r14185 r14239 40 40 private const string KParameterName = "K"; 41 41 private const string NearestNeighbourClassificationModelResultName = "Nearest neighbour classification solution"; 42 private const string WeightsParameterName = "Weights"; 43 42 44 43 45 #region parameter properties 44 46 public IFixedValueParameter<IntValue> KParameter { 45 47 get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; } 48 } 49 public IValueParameter<DoubleArray> WeightsParameter { 50 get { return (IValueParameter<DoubleArray>)Parameters[WeightsParameterName]; } 46 51 } 47 52 #endregion … … 53 58 else KParameter.Value.Value = value; 54 59 } 60 } 61 public DoubleArray Weights { 62 get { return WeightsParameter.Value; } 63 set { WeightsParameter.Value = value; } 55 64 } 56 65 #endregion … … 64 73 : base() { 65 74 Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3))); 75 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 66 76 Problem = new ClassificationProblem(); 67 77 } 68 78 [StorableHook(HookType.AfterDeserialization)] 69 private void AfterDeserialization() { } 79 private void AfterDeserialization() { 80 // BackwardsCompatibility3.3 81 #region Backwards compatible code, remove with 3.4 82 if (!Parameters.ContainsKey(WeightsParameterName)) { 83 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. 
If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 84 } 85 #endregion 86 } 70 87 71 88 public override IDeepCloneable Clone(Cloner cloner) { … … 75 92 #region nearest neighbour 76 93 protected override void Run() { 77 var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K); 94 double[] weights = null; 95 if (Weights != null) weights = Weights.CloneAsArray(); 96 var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, weights); 78 97 Results.Add(new Result(NearestNeighbourClassificationModelResultName, "The nearest neighbour classification solution.", solution)); 79 98 } 80 99 81 public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k ) {100 public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, double[] weights = null) { 82 101 var problemDataClone = (IClassificationProblemData)problemData.Clone(); 83 return new NearestNeighbourClassificationSolution(Train(problemDataClone, k ), problemDataClone);102 return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, weights), problemDataClone); 84 103 } 85 104 86 public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k ) {105 public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, double[] weights = null) { 87 106 return new NearestNeighbourModel(problemData.Dataset, 88 107 problemData.TrainingIndices, … … 90 109 problemData.TargetVariable, 91 110 problemData.AllowedInputVariables, 111 weights, 92 112 problemData.ClassValues.ToArray()); 93 113 } -
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs
r14238 r14239 58 58 [Storable] 59 59 private int k; 60 [Storable(DefaultValue = null)] 61 private double[] weights; // not set for old versions loaded from disk 62 [Storable(DefaultValue = null)] 63 private double[] offsets; // not set for old versions loaded from disk 60 64 61 65 [StorableConstructor] … … 93 97 94 98 k = original.k; 99 isCompatibilityLoaded = original.IsCompatibilityLoaded; 100 if (!IsCompatibilityLoaded) { 101 weights = new double[original.weights.Length]; 102 Array.Copy(original.weights, weights, weights.Length); 103 offsets = new double[original.offsets.Length]; 104 Array.Copy(original.offsets, this.offsets, this.offsets.Length); 105 } 95 106 allowedInputVariables = (string[])original.allowedInputVariables.Clone(); 96 107 if (original.classValues != null) 97 108 this.classValues = (double[])original.classValues.Clone(); 98 109 } 99 public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null)110 public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null) 100 111 : base(targetVariable) { 101 112 Name = ItemName; … … 103 114 this.k = k; 104 115 this.allowedInputVariables = allowedInputVariables.ToArray(); 105 106 var inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, 107 allowedInputVariables.Concat(new string[] { targetVariable }), 108 rows); 116 double[,] inputMatrix; 117 if (IsCompatibilityLoaded) { 118 // no scaling 119 inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, 120 this.allowedInputVariables.Concat(new string[] { targetVariable }), 121 rows); 122 } else { 123 this.offsets = this.allowedInputVariables 124 .Select(name => dataset.GetDoubleValues(name, rows).Average() * -1) 125 .Concat(new double[] { 0 }) // no offset for target variable 126 .ToArray(); 127 if (weights == null) { 
128 // automatic determination of weights (all features should have variance = 1) 129 this.weights = this.allowedInputVariables 130 .Select(name => 1.0 / dataset.GetDoubleValues(name, rows).StandardDeviationPop()) 131 .Concat(new double[] { 1.0 }) // no scaling for target variable 132 .ToArray(); 133 } else { 134 // user specified weights (+ 1 for target) 135 this.weights = weights.Concat(new double[] { 1.0 }).ToArray(); 136 if (this.weights.Length - 1 != this.allowedInputVariables.Length) 137 throw new ArgumentException("The number of elements in the weight vector must match the number of input variables"); 138 } 139 inputMatrix = CreateScaledData(dataset, this.allowedInputVariables.Concat(new string[] { targetVariable }), rows, this.offsets, this.weights); 140 } 109 141 110 142 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) … … 132 164 } 133 165 166 private static double[,] CreateScaledData(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, double[] offsets, double[] factors) { 167 var x = new double[rows.Count(), variables.Count()]; 168 var colIdx = 0; 169 foreach (var variableName in variables) { 170 var rowIdx = 0; 171 foreach (var val in dataset.GetDoubleValues(variableName, rows)) { 172 x[rowIdx, colIdx] = (val + offsets[colIdx]) * factors[colIdx]; 173 rowIdx++; 174 } 175 colIdx++; 176 } 177 return x; 178 } 179 134 180 public override IDeepCloneable Clone(Cloner cloner) { 135 181 return new NearestNeighbourModel(this, cloner); … … 137 183 138 184 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 139 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 185 double[,] inputData; 186 if (IsCompatibilityLoaded) { 187 inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 188 } else { 189 inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights); 190 } 140 191 141 192 int n = 
inputData.GetLength(0); 142 193 int columns = inputData.GetLength(1); 143 194 double[] x = new double[columns]; 144 double[] y = new double[1];145 195 double[] dists = new double[k]; 146 196 double[,] neighbours = new double[k, columns + 1]; … … 150 200 x[column] = inputData[row, column]; 151 201 } 152 int actNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false); 153 alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists); 154 alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours); 202 int numNeighbours; 203 lock (kdTree) { // gkronber: the following calls change the kdTree data structure 204 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false); 205 alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists); 206 alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours); 207 } 155 208 156 209 double distanceWeightedValue = 0.0; 157 210 double distsSum = 0.0; 158 for (int i = 0; i < actNeighbours; i++) {211 for (int i = 0; i < numNeighbours; i++) { 159 212 distanceWeightedValue += neighbours[i, columns] / dists[i]; 160 213 distsSum += 1.0 / dists[i]; … … 166 219 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 167 220 if (classValues == null) throw new InvalidOperationException("No class values are defined."); 168 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 169 221 double[,] inputData; 222 if (IsCompatibilityLoaded) { 223 inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 224 } else { 225 inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights); 226 } 170 227 int n = inputData.GetLength(0); 171 228 int columns = inputData.GetLength(1); … … 179 236 x[column] = inputData[row, column]; 180 237 } 181 int actNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false); 182 
alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists); 183 alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours); 184 238 int numNeighbours; 239 lock (kdTree) { 240 // gkronber: the following calls change the kdTree data structure 241 numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, false); 242 alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists); 243 alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours); 244 } 185 245 Array.Clear(y, 0, y.Length); 186 for (int i = 0; i < actNeighbours; i++) {246 for (int i = 0; i < numNeighbours; i++) { 187 247 int classValue = (int)Math.Round(neighbours[i, columns]); 188 248 y[classValue]++; … … 219 279 #endregion 220 280 281 282 // BackwardsCompatibility3.3 283 #region Backwards compatible code, remove with 3.4 284 285 private bool isCompatibilityLoaded = false; // new kNN models have the value false, kNN models loaded from disc have the value true 286 [Storable(DefaultValue = true)] 287 public bool IsCompatibilityLoaded { 288 get { return isCompatibilityLoaded; } 289 set { isCompatibilityLoaded = value; } 290 } 291 #endregion 221 292 #region persistence 222 293 [Storable] -
branches/symbreg-factors-2650/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs
r14185 r14239 39 39 private const string KParameterName = "K"; 40 40 private const string NearestNeighbourRegressionModelResultName = "Nearest neighbour regression solution"; 41 private const string WeightsParameterName = "Weights"; 41 42 42 43 #region parameter properties 43 44 public IFixedValueParameter<IntValue> KParameter { 44 45 get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; } 46 } 47 48 public IValueParameter<DoubleArray> WeightsParameter { 49 get { return (IValueParameter<DoubleArray>)Parameters[WeightsParameterName]; } 45 50 } 46 51 #endregion … … 52 57 else KParameter.Value.Value = value; 53 58 } 59 } 60 61 public DoubleArray Weights { 62 get { return WeightsParameter.Value; } 63 set { WeightsParameter.Value = value; } 54 64 } 55 65 #endregion … … 63 73 : base() { 64 74 Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3))); 75 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 65 76 Problem = new RegressionProblem(); 66 77 } 78 67 79 [StorableHook(HookType.AfterDeserialization)] 68 private void AfterDeserialization() { } 80 private void AfterDeserialization() { 81 // BackwardsCompatibility3.3 82 #region Backwards compatible code, remove with 3.4 83 if (!Parameters.ContainsKey(WeightsParameterName)) { 84 Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. 
If not set the weights are calculated automatically (each feature is scaled to unit variance)")); 85 } 86 #endregion 87 } 69 88 70 89 public override IDeepCloneable Clone(Cloner cloner) { … … 74 93 #region nearest neighbour 75 94 protected override void Run() { 76 var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K); 95 double[] weights = null; 96 if (Weights != null) weights = Weights.CloneAsArray(); 97 var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, weights); 77 98 Results.Add(new Result(NearestNeighbourRegressionModelResultName, "The nearest neighbour regression solution.", solution)); 78 99 } 79 100 80 public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k ) {101 public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, double[] weights = null) { 81 102 var clonedProblemData = (IRegressionProblemData)problemData.Clone(); 82 return new NearestNeighbourRegressionSolution(Train(problemData, k ), clonedProblemData);103 return new NearestNeighbourRegressionSolution(Train(problemData, k, weights), clonedProblemData); 83 104 } 84 105 85 public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k ) {106 public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, double[] weights = null) { 86 107 return new NearestNeighbourModel(problemData.Dataset, 87 108 problemData.TrainingIndices, 88 109 k, 89 110 problemData.TargetVariable, 90 problemData.AllowedInputVariables); 111 problemData.AllowedInputVariables, 112 weights); 91 113 } 92 114 #endregion
Note: See TracChangeset for help on using the changeset viewer.