Changeset 8454 for branches/NCA

Timestamp:
    08/09/12 02:17:57 (12 years ago)
Location:
    branches/NCA/HeuristicLab.Algorithms.NCA/3.3
Files:
    2 edited
Legend: unmodified lines are shown without a prefix, added lines are prefixed with "+", removed lines with "-".
branches/NCA/HeuristicLab.Algorithms.NCA/3.3/NCAModel.cs
(changed from r8441 to r8454)

  #endregion

- using System;
  using System.Collections.Generic;
  using System.Linq;
…
  [Storable]
- private string targetVariable;
- [Storable]
- private string[] allowedInputVariables;
- [Storable]
- private double[] classValues;
- /// <summary>
- /// Get a clone of the class values
- /// </summary>
- public double[] ClassValues {
-   get { return (double[])classValues.Clone(); }
- }
- [Storable]
- private int k;
+ private Scaling scaling;
  [Storable]
  private double[,] transformationMatrix;
- /// <summary>
- /// Get a clone of the transformation matrix
- /// </summary>
  public double[,] TransformationMatrix {
    get { return (double[,])transformationMatrix.Clone(); }
  }
  [Storable]
- private double[,] transformedTrainingset;
- /// <summary>
- /// Get a clone of the transformed trainingset
- /// </summary>
- public double[,] TransformedTrainingset {
-   get { return (double[,])transformedTrainingset.Clone(); }
- }
+ private string[] allowedInputVariables;
  [Storable]
- private Scaling scaling;
+ private string targetVariable;
+ [Storable]
+ private INearestNeighbourModel nnModel;
+ [Storable]
+ private Dictionary<double, double> nn2ncaClassMapping;
+ [Storable]
+ private Dictionary<double, double> nca2nnClassMapping;

  [StorableConstructor]
…
  protected NCAModel(NCAModel original, Cloner cloner)
    : base(original, cloner) {
-   k = original.k;
-   targetVariable = original.targetVariable;
-   allowedInputVariables = (string[])original.allowedInputVariables.Clone();
-   if (original.classValues != null)
-     this.classValues = (double[])original.classValues.Clone();
-   if (original.transformationMatrix != null)
-     this.transformationMatrix = (double[,])original.transformationMatrix.Clone();
-   if (original.transformedTrainingset != null)
-     this.transformedTrainingset = (double[,])original.transformedTrainingset.Clone();
    this.scaling = cloner.Clone(original.scaling);
+   this.transformationMatrix = (double[,])original.transformationMatrix.Clone();
+   this.allowedInputVariables = (string[])original.allowedInputVariables.Clone();
+   this.targetVariable = original.targetVariable;
+   this.nnModel = cloner.Clone(original.nnModel);
+   this.nn2ncaClassMapping = original.nn2ncaClassMapping.ToDictionary(x => x.Key, y => y.Value);
+   this.nca2nnClassMapping = original.nca2nnClassMapping.ToDictionary(x => x.Key, y => y.Value);
  }
- public NCAModel(double[,] transformedTrainingset, Scaling scaling, double[,] transformationMatrix, int k, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null)
-   : base() {
-   this.name = ItemName;
-   this.description = ItemDescription;
-   this.transformedTrainingset = transformedTrainingset;
+ public NCAModel(int k, double[,] scaledData, Scaling scaling, double[,] transformationMatrix, string targetVariable, IEnumerable<double> targetVector, IEnumerable<string> allowedInputVariables) {
+   Name = ItemName;
+   Description = ItemDescription;
    this.scaling = scaling;
    this.transformationMatrix = transformationMatrix;
-   this.k = k;
+   this.allowedInputVariables = allowedInputVariables.ToArray();
    this.targetVariable = targetVariable;
-   this.allowedInputVariables = allowedInputVariables.ToArray();
-   if (classValues != null)
-     this.classValues = (double[])classValues.Clone();
+
+   nca2nnClassMapping = targetVector.Distinct().OrderBy(x => x).Select((v, i) => new { Index = (double)i, Class = v }).ToDictionary(x => x.Class, y => y.Index);
+   nn2ncaClassMapping = nca2nnClassMapping.ToDictionary(x => x.Value, y => y.Key);
+
+   var transformedData = ReduceWithTarget(scaledData, targetVector.Select(x => nca2nnClassMapping[x]));
+
+   var kdtree = new alglib.nearestneighbor.kdtree();
+   alglib.nearestneighbor.kdtreebuild(transformedData, transformedData.GetLength(0), transformedData.GetLength(1) - 1, 1, 2, kdtree);
+
+   nnModel = new NearestNeighbourModel(kdtree, k, targetVariable,
+     Enumerable.Range(0, transformationMatrix.GetLength(1)).Select(x => x.ToString()),
+     nn2ncaClassMapping.Keys.ToArray());
  }

…
  public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
-   var k = Math.Min(this.k, transformedTrainingset.GetLength(0));
-   var transformedRow = new double[transformationMatrix.GetLength(1)];
-   var kVotes = new SortedList<double, double>(k + 1);
-   foreach (var r in rows) {
-     for (int i = 0; i < transformedRow.Length; i++) transformedRow[i] = 0;
-     int j = 0;
-     foreach (var v in allowedInputVariables) {
-       var values = scaling.GetScaledValues(dataset, v, rows);
-       double val = dataset.GetDoubleValue(v, r);
-       for (int i = 0; i < transformedRow.Length; i++)
-         transformedRow[i] += val * transformationMatrix[j, i];
-       j++;
+   var unknownClasses = dataset.GetDoubleValues(targetVariable, rows).Where(x => !nca2nnClassMapping.ContainsKey(x));
+   if (unknownClasses.Any())
+     foreach (var uc in unknownClasses) {
+       nca2nnClassMapping[uc] = nca2nnClassMapping.Count;
+       nn2ncaClassMapping[nca2nnClassMapping[uc]] = uc;
      }
-     kVotes.Clear();
-     for (int a = 0; a < transformedTrainingset.GetLength(0); a++) {
-       double d = 0;
-       for (int y = 0; y < transformedRow.Length; y++) {
-         d += (transformedRow[y] - transformedTrainingset[a, y]) * (transformedRow[y] - transformedTrainingset[a, y]);
-       }
-       while (kVotes.ContainsKey(d)) d += 1e-12;
-       if (kVotes.Count <= k || kVotes.Last().Key > d) {
-         kVotes.Add(d, classValues[a]);
-         if (kVotes.Count > k) kVotes.RemoveAt(kVotes.Count - 1);
-       }
-     }
-     yield return kVotes.Values.ToLookup(x => x).MaxItems(x => x.Count()).First().Key;
-   }
+   var transformedData = ReduceWithTarget(dataset, rows, dataset.GetDoubleValues(targetVariable, rows).Select(x => nca2nnClassMapping[x]));
+   var ds = new Dataset(Enumerable.Range(0, transformationMatrix.GetLength(1)).Select(x => x.ToString()).Concat(targetVariable.ToEnumerable()), transformedData);
+   return nnModel.GetEstimatedClassValues(ds, Enumerable.Range(0, ds.Rows)).Select(x => nn2ncaClassMapping[x]);
  }
+
  public NCAClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
    return new NCAClassificationSolution(problemData, this);
  }
+
  IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) {
    return CreateClassificationSolution(problemData);
…
  }
  public double[,] Reduce(Dataset dataset, IEnumerable<int> rows) {
-   var result = new double[rows.Count(), transformationMatrix.GetLength(1)];
-   int v = 0;
-   foreach (var r in rows) {
-     int i = 0;
-     foreach (var variable in allowedInputVariables) {
-       double val = dataset.GetDoubleValue(variable, r);
-       for (int j = 0; j < result.GetLength(1); j++)
-         result[v, j] += val * transformationMatrix[i, j];
-       i++;
-     }
-     v++;
-   }
+   var scaledData = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, scaling);
+   return Reduce(scaledData);
+ }
+
+ private double[,] Reduce(double[,] scaledData) {
+   var result = new double[scaledData.GetLength(0), transformationMatrix.GetLength(1)];
+   for (int i = 0; i < scaledData.GetLength(0); i++)
+     for (int j = 0; j < scaledData.GetLength(1); j++)
+       for (int x = 0; x < transformationMatrix.GetLength(1); x++) {
+         result[i, x] += scaledData[i, j] * transformationMatrix[j, x];
+       }
+   return result;
+ }
+
+ private double[,] ReduceWithTarget(Dataset dataset, IEnumerable<int> rows, IEnumerable<double> targetValues) {
+   var scaledData = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, scaling);
+   return ReduceWithTarget(scaledData, targetValues);
+ }
+
+ private double[,] ReduceWithTarget(double[,] scaledData, IEnumerable<double> targetValues) {
+   var result = new double[scaledData.GetLength(0), transformationMatrix.GetLength(1) + 1];
+   for (int i = 0; i < scaledData.GetLength(0); i++)
+     for (int j = 0; j < scaledData.GetLength(1); j++)
+       for (int x = 0; x < transformationMatrix.GetLength(1); x++) {
+         result[i, x] += scaledData[i, j] * transformationMatrix[j, x];
+       }
+
+   int r = 0;
+   foreach (var d in targetValues) result[r++, transformationMatrix.GetLength(1)] = d;
+
    return result;
  }
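Not part of the changeset: a minimal standalone sketch of the class-value mapping that the reworked NCAModel stores in nca2nnClassMapping and nn2ncaClassMapping. Arbitrary class labels are mapped to consecutive indices for the inner nearest-neighbour model and mapped back when estimates are returned; the sample target values below are made up for illustration.

using System;
using System.Collections.Generic;
using System.Linq;

class ClassMappingSketch {
  static void Main() {
    // hypothetical class column with non-consecutive labels
    double[] targetVector = { 7.0, 2.0, 9.0, 2.0, 7.0 };

    // forward map: class value -> consecutive index (what the inner nearest-neighbour model sees)
    var nca2nn = targetVector.Distinct().OrderBy(x => x)
      .Select((v, i) => new { Index = (double)i, Class = v })
      .ToDictionary(x => x.Class, x => x.Index);
    // backward map: index -> original class value
    var nn2nca = nca2nn.ToDictionary(x => x.Value, x => x.Key);

    Console.WriteLine(nca2nn[7.0]);          // 1
    Console.WriteLine(nn2nca[nca2nn[7.0]]);  // 7, i.e. the mapping round-trips
  }
}

The round trip is what lets GetEstimatedClassValues translate the nearest-neighbour model's output back into the problem's original class values.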
branches/NCA/HeuristicLab.Algorithms.NCA/3.3/NeighborhoodComponentsAnalysis.cs
(changed from r8441 to r8454)

  using System.Collections.Generic;
  using System.Linq;
+ using System.Threading;
  using HeuristicLab.Algorithms.DataAnalysis;
  using HeuristicLab.Analysis;
…
namespace HeuristicLab.Algorithms.NCA {
- public delegate void Reporter(double quality, double[] coefficients);
+ internal delegate void Reporter(double quality, double[] coefficients);
  /// <summary>
  /// Neighborhood Components Analysis
…
    var clonedProblem = (IClassificationProblemData)Problem.ProblemData.Clone();
-   var model = Train(clonedProblem, k, dimensions, initializer, ReportQuality);
+   var model = Train(clonedProblem, k, dimensions, initializer.Initialize(clonedProblem, dimensions), ReportQuality, CancellationToken.None);
    var classification = new NCAClassificationSolution(clonedProblem, model);
    Results.Add(new Result("ClassificationSolution", "The classification solution.", classification));
…
  }

- public static INCAModel Train(IClassificationProblemData data, int k, int dimensions, INCAInitializer initializer, Reporter reporter = null) {
-   var instances = data.TrainingIndices.Count();
-   var attributes = data.AllowedInputVariables.Count();
-
-   double[] matrix = initializer.Initialize(data, dimensions);
-
-   var info = new OptimizationInfo(data, dimensions, reporter);
+ public static INCAModel Train(IClassificationProblemData problemData, int k, int dimensions, INCAInitializer initializer) {
+   return Train(problemData, k, dimensions, initializer.Initialize(problemData, dimensions), null, CancellationToken.None);
+ }
+
+ public static INCAModel Train(IClassificationProblemData problemData, int k, int dimensions, double[,] initalMatrix) {
+   var matrix = new double[initalMatrix.Length];
+   for (int i = 0; i < initalMatrix.GetLength(0); i++)
+     for (int j = 0; j < initalMatrix.GetLength(1); j++)
+       matrix[i * initalMatrix.GetLength(1) + j] = initalMatrix[i, j];
+   return Train(problemData, k, dimensions, matrix, null, CancellationToken.None);
+ }
+
+ private static INCAModel Train(IClassificationProblemData data, int k, int dimensions, double[] matrix, Reporter reporter, CancellationToken cancellation) {
+   var scaling = new Scaling(data.Dataset, data.AllowedInputVariables, data.TrainingIndices);
+   var scaledData = AlglibUtil.PrepareAndScaleInputMatrix(data.Dataset, data.AllowedInputVariables, data.TrainingIndices, scaling);
+   var classes = data.Dataset.GetDoubleValues(data.TargetVariable, data.TrainingIndices).ToArray();
+   var instances = scaledData.GetLength(0);
+   var attributes = scaledData.GetLength(1);
+
    alglib.mincgstate state;
    alglib.mincgreport rep;
-
    alglib.mincgcreate(matrix, out state);
-   alglib.mincgsetcond(state, 0, 1e-05, 0, 20);
+   alglib.mincgsetcond(state, 0, 0, 0, 20);
    alglib.mincgsetxrep(state, true);
-   alglib.mincgoptimize(state, Gradient, Report, info);
+   Optimize(state, scaledData, classes, dimensions, cancellation, reporter);
    alglib.mincgresults(state, out matrix, out rep);
…
        transformationMatrix[i, j] = matrix[counter++];

-   var transformedTrainingset = new double[instances, dimensions];
-   var rowCount = 0;
-   foreach (var r in data.TrainingIndices) {
-     var i = 0;
-     foreach (var v in data.AllowedInputVariables) {
-       var val = data.Dataset.GetDoubleValue(v, r);
-       for (var j = 0; j < dimensions; j++)
-         transformedTrainingset[rowCount, j] += val * transformationMatrix[i, j];
-       i++;
-     }
-     rowCount++;
-   }
-
-   var ds = data.Dataset;
-   var targetVariable = data.TargetVariable;
-   return new NCAModel(transformedTrainingset, info.Scaling, transformationMatrix, k, data.TargetVariable, data.AllowedInputVariables,
-     data.TrainingIndices.Select(i => ds.GetDoubleValue(targetVariable, i)).ToArray());
- }
-
- private static void Report(double[] A, double func, object obj) {
-   var info = (OptimizationInfo)obj;
-   if (info.Reporter != null) info.Reporter(func, A);
- }
-
- private static void Gradient(double[] A, ref double func, double[] grad, object obj) {
-   var info = (OptimizationInfo)obj;
-   var data = info.Data;
-   var classes = info.TargetValues;
-   var instances = info.Instances;
-   var attributes = info.Attributes;
-
-   var AMatrix = new Matrix(A, A.Length / info.ReduceDimensions, info.ReduceDimensions);
+   return new NCAModel(k, scaledData, scaling, transformationMatrix, data.TargetVariable, data.Dataset.GetDoubleValues(data.TargetVariable, data.TrainingIndices), data.AllowedInputVariables);
+ }
+
+ private static void Optimize(alglib.mincgstate state, double[,] data, double[] classes, int dimensions, CancellationToken cancellation, Reporter reporter) {
+   while (alglib.mincgiteration(state)) {
+
+     if (cancellation.IsCancellationRequested) break;
+
+     if (state.needfg) {
+       Gradient(state.x, ref state.innerobj.f, state.innerobj.g, data, classes, dimensions);
+       continue;
+     }
+
+     if (state.innerobj.xupdated) {
+       if (reporter != null)
+         reporter(state.innerobj.f, state.innerobj.x);
+       continue;
+     }
+
+     throw new InvalidOperationException("Neighborhood Components Analysis: Error in Optimize() (some derivatives were not provided?)");
+   }
+ }
+
+ private static void Gradient(double[] A, ref double func, double[] grad, double[,] data, double[] classes, int dimensions) {
+   var instances = data.GetLength(0);
+   var attributes = data.GetLength(1);
+
+   var AMatrix = new Matrix(A, A.Length / dimensions, dimensions);

    alglib.sparsematrix probabilities;
…
  }

- #region Helpers
  private static IEnumerable<double> GetRow(double[,] data, int row) {
    for (int i = 0; i < data.GetLength(1); i++)
      yield return data[row, i];
  }
-
- private class OptimizationInfo {
-   public Scaling Scaling { get; private set; }
-   public double[,] Data { get; private set; }
-   public double[] TargetValues { get; private set; }
-   public int ReduceDimensions { get; private set; }
-   public int Instances { get; private set; }
-   public int Attributes { get; private set; }
-   public Reporter Reporter { get; private set; }
-
-   public OptimizationInfo(IClassificationProblemData data, int reduceDimensions, Reporter reporter) {
-     this.Scaling = new Scaling(data.Dataset, data.AllowedInputVariables, data.TrainingIndices);
-     this.Data = AlglibUtil.PrepareAndScaleInputMatrix(data.Dataset, data.AllowedInputVariables, data.TrainingIndices, Scaling);
-     this.TargetValues = data.Dataset.GetDoubleValues(data.TargetVariable, data.TrainingIndices).ToArray();
-     this.ReduceDimensions = reduceDimensions;
-     this.Instances = data.TrainingIndices.Count();
-     this.Attributes = data.AllowedInputVariables.Count();
-     this.Reporter = reporter;
-   }
- }
- #endregion
  }
}
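Not part of the changeset: a sketch of the manual optimization loop pattern that the new Optimize() uses in place of alglib.mincgoptimize(), shown here on a toy quadratic objective. It assumes the same alglib members the changeset itself relies on (mincgcreate, mincgsetcond, mincgsetxrep, mincgiteration, needfg, innerobj, mincgresults); driving the iterations manually is what makes the cancellation check and the progress report possible.

using System;
using System.Threading;

class ManualMincgSketch {
  static void Main() {
    // minimize f(x) = (x0 - 3)^2 + (x1 + 1)^2 starting from the origin
    double[] x = { 0.0, 0.0 };
    alglib.mincgstate state;
    alglib.mincgreport rep;
    alglib.mincgcreate(x, out state);
    alglib.mincgsetcond(state, 0, 0, 0, 20);   // stop after at most 20 iterations
    alglib.mincgsetxrep(state, true);          // request xupdated reports

    var cancellation = CancellationToken.None; // a real token would come from the caller
    while (alglib.mincgiteration(state)) {
      if (cancellation.IsCancellationRequested) break;
      if (state.needfg) {                      // alglib asks for f and its gradient at state.x
        double a = state.x[0] - 3, b = state.x[1] + 1;
        state.innerobj.f = a * a + b * b;
        state.innerobj.g[0] = 2 * a;
        state.innerobj.g[1] = 2 * b;
        continue;
      }
      if (state.innerobj.xupdated) continue;   // a Reporter callback would be invoked here
    }
    alglib.mincgresults(state, out x, out rep);
    Console.WriteLine("{0}, {1}", x[0], x[1]); // close to 3 and -1
  }
}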