- Timestamp: 08/08/12 16:46:53 (12 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/NCA/HeuristicLab.Algorithms.NCA/3.3/NeighborhoodComponentsAnalysis.cs
r8425 r8437 1 #region License Information1 #region License Information 2 2 /* HeuristicLab 3 3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL) … … 20 20 #endregion 21 21 22 using System;23 using System.Collections.Generic;24 22 using System.Linq; 23 using HeuristicLab.Algorithms.DataAnalysis; 25 24 using HeuristicLab.Common; 25 using HeuristicLab.Core; 26 using HeuristicLab.Data; 27 using HeuristicLab.Optimization; 28 using HeuristicLab.Parameters; 29 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 30 using HeuristicLab.PluginInfrastructure; 26 31 using HeuristicLab.Problems.DataAnalysis; 27 32 28 33 namespace HeuristicLab.Algorithms.NCA { 29 public class NeighborhoodComponentsAnalysis { 34 /// <summary> 35 /// Neighborhood Components Analysis 36 /// </summary> 37 [Item("Neighborhood Components Analysis", "NCA is described in J. Goldberger, S. Roweis, G. Hinton, R. Salakhutdinov. 2005. Neighbourhood Component Analysis. Advances in Neural Information Processing Systems, 17. pp. 
513-520.")] 38 [Creatable("Data Analysis")] 39 [StorableClass] 40 public sealed class NeighborhoodComponentsAnalysis : FixedDataAnalysisAlgorithm<IClassificationProblem> { 41 #region Parameter Properties 42 public IValueLookupParameter<IntValue> KParameter { 43 get { return (IValueLookupParameter<IntValue>)Parameters["k"]; } 44 } 45 public IValueLookupParameter<IntValue> ReduceDimensionsParameter { 46 get { return (IValueLookupParameter<IntValue>)Parameters["ReduceDimensions"]; } 47 } 48 private IConstrainedValueParameter<INCAInitializer> InitializationParameter { 49 get { return (IConstrainedValueParameter<INCAInitializer>)Parameters["Initialization"]; } 50 } 51 #endregion 30 52 31 public static INCAModel Train(IClassificationProblemData data, int k, int reduceDimensions, INCAInitializer initializer) { 32 var instances = data.TrainingIndices.Count(); 33 var attributes = data.AllowedInputVariables.Count(); 53 #region Properties 54 public IntValue K { 55 get { return KParameter.Value; } 56 } 57 public IntValue ReduceDimensions { 58 get { return ReduceDimensionsParameter.Value; } 59 } 60 #endregion 34 61 35 double[] matrix = initializer.Initialize(data, reduceDimensions); 62 [StorableConstructor] 63 private NeighborhoodComponentsAnalysis(bool deserializing) : base(deserializing) { } 64 private NeighborhoodComponentsAnalysis(NeighborhoodComponentsAnalysis original, Cloner cloner) : base(original, cloner) { } 65 public NeighborhoodComponentsAnalysis() 66 : base() { 67 Parameters.Add(new ValueLookupParameter<IntValue>("k", "The k for the nearest neighbor.", new IntValue(1))); 68 Parameters.Add(new ValueLookupParameter<IntValue>("ReduceDimensions", "The number of dimensions that NCA should reduce the data to.", new IntValue(2))); 69 Parameters.Add(new ConstrainedValueParameter<INCAInitializer>("Initialization", "Which method should be used to initialize the matrix. 
Typically LDA (linear discriminant analysis) should provide a good estimate.")); 36 70 37 alglib.mincgstate state; 38 alglib.mincgreport rep; 71 INCAInitializer defaultInitializer = null; 72 foreach (var initializer in ApplicationManager.Manager.GetInstances<INCAInitializer>().OrderBy(x => x.ItemName)) { 73 if (initializer is LDAInitializer) defaultInitializer = initializer; 74 InitializationParameter.ValidValues.Add(initializer); 75 } 76 if (defaultInitializer != null) InitializationParameter.Value = defaultInitializer; 39 77 40 // first run 41 alglib.mincgcreate(matrix, out state); 42 alglib.mincgsetcond(state, 0.0000000001, 0, 0, 0); 43 alglib.mincgoptimize(state, Gradient, null, new OptimizationInfo(data, reduceDimensions)); 44 alglib.mincgresults(state, out matrix, out rep); 45 46 var transformationMatrix = new double[attributes, reduceDimensions]; 47 var counter = 0; 48 for (var i = 0; i < attributes; i++) 49 for (var j = 0; j < reduceDimensions; j++) 50 transformationMatrix[i, j] = matrix[counter++]; 51 52 var transformedTrainingset = new double[instances, reduceDimensions]; 53 var rowCount = 0; 54 foreach (var r in data.TrainingIndices) { 55 var i = 0; 56 foreach (var v in data.AllowedInputVariables) { 57 var val = data.Dataset.GetDoubleValue(v, r); 58 for (var j = 0; j < reduceDimensions; j++) 59 transformedTrainingset[rowCount, j] += val * transformationMatrix[i, j]; 60 i++; 61 } 62 rowCount++; 63 } 64 65 return new NCAModel(transformedTrainingset, transformationMatrix, k, data.TargetVariable, data.AllowedInputVariables, 66 data.Dataset.GetDoubleValues(data.TargetVariable) 67 .Select((v, i) => new { I = i, V = v }) 68 .Where(x => x.I >= data.TrainingPartition.Start && x.I < data.TrainingPartition.End 69 && !(x.I >= data.TestPartition.Start && x.I < data.TestPartition.End)) 70 .Select(x => x.V).ToArray()); 78 Problem = new ClassificationProblem(); 71 79 } 72 80 73 private static void Gradient(double[] A, ref double func, double[] grad, object obj) { 74 var 
info = (OptimizationInfo)obj; 75 var instances = info.ProblemData.TrainingIndices.ToArray(); 76 var attributes = info.ProblemData.AllowedInputVariables.Count(); 77 var AMatrix = new Matrix(A, A.Length / info.ReduceDimensions, info.ReduceDimensions); 78 79 alglib.sparsematrix probabilities; 80 alglib.sparsecreate(instances.Length, instances.Length, out probabilities); 81 var distances = new double[instances.Length]; 82 for (int i = 0; i < instances.Length - 1; i++) { 83 var iVector = new Matrix(GetRow(info.ProblemData, instances[i])); 84 var denom = 0.0; 85 for (int k = 0; k < instances.Length; k++) { 86 if (k == i) continue; 87 var kVector = new Matrix(GetRow(info.ProblemData, instances[k])); 88 distances[k] = iVector.Multiply(AMatrix).Subtract(kVector.Multiply(AMatrix)).Length(); 89 denom += Math.Exp(-(distances[k] * distances[k])); 90 } 91 if (denom > 0) { 92 for (int j = i + 1; j < instances.Length; j++) { 93 if (i == j) continue; 94 var v = Math.Exp(-(distances[j] * distances[j])) / denom; 95 alglib.sparseset(probabilities, i, j, v); 96 alglib.sparseset(probabilities, j, i, v); 97 } 98 } 99 } 100 alglib.sparseconverttocrs(probabilities); // needed to enumerate in order (top-down and left-right) 101 102 int t0 = 0, t1 = 0, r, c; 103 double val; 104 var classes = info.ProblemData.Dataset.GetDoubleValues(info.ProblemData.TargetVariable, instances).ToArray(); 105 var pi = new double[instances.Length]; 106 while (alglib.sparseenumerate(probabilities, ref t0, ref t1, out r, out c, out val)) { 107 if (classes[r].IsAlmost(classes[c])) 108 pi[r] += val; 109 } 110 111 var innerSum = new double[attributes, attributes]; 112 while (alglib.sparseenumerate(probabilities, ref t0, ref t1, out r, out c, out val)) { 113 var vector = new Matrix(GetRow(info.ProblemData, instances[r])).Subtract(new Matrix(GetRow(info.ProblemData, instances[c]))).Apply(); 114 vector.OuterProduct(vector).Multiply(val * pi[r]).AddTo(innerSum); 115 116 if (classes[r].IsAlmost(classes[c])) { 117 
vector.OuterProduct(vector).Multiply(-val).AddTo(innerSum); 118 } 119 } 120 121 func = -pi.Sum(); 122 123 grad = AMatrix.Multiply(-2.0).Transpose().Multiply(new Matrix(innerSum)).Transpose().ToArray(); 81 public override IDeepCloneable Clone(Cloner cloner) { 82 return new NeighborhoodComponentsAnalysis(this, cloner); 124 83 } 125 84 126 p rivate static IEnumerable<double> GetRow(IClassificationProblemData data, int row) {127 return data.AllowedInputVariables.Select(v => data.Dataset.GetDoubleValue(v, row));85 public override void Prepare() { 86 if (Problem != null) base.Prepare(); 128 87 } 129 88 130 public static NCAClassificationSolution CreateNCASolution(IClassificationProblemData problemData, int k, int reduceDimensions, INCAInitializer initializer) { 131 return new NCAClassificationSolution(problemData, Train(problemData, k, reduceDimensions, initializer)); 132 } 89 protected override void Run() { 90 var k = K.Value; 91 var dimensions = ReduceDimensions.Value; 92 var initializer = InitializationParameter.Value; 133 93 134 private class OptimizationInfo { 135 public IClassificationProblemData ProblemData { get; set; } 136 public int ReduceDimensions { get; set; } 137 public OptimizationInfo(IClassificationProblemData problem, int reduceDimensions) { 138 this.ProblemData = problem; 139 this.ReduceDimensions = reduceDimensions; 140 } 94 var clonedProblem = (IClassificationProblemData)Problem.ProblemData.Clone(); 95 var classification = new NCAClassificationSolution(clonedProblem, Auxiliary.Train(clonedProblem, k, dimensions, initializer)); 96 Results.Add(new Result("ClassificationSolution", "The classification solution.", classification)); 97 // TODO: result that shows the LOO performance 141 98 } 142 99 }
Note: See TracChangeset for help on using the changeset viewer.