Changeset 9270 for trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4
- Timestamp:
- 03/01/13 18:32:26 (12 years ago)
- Location:
- trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files:
-
- 8 added
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r9177 r9270 224 224 <Compile Include="Nca\Initialization\INcaInitializer.cs" /> 225 225 <Compile Include="Nca\Initialization\LdaInitializer.cs" /> 226 <Compile Include="Nca\Initialization\NcaInitializer.cs" /> 226 227 <Compile Include="Nca\Initialization\PcaInitializer.cs" /> 227 228 <Compile Include="Nca\Initialization\RandomInitializer.cs" /> 228 229 <Compile Include="Nca\Matrix.cs" /> 230 <Compile Include="Nca\ModelCreation\INcaModelCreator.cs" /> 231 <Compile Include="Nca\ModelCreation\NcaModelCreator.cs" /> 229 232 <Compile Include="Nca\NcaAlgorithm.cs" /> 230 233 <Compile Include="Nca\NcaClassificationSolution.cs" /> 234 <Compile Include="Nca\NcaGradientCalculator.cs" /> 231 235 <Compile Include="Nca\NcaModel.cs" /> 236 <Compile Include="Nca\SolutionCreation\INcaSolutionCreator.cs" /> 237 <Compile Include="Nca\SolutionCreation\NcaSolutionCreator.cs" /> 232 238 <Compile Include="NearestNeighbour\NearestNeighbourClassification.cs" /> 233 239 <Compile Include="NearestNeighbour\NearestNeighbourClassificationSolution.cs" /> -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/INcaInitializer.cs
r8471 r9270 24 24 25 25 namespace HeuristicLab.Algorithms.DataAnalysis { 26 public interface IN CAInitializer : IItem{26 public interface INcaInitializer : IOperator { 27 27 /// <summary> 28 28 /// Calculates an initial projection for the NCA to start from. … … 30 30 /// <param name="data">The problem data that contains the AllowedInputVariables and TrainingIndices.</param> 31 31 /// <param name="dimensions">The amount of columns in the matrix</param> 32 /// <returns> A flat representation of a matrix that is read row-wise and contains AllowedInputVariables * TrainingIndices numbers.</returns>33 double[ ] Initialize(IClassificationProblemData data, int dimensions);32 /// <returns>The matrix that projects the input variables into a lower dimensional space.</returns> 33 double[,] Initialize(IClassificationProblemData data, Scaling scaling, int dimensions); 34 34 } 35 35 } -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/LdaInitializer.cs
r8471 r9270 20 20 #endregion 21 21 22 using System.Collections.Generic;23 22 using System.Linq; 24 23 using HeuristicLab.Common; … … 30 29 [Item("LDA", "Initializes the matrix by performing a linear discriminant analysis.")] 31 30 [StorableClass] 32 public class L DAInitializer : Item, INCAInitializer {31 public class LdaInitializer : NcaInitializer { 33 32 34 33 [StorableConstructor] 35 protected L DAInitializer(bool deserializing) : base(deserializing) { }36 protected L DAInitializer(LDAInitializer original, Cloner cloner) : base(original, cloner) { }37 public L DAInitializer() : base() { }34 protected LdaInitializer(bool deserializing) : base(deserializing) { } 35 protected LdaInitializer(LdaInitializer original, Cloner cloner) : base(original, cloner) { } 36 public LdaInitializer() : base() { } 38 37 39 38 public override IDeepCloneable Clone(Cloner cloner) { 40 return new L DAInitializer(this, cloner);39 return new LdaInitializer(this, cloner); 41 40 } 42 41 43 public double[] Initialize(IClassificationProblemData data, int dimensions) {42 public override double[,] Initialize(IClassificationProblemData data, Scaling scaling, int dimensions) { 44 43 var instances = data.TrainingIndices.Count(); 45 44 var attributes = data.AllowedInputVariables.Count(); 46 45 47 46 var ldaDs = new double[instances, attributes + 1]; 48 int row, col= 0;49 foreach (var variablein data.AllowedInputVariables) {50 row= 0;51 foreach (var value in data.Dataset.GetDoubleValues(variable, data.TrainingIndices)) {52 ldaDs[row, col] = value;53 row++;47 int j = 0; 48 foreach (var a in data.AllowedInputVariables) { 49 int i = 0; 50 var sv = scaling.GetScaledValues(data.Dataset, a, data.TrainingIndices); 51 foreach (var v in sv) { 52 ldaDs[i++, j] = v; 54 53 } 55 col++;54 j++; 56 55 } 57 row = 0; 58 var uniqueClasses = new Dictionary<double, int>(); 59 foreach (var label in data.Dataset.GetDoubleValues(data.TargetVariable, data.TrainingIndices)) { 60 if (!uniqueClasses.ContainsKey(label)) 61 uniqueClasses[label] = uniqueClasses.Count; 62 ldaDs[row++, attributes] = label; 63 } 64 for (row = 0; row < instances; row++) 65 ldaDs[row, attributes] = uniqueClasses[ldaDs[row, attributes]]; 56 j = 0; 57 foreach (var tv in data.Dataset.GetDoubleValues(data.TargetVariable, data.TrainingIndices)) 58 ldaDs[j++, attributes] = tv; 59 60 var uniqueClasses = data.Dataset.GetDoubleValues(data.TargetVariable, data.TrainingIndices).Distinct().Count(); 66 61 67 62 int info; 68 63 double[,] matrix; 69 alglib.fisherldan(ldaDs, instances, attributes, uniqueClasses .Count, out info, out matrix);64 alglib.fisherldan(ldaDs, instances, attributes, uniqueClasses, out info, out matrix); 70 65 71 var result = new double[attributes * dimensions]; 72 for (int i = 0; i < attributes; i++) 73 for (int j = 0; j < dimensions; j++) 74 result[i * dimensions + j] = matrix[i, j]; 75 76 return result; 66 return matrix; 77 67 } 78 68 -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/PcaInitializer.cs
r8471 r9270 29 29 [Item("PCA", "Initializes the matrix by performing a principal components analysis.")] 30 30 [StorableClass] 31 public sealed class P CAInitializer : Item, INCAInitializer {31 public sealed class PcaInitializer : NcaInitializer { 32 32 33 33 [StorableConstructor] 34 private P CAInitializer(bool deserializing) : base(deserializing) { }35 private P CAInitializer(PCAInitializer original, Cloner cloner) : base(original, cloner) { }36 public P CAInitializer() : base() { }34 private PcaInitializer(bool deserializing) : base(deserializing) { } 35 private PcaInitializer(PcaInitializer original, Cloner cloner) : base(original, cloner) { } 36 public PcaInitializer() : base() { } 37 37 38 38 public override IDeepCloneable Clone(Cloner cloner) { 39 return new P CAInitializer(this, cloner);39 return new PcaInitializer(this, cloner); 40 40 } 41 41 42 public double[] Initialize(IClassificationProblemData data, int dimensions) {42 public override double[,] Initialize(IClassificationProblemData data, Scaling scaling, int dimensions) { 43 43 var instances = data.TrainingIndices.Count(); 44 44 var attributes = data.AllowedInputVariables.Count(); 45 45 46 var pcaDs = new double[instances, attributes]; 47 int col = 0; 48 foreach (var variable in data.AllowedInputVariables) { 49 int row = 0; 50 foreach (var value in data.Dataset.GetDoubleValues(variable, data.TrainingIndices)) { 51 pcaDs[row, col] = value; 52 row++; 53 } 54 col++; 55 } 46 var pcaDs = AlglibUtil.PrepareAndScaleInputMatrix(data.Dataset, data.AllowedInputVariables, data.TrainingIndices, scaling); 56 47 57 48 int info; … … 60 51 alglib.pcabuildbasis(pcaDs, instances, attributes, out info, out varianceValues, out matrix); 61 52 62 var result = new double[attributes * dimensions]; 63 for (int i = 0; i < attributes; i++) 64 for (int j = 0; j < dimensions; j++) 65 result[i * dimensions + j] = matrix[i, j]; 66 67 return result; 53 return matrix; 68 54 } 69 55 -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/RandomInitializer.cs
r8471 r9270 23 23 using HeuristicLab.Common; 24 24 using HeuristicLab.Core; 25 using HeuristicLab. Data;25 using HeuristicLab.Optimization; 26 26 using HeuristicLab.Parameters; 27 27 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 28 28 using HeuristicLab.Problems.DataAnalysis; 29 using HeuristicLab.Random;30 29 31 30 namespace HeuristicLab.Algorithms.DataAnalysis { 32 31 [Item("Random", "Initializes the matrix randomly.")] 33 32 [StorableClass] 34 public class RandomInitializer : ParameterizedNamedItem, INCAInitializer { 35 private IValueParameter<IntValue> RandomParameter { 36 get { return (IValueParameter<IntValue>)Parameters["Seed"]; } 37 } 38 private IValueParameter<BoolValue> SetSeedRandomlyParameter { 39 get { return (IValueParameter<BoolValue>)Parameters["SetSeedRandomly"]; } 40 } 41 42 public int Seed { 43 get { return RandomParameter.Value.Value; } 44 set { RandomParameter.Value.Value = value; } 45 } 46 47 public bool SetSeedRandomly { 48 get { return SetSeedRandomlyParameter.Value.Value; } 49 set { SetSeedRandomlyParameter.Value.Value = value; } 33 public sealed class RandomInitializer : NcaInitializer, IStochasticOperator { 34 public ILookupParameter<IRandom> RandomParameter { 35 get { return (ILookupParameter<IRandom>)Parameters["Random"]; } 50 36 } 51 37 52 38 [StorableConstructor] 53 pr otectedRandomInitializer(bool deserializing) : base(deserializing) { }54 pr otectedRandomInitializer(RandomInitializer original, Cloner cloner) : base(original, cloner) { }39 private RandomInitializer(bool deserializing) : base(deserializing) { } 40 private RandomInitializer(RandomInitializer original, Cloner cloner) : base(original, cloner) { } 55 41 public RandomInitializer() 56 42 : base() { 57 Parameters.Add(new ValueParameter<IntValue>("Seed", "The seed for the random number generator.", new IntValue(0))); 58 Parameters.Add(new ValueParameter<BoolValue>("SetSeedRandomly", "Whether the seed should be randomized for each call.", new BoolValue(true))); 43 Parameters.Add(new LookupParameter<IRandom>("Random", "The random number generator to use.")); 59 44 } 60 45 … … 63 48 } 64 49 65 public double[] Initialize(IClassificationProblemData data, int dimensions) { 66 var instances = data.TrainingIndices.Count(); 50 public override double[,] Initialize(IClassificationProblemData data, Scaling scaling, int dimensions) { 67 51 var attributes = data.AllowedInputVariables.Count(); 68 52 69 var random = new MersenneTwister(); 70 if (SetSeedRandomly) Seed = random.Next(); 71 random.Reset(Seed); 72 73 var range = data.AllowedInputVariables.Select(x => data.Dataset.GetDoubleValues(x).Max() - data.Dataset.GetDoubleValues(x).Min()).ToArray(); 74 var matrix = new double[attributes * dimensions]; 75 for (int i = 0; i < matrix.Length; i++) 76 matrix[i] = random.NextDouble() / range[i / dimensions]; 53 var random = RandomParameter.ActualValue; 54 var matrix = new double[attributes, dimensions]; 55 for (int i = 0; i < attributes; i++) 56 for (int j = 0; j < dimensions; j++) 57 matrix[i, j] = random.NextDouble(); 77 58 78 59 return matrix; -
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaAlgorithm.cs
r8681 r9270 21 21 22 22 using System; 23 using System.Collections.Generic;24 23 using System.Linq; 25 using System.Threading; 26 using HeuristicLab.Analysis; 24 using HeuristicLab.Algorithms.GradientDescent; 27 25 using HeuristicLab.Common; 28 26 using HeuristicLab.Core; 29 27 using HeuristicLab.Data; 28 using HeuristicLab.Operators; 30 29 using HeuristicLab.Optimization; 31 30 using HeuristicLab.Parameters; … … 33 32 using HeuristicLab.PluginInfrastructure; 34 33 using HeuristicLab.Problems.DataAnalysis; 34 using HeuristicLab.Random; 35 35 36 36 namespace HeuristicLab.Algorithms.DataAnalysis { 37 internal delegate void Reporter(double quality, double[] coefficients, double[] gradients);38 37 /// <summary> 39 38 /// Neighborhood Components Analysis … … 46 45 [Creatable("Data Analysis")] 47 46 [StorableClass] 48 public sealed class NcaAlgorithm : FixedDataAnalysisAlgorithm<IClassificationProblem> { 47 public sealed class NcaAlgorithm : EngineAlgorithm { 48 #region Parameter Names 49 private const string SeedParameterName = "Seed"; 50 private const string SetSeedRandomlyParameterName = "SetSeedRandomly"; 51 private const string KParameterName = "K"; 52 private const string DimensionsParameterName = "Dimensions"; 53 private const string InitializationParameterName = "Initialization"; 54 private const string NeighborSamplesParameterName = "NeighborSamples"; 55 private const string IterationsParameterName = "Iterations"; 56 private const string RegularizationParameterName = "Regularization"; 57 private const string NcaModelCreatorParameterName = "NcaModelCreator"; 58 private const string NcaSolutionCreatorParameterName = "NcaSolutionCreator"; 59 private const string ApproximateGradientsParameterName = "ApproximateGradients"; 60 private const string NcaMatrixParameterName = "NcaMatrix"; 61 private const string NcaMatrixGradientsParameterName = "NcaMatrixGradients"; 62 private const string QualityParameterName = "Quality"; 63 #endregion 64 65 public override Type ProblemType { get { return typeof(IClassificationProblem); } } 66 public new IClassificationProblem Problem { 67 get { return (IClassificationProblem)base.Problem; } 68 set { base.Problem = value; } 69 } 70 49 71 #region Parameter Properties 72 public IValueParameter<IntValue> SeedParameter { 73 get { return (IValueParameter<IntValue>)Parameters[SeedParameterName]; } 74 } 75 public IValueParameter<BoolValue> SetSeedRandomlyParameter { 76 get { return (IValueParameter<BoolValue>)Parameters[SetSeedRandomlyParameterName]; } 77 } 50 78 public IFixedValueParameter<IntValue> KParameter { 51 get { return (IFixedValueParameter<IntValue>)Parameters[ "K"]; }79 get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; } 52 80 } 53 81 public IFixedValueParameter<IntValue> DimensionsParameter { 54 get { return (IFixedValueParameter<IntValue>)Parameters[ "Dimensions"]; }55 } 56 public IConstrainedValueParameter<IN CAInitializer> InitializationParameter {57 get { return (IConstrainedValueParameter<IN CAInitializer>)Parameters["Initialization"]; }82 get { return (IFixedValueParameter<IntValue>)Parameters[DimensionsParameterName]; } 83 } 84 public IConstrainedValueParameter<INcaInitializer> InitializationParameter { 85 get { return (IConstrainedValueParameter<INcaInitializer>)Parameters[InitializationParameterName]; } 58 86 } 59 87 public IFixedValueParameter<IntValue> NeighborSamplesParameter { 60 get { return (IFixedValueParameter<IntValue>)Parameters[ "NeighborSamples"]; }88 get { return (IFixedValueParameter<IntValue>)Parameters[NeighborSamplesParameterName]; } 61 89 } 62 90 public IFixedValueParameter<IntValue> IterationsParameter { 63 get { return (IFixedValueParameter<IntValue>)Parameters[ "Iterations"]; }91 get { return (IFixedValueParameter<IntValue>)Parameters[IterationsParameterName]; } 64 92 } 65 93 public IFixedValueParameter<DoubleValue> RegularizationParameter { 66 get { return (IFixedValueParameter<DoubleValue>)Parameters["Regularization"]; } 94 get { return (IFixedValueParameter<DoubleValue>)Parameters[RegularizationParameterName]; } 95 } 96 public IValueParameter<BoolValue> ApproximateGradientsParameter { 97 get { return (IValueParameter<BoolValue>)Parameters[ApproximateGradientsParameterName]; } 98 } 99 public IValueParameter<INcaModelCreator> NcaModelCreatorParameter { 100 get { return (IValueParameter<INcaModelCreator>)Parameters[NcaModelCreatorParameterName]; } 101 } 102 public IValueParameter<INcaSolutionCreator> NcaSolutionCreatorParameter { 103 get { return (IValueParameter<INcaSolutionCreator>)Parameters[NcaSolutionCreatorParameterName]; } 67 104 } 68 105 #endregion 69 106 70 107 #region Properties 108 public int Seed { 109 get { return SeedParameter.Value.Value; } 110 set { SeedParameter.Value.Value = value; } 111 } 112 public bool SetSeedRandomly { 113 get { return SetSeedRandomlyParameter.Value.Value; } 114 set { SetSeedRandomlyParameter.Value.Value = value; } 115 } 71 116 public int K { 72 117 get { return KParameter.Value.Value; } … … 88 133 get { return RegularizationParameter.Value.Value; } 89 134 set { RegularizationParameter.Value.Value = value; } 135 } 136 public INcaModelCreator NcaModelCreator { 137 get { return NcaModelCreatorParameter.Value; } 138 set { NcaModelCreatorParameter.Value = value; } 139 } 140 public INcaSolutionCreator NcaSolutionCreator { 141 get { return NcaSolutionCreatorParameter.Value; } 142 set { NcaSolutionCreatorParameter.Value = value; } 90 143 } 91 144 #endregion … … 96 149 public NcaAlgorithm() 97 150 : base() { 98 Parameters.Add(new FixedValueParameter<IntValue>("K", "The K for the nearest neighbor.", new IntValue(3))); 99 Parameters.Add(new FixedValueParameter<IntValue>("Dimensions", "The number of dimensions that NCA should reduce the data to.", new IntValue(2))); 100 Parameters.Add(new ConstrainedValueParameter<INCAInitializer>("Initialization", "Which method should be used to initialize the matrix. Typically LDA (linear discriminant analysis) should provide a good estimate.")); 101 Parameters.Add(new FixedValueParameter<IntValue>("NeighborSamples", "How many of the neighbors should be sampled in order to speed up the calculation. This should be at least the value of k and at most the number of training instances minus one.", new IntValue(60))); 102 Parameters.Add(new FixedValueParameter<IntValue>("Iterations", "How many iterations the conjugate gradient (CG) method should be allowed to perform. The method might still terminate earlier if a local optima has already been reached.", new IntValue(50))); 103 Parameters.Add(new FixedValueParameter<DoubleValue>("Regularization", "A non-negative paramter which can be set to increase generalization and avoid overfitting. If set to 0 the algorithm is similar to NCA as proposed by Goldberger et al.", new DoubleValue(0))); 104 105 INCAInitializer defaultInitializer = null; 106 foreach (var initializer in ApplicationManager.Manager.GetInstances<INCAInitializer>().OrderBy(x => x.ItemName)) { 107 if (initializer is LDAInitializer) defaultInitializer = initializer; 151 Parameters.Add(new ValueParameter<IntValue>(SeedParameterName, "The seed of the random number generator.", new IntValue(0))); 152 Parameters.Add(new ValueParameter<BoolValue>(SetSeedRandomlyParameterName, "A boolean flag that indicates whether the seed should be randomly reset each time the algorithm is run.", new BoolValue(true))); 153 Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The K for the nearest neighbor.", new IntValue(3))); 154 Parameters.Add(new FixedValueParameter<IntValue>(DimensionsParameterName, "The number of dimensions that NCA should reduce the data to.", new IntValue(2))); 155 Parameters.Add(new ConstrainedValueParameter<INcaInitializer>(InitializationParameterName, "Which method should be used to initialize the matrix. Typically LDA (linear discriminant analysis) should provide a good estimate.")); 156 Parameters.Add(new FixedValueParameter<IntValue>(NeighborSamplesParameterName, "How many of the neighbors should be sampled in order to speed up the calculation. This should be at least the value of k and at most the number of training instances minus one will be used.", new IntValue(60))); 157 Parameters.Add(new FixedValueParameter<IntValue>(IterationsParameterName, "How many iterations the conjugate gradient (CG) method should be allowed to perform. The method might still terminate earlier if a local optima has already been reached.", new IntValue(50))); 158 Parameters.Add(new FixedValueParameter<DoubleValue>(RegularizationParameterName, "A non-negative paramter which can be set to increase generalization and avoid overfitting. If set to 0 the algorithm is similar to NCA as proposed by Goldberger et al.", new DoubleValue(0))); 159 Parameters.Add(new ValueParameter<INcaModelCreator>(NcaModelCreatorParameterName, "Creates an NCA model out of the matrix.", new NcaModelCreator())); 160 Parameters.Add(new ValueParameter<INcaSolutionCreator>(NcaSolutionCreatorParameterName, "Creates an NCA solution given a model and some data.", new NcaSolutionCreator())); 161 Parameters.Add(new ValueParameter<BoolValue>(ApproximateGradientsParameterName, "True if the gradient should be approximated otherwise they are computed exactly.", new BoolValue())); 162 163 NcaSolutionCreatorParameter.Hidden = true; 164 ApproximateGradientsParameter.Hidden = true; 165 166 INcaInitializer defaultInitializer = null; 167 foreach (var initializer in ApplicationManager.Manager.GetInstances<INcaInitializer>().OrderBy(x => x.ItemName)) { 168 if (initializer is LdaInitializer) defaultInitializer = initializer; 108 169 InitializationParameter.ValidValues.Add(initializer); 109 170 } 110 171 if (defaultInitializer != null) InitializationParameter.Value = defaultInitializer; 111 172 173 var randomCreator = new RandomCreator(); 174 var ncaInitializer = new Placeholder(); 175 var bfgsInitializer = new LbfgsInitializer(); 176 var makeStep = new LbfgsMakeStep(); 177 var branch = new ConditionalBranch(); 178 var gradientCalculator = new NcaGradientCalculator(); 179 var modelCreator = new Placeholder(); 180 var updateResults = new LbfgsUpdateResults(); 181 var analyzer = new LbfgsAnalyzer(); 182 var finalModelCreator = new Placeholder(); 183 var finalAnalyzer = new LbfgsAnalyzer(); 184 var solutionCreator = new Placeholder(); 185 186 OperatorGraph.InitialOperator = randomCreator; 187 randomCreator.SeedParameter.ActualName = SeedParameterName; 188 randomCreator.SeedParameter.Value = null; 189 randomCreator.SetSeedRandomlyParameter.ActualName = SetSeedRandomlyParameterName; 190 randomCreator.SetSeedRandomlyParameter.Value = null; 191 randomCreator.Successor = ncaInitializer; 192 193 ncaInitializer.Name = "(NcaInitializer)"; 194 ncaInitializer.OperatorParameter.ActualName = InitializationParameterName; 195 ncaInitializer.Successor = bfgsInitializer; 196 197 bfgsInitializer.IterationsParameter.ActualName = IterationsParameterName; 198 bfgsInitializer.PointParameter.ActualName = NcaMatrixParameterName; 199 bfgsInitializer.ApproximateGradientsParameter.ActualName = ApproximateGradientsParameterName; 200 bfgsInitializer.Successor = makeStep; 201 202 makeStep.StateParameter.ActualName = bfgsInitializer.StateParameter.Name; 203 makeStep.PointParameter.ActualName = NcaMatrixParameterName; 204 makeStep.Successor = branch; 205 206 branch.ConditionParameter.ActualName = makeStep.TerminationCriterionParameter.Name; 207 branch.FalseBranch = gradientCalculator; 208 branch.TrueBranch = finalModelCreator; 209 210 gradientCalculator.Successor = modelCreator; 211 212 modelCreator.OperatorParameter.ActualName = NcaModelCreatorParameterName; 213 modelCreator.Successor = updateResults; 214 215 updateResults.StateParameter.ActualName = bfgsInitializer.StateParameter.Name; 216 updateResults.QualityParameter.ActualName = QualityParameterName; 217 updateResults.QualityGradientsParameter.ActualName = NcaMatrixGradientsParameterName; 218 updateResults.ApproximateGradientsParameter.ActualName = ApproximateGradientsParameterName; 219 updateResults.Successor = analyzer; 220 221 analyzer.QualityParameter.ActualName = QualityParameterName; 222 analyzer.PointParameter.ActualName = NcaMatrixParameterName; 223 analyzer.QualityGradientsParameter.ActualName = NcaMatrixGradientsParameterName; 224 analyzer.StateParameter.ActualName = bfgsInitializer.StateParameter.Name; 225 analyzer.PointsTableParameter.ActualName = "Matrix table"; 226 analyzer.QualityGradientsTableParameter.ActualName = "Gradients table"; 227 analyzer.QualitiesTableParameter.ActualName = "Qualities"; 228 analyzer.Successor = makeStep; 229 230 finalModelCreator.OperatorParameter.ActualName = NcaModelCreatorParameterName; 231 finalModelCreator.Successor = finalAnalyzer; 232 233 finalAnalyzer.QualityParameter.ActualName = QualityParameterName; 234 finalAnalyzer.PointParameter.ActualName = NcaMatrixParameterName; 235 finalAnalyzer.QualityGradientsParameter.ActualName = NcaMatrixGradientsParameterName; 236 finalAnalyzer.PointsTableParameter.ActualName = analyzer.PointsTableParameter.ActualName; 237 finalAnalyzer.QualityGradientsTableParameter.ActualName = analyzer.QualityGradientsTableParameter.ActualName; 238 finalAnalyzer.QualitiesTableParameter.ActualName = analyzer.QualitiesTableParameter.ActualName; 239 finalAnalyzer.Successor = solutionCreator; 240 241 solutionCreator.OperatorParameter.ActualName = NcaSolutionCreatorParameterName; 242 112 243 Problem = new ClassificationProblem(); 113 244 } … … 117 248 } 118 249 119 [StorableHook(HookType.AfterDeserialization)]120 private void AfterDeserialization() {121 if (!Parameters.ContainsKey("Regularization")) {122 Parameters.Add(new FixedValueParameter<DoubleValue>("Regularization", "A non-negative paramter which can be set to increase generalization and avoid overfitting. If set to 0 the algorithm is similar to NCA as proposed by Goldberger et al.", new DoubleValue(0)));123 }124 }125 126 250 public override void Prepare() { 127 251 if (Problem != null) base.Prepare(); 128 252 } 129 130 protected override void Run() {131 var initializer = InitializationParameter.Value;132 133 var clonedProblem = (IClassificationProblemData)Problem.ProblemData.Clone();134 var model = Train(clonedProblem, K, Dimensions, NeighborSamples, Regularization, Iterations, initializer.Initialize(clonedProblem, Dimensions), ReportQuality, CancellationToken.None);135 var solution = model.CreateClassificationSolution(clonedProblem);136 if (!Results.ContainsKey("ClassificationSolution"))137 Results.Add(new Result("ClassificationSolution", "The classification solution.", solution));138 else Results["ClassificationSolution"].Value = solution;139 }140 141 public static INcaClassificationSolution CreateClassificationSolution(IClassificationProblemData data, int k, int dimensions, int neighborSamples, double regularization, int iterations, INCAInitializer initializer) {142 var clonedProblem = (IClassificationProblemData)data.Clone();143 var model = Train(clonedProblem, k, dimensions, neighborSamples, regularization, iterations, initializer);144 return model.CreateClassificationSolution(clonedProblem);145 }146 147 public static INcaModel Train(IClassificationProblemData problemData, int k, int dimensions, int neighborSamples, double regularization, int iterations, INCAInitializer initializer) {148 return Train(problemData, k, dimensions, neighborSamples, regularization, iterations, initializer.Initialize(problemData, dimensions), null, CancellationToken.None);149 }150 151 public static INcaModel Train(IClassificationProblemData problemData, int k, int neighborSamples, double regularization, int iterations, double[,] initalMatrix) {152 var matrix = new double[initalMatrix.Length];153 for (int i = 0; i < initalMatrix.GetLength(0); i++)154 for (int j = 0; j < initalMatrix.GetLength(1); j++)155 matrix[i * initalMatrix.GetLength(1) + j] = initalMatrix[i, j];156 return Train(problemData, k, initalMatrix.GetLength(1), neighborSamples, regularization, iterations, matrix, null, CancellationToken.None);157 }158 159 private static INcaModel Train(IClassificationProblemData data, int k, int dimensions, int neighborSamples, double regularization, int iterations, double[] matrix, Reporter reporter, CancellationToken cancellation) {160 var scaling = new Scaling(data.Dataset, data.AllowedInputVariables, data.TrainingIndices);161 var scaledData = AlglibUtil.PrepareAndScaleInputMatrix(data.Dataset, data.AllowedInputVariables, data.TrainingIndices, scaling);162 var classes = data.Dataset.GetDoubleValues(data.TargetVariable, data.TrainingIndices).ToArray();163 var attributes = scaledData.GetLength(1);164 165 alglib.mincgstate state;166 alglib.mincgreport rep;167 alglib.mincgcreate(matrix, out state);168 alglib.mincgsetcond(state, 0, 0, 0, iterations);169 alglib.mincgsetxrep(state, true);170 //alglib.mincgsetgradientcheck(state, 0.01);171 int neighborSampleSize = neighborSamples;172 Optimize(state, scaledData, classes, dimensions, neighborSampleSize, regularization, cancellation, reporter);173 alglib.mincgresults(state, out matrix, out rep);174 if (rep.terminationtype == -7) throw new InvalidOperationException("Gradient verification failed.");175 176 var transformationMatrix = new double[attributes, dimensions];177 var counter = 0;178 for (var i = 0; i < attributes; i++)179 for (var j = 0; j < dimensions; j++)180 transformationMatrix[i, j] = matrix[counter++];181 182 return new NcaModel(k, transformationMatrix, data.Dataset, data.TrainingIndices, data.TargetVariable, data.AllowedInputVariables, scaling, data.ClassValues.ToArray());183 }184 185 private static void Optimize(alglib.mincgstate state, double[,] data, double[] classes, int dimensions, int neighborSampleSize, double lambda, CancellationToken cancellation, Reporter reporter) {186 while (alglib.mincgiteration(state)) {187 if (cancellation.IsCancellationRequested) break;188 if (state.needfg) {189 Gradient(state.x, ref state.innerobj.f, state.innerobj.g, data, classes, dimensions, neighborSampleSize, lambda);190 continue;191 }192 if (state.innerobj.xupdated) {193 if (reporter != null)194 reporter(state.innerobj.f, state.innerobj.x, state.innerobj.g);195 continue;196 }197 throw new InvalidOperationException("Neighborhood Components Analysis: Error in Optimize() (some derivatives were not provided?)");198 }199 }200 201 private static void Gradient(double[] A, ref double func, double[] grad, double[,] data, double[] classes, int dimensions, int neighborSampleSize, double lambda) {202 var instances = data.GetLength(0);203 var attributes = data.GetLength(1);204 205 var AMatrix = new Matrix(A, A.Length / dimensions, dimensions);206 207 alglib.sparsematrix probabilities;208 alglib.sparsecreate(instances, instances, out probabilities);209 var transformedDistances = new Dictionary<int, double>(instances);210 for (int i = 0; i < instances; i++) {211 var iVector = new Matrix(GetRow(data, i), data.GetLength(1));212 for (int k = 0; k < instances; k++) {213 if (k == i) {214 transformedDistances.Remove(k);215 continue;216 }217 var kVector = new Matrix(GetRow(data, k));218 transformedDistances[k] = Math.Exp(-iVector.Multiply(AMatrix).Subtract(kVector.Multiply(AMatrix)).SumOfSquares());219 }220 var normalization = transformedDistances.Sum(x => x.Value);221 if (normalization <= 0) continue;222 foreach (var s in transformedDistances.Where(x => x.Value > 0).OrderByDescending(x => x.Value).Take(neighborSampleSize)) {223 alglib.sparseset(probabilities, i, s.Key, s.Value / normalization);224 }225 }226 alglib.sparseconverttocrs(probabilities); // needed to enumerate in order (top-down and left-right)227 228 int t0 = 0, t1 = 0, r, c;229 double val;230 var pi = new double[instances];231 while (alglib.sparseenumerate(probabilities, ref t0, ref t1, out r, out c, out val)) {232 if (classes[r].IsAlmost(classes[c])) {233 pi[r] += val;234 }235 }236 237 var innerSum = new double[attributes, attributes];238 while (alglib.sparseenumerate(probabilities, ref t0, ref t1, out r, out c, out val)) {239 var vector = new Matrix(GetRow(data, r)).Subtract(new Matrix(GetRow(data, c)));240 vector.OuterProduct(vector).Multiply(val * pi[r]).AddTo(innerSum);241 242 if (classes[r].IsAlmost(classes[c])) {243 vector.OuterProduct(vector).Multiply(-val).AddTo(innerSum);244 }245 }246 247 func = -pi.Sum() + lambda * AMatrix.SumOfSquares();248 249 r = 0;250 var newGrad = AMatrix.Multiply(-2.0).Transpose().Multiply(new Matrix(innerSum)).Transpose();251 foreach (var g in newGrad) {252 grad[r] = g + lambda * 2 * A[r];253 r++;254 }255 }256 257 private void ReportQuality(double func, double[] coefficients, double[] gradients) {258 var instances = Problem.ProblemData.TrainingIndices.Count();259 DataTable qualities;260 if (!Results.ContainsKey("Optimization")) {261 qualities = new DataTable("Optimization");262 qualities.Rows.Add(new DataRow("Quality", string.Empty));263 Results.Add(new Result("Optimization", qualities));264 } else qualities = (DataTable)Results["Optimization"].Value;265 qualities.Rows["Quality"].Values.Add(-func / instances);266 267 string[] attributNames = Problem.ProblemData.AllowedInputVariables.ToArray();268 if (gradients != null) {269 DataTable grads;270 if (!Results.ContainsKey("Gradients")) {271 grads = new DataTable("Gradients");272 for (int i = 0; i < gradients.Length; i++)273 grads.Rows.Add(new DataRow(attributNames[i / Dimensions] + "-" + (i % Dimensions), string.Empty));274 Results.Add(new Result("Gradients", grads));275 } else grads = (DataTable)Results["Gradients"].Value;276 for (int i = 0; i < gradients.Length; i++)277 grads.Rows[attributNames[i / Dimensions] + "-" + (i % Dimensions)].Values.Add(gradients[i]);278 }279 280 if (!Results.ContainsKey("Quality")) {281 Results.Add(new Result("Quality", new DoubleValue(-func / instances)));282 } else ((DoubleValue)Results["Quality"].Value).Value = -func / instances;283 284 var attributes = attributNames.Length;285 var transformationMatrix = new double[attributes, Dimensions];286 var counter = 0;287 for (var i = 0; i < attributes; i++)288 for (var j = 0; j < Dimensions; j++)289 transformationMatrix[i, j] = coefficients[counter++];290 291 var scaling = new Scaling(Problem.ProblemData.Dataset, attributNames, Problem.ProblemData.TrainingIndices);292 var model = new NcaModel(K, transformationMatrix, Problem.ProblemData.Dataset, Problem.ProblemData.TrainingIndices, Problem.ProblemData.TargetVariable, attributNames, scaling, Problem.ProblemData.ClassValues.ToArray());293 294 IClassificationSolution solution = model.CreateClassificationSolution(Problem.ProblemData);295 if (!Results.ContainsKey("ClassificationSolution")) {296 Results.Add(new Result("ClassificationSolution", solution));297 } else {298 Results["ClassificationSolution"].Value = solution;299 }300 }301 302 private static IEnumerable<double> GetRow(double[,] data, int row) {303 for (int i = 0; i < data.GetLength(1); i++)304 yield return data[row, i];305 }306 307 253 } 308 254 }
Note: See TracChangeset
for help on using the changeset viewer.