Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
08/08/12 16:46:53 (12 years ago)
Author:
abeham
Message:

#1913:

  • Improved speed of NCA
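  • Reorganized things: NeighborhoodComponentsAnalysis is now a sealed, creatable FixedDataAnalysisAlgorithm<IClassificationProblem> with "k", "ReduceDimensions" and "Initialization" parameters; the static training code (Train, Gradient, GetRow, CreateNCASolution, OptimizationInfo) was removed from this file, and Run() now calls Auxiliary.Train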
File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/NCA/HeuristicLab.Algorithms.NCA/3.3/NeighborhoodComponentsAnalysis.cs

    r8425 r8437  
    1 #region License Information
     1#region License Information
    22/* HeuristicLab
    33 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     
    2020#endregion
    2121
    22 using System;
    23 using System.Collections.Generic;
    2422using System.Linq;
     23using HeuristicLab.Algorithms.DataAnalysis;
    2524using HeuristicLab.Common;
     25using HeuristicLab.Core;
     26using HeuristicLab.Data;
     27using HeuristicLab.Optimization;
     28using HeuristicLab.Parameters;
     29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     30using HeuristicLab.PluginInfrastructure;
    2631using HeuristicLab.Problems.DataAnalysis;
    2732
    2833namespace HeuristicLab.Algorithms.NCA {
    29   public class NeighborhoodComponentsAnalysis {
     34  /// <summary>
     35  /// Neighborhood Components Analysis
     36  /// </summary>
     37  [Item("Neighborhood Components Analysis", "NCA is described in J. Goldberger, S. Roweis, G. Hinton, R. Salakhutdinov. 2005. Neighbourhood Component Analysis. Advances in Neural Information Processing Systems, 17. pp. 513-520.")]
     38  [Creatable("Data Analysis")]
     39  [StorableClass]
     40  public sealed class NeighborhoodComponentsAnalysis : FixedDataAnalysisAlgorithm<IClassificationProblem> {
     41    #region Parameter Properties
     42    public IValueLookupParameter<IntValue> KParameter {
     43      get { return (IValueLookupParameter<IntValue>)Parameters["k"]; }
     44    }
     45    public IValueLookupParameter<IntValue> ReduceDimensionsParameter {
     46      get { return (IValueLookupParameter<IntValue>)Parameters["ReduceDimensions"]; }
     47    }
     48    private IConstrainedValueParameter<INCAInitializer> InitializationParameter {
     49      get { return (IConstrainedValueParameter<INCAInitializer>)Parameters["Initialization"]; }
     50    }
     51    #endregion
    3052
    31     public static INCAModel Train(IClassificationProblemData data, int k, int reduceDimensions, INCAInitializer initializer) {
    32       var instances = data.TrainingIndices.Count();
    33       var attributes = data.AllowedInputVariables.Count();
     53    #region Properties
     54    public IntValue K {
     55      get { return KParameter.Value; }
     56    }
     57    public IntValue ReduceDimensions {
     58      get { return ReduceDimensionsParameter.Value; }
     59    }
     60    #endregion
    3461
    35       double[] matrix = initializer.Initialize(data, reduceDimensions);
     62    [StorableConstructor]
     63    private NeighborhoodComponentsAnalysis(bool deserializing) : base(deserializing) { }
     64    private NeighborhoodComponentsAnalysis(NeighborhoodComponentsAnalysis original, Cloner cloner) : base(original, cloner) { }
     65    public NeighborhoodComponentsAnalysis()
     66      : base() {
     67      Parameters.Add(new ValueLookupParameter<IntValue>("k", "The k for the nearest neighbor.", new IntValue(1)));
     68      Parameters.Add(new ValueLookupParameter<IntValue>("ReduceDimensions", "The number of dimensions that NCA should reduce the data to.", new IntValue(2)));
     69      Parameters.Add(new ConstrainedValueParameter<INCAInitializer>("Initialization", "Which method should be used to initialize the matrix. Typically LDA (linear discriminant analysis) should provide a good estimate."));
    3670
    37       alglib.mincgstate state;
    38       alglib.mincgreport rep;
     71      INCAInitializer defaultInitializer = null;
     72      foreach (var initializer in ApplicationManager.Manager.GetInstances<INCAInitializer>().OrderBy(x => x.ItemName)) {
     73        if (initializer is LDAInitializer) defaultInitializer = initializer;
     74        InitializationParameter.ValidValues.Add(initializer);
     75      }
     76      if (defaultInitializer != null) InitializationParameter.Value = defaultInitializer;
    3977
    40       // first run
    41       alglib.mincgcreate(matrix, out state);
    42       alglib.mincgsetcond(state, 0.0000000001, 0, 0, 0);
    43       alglib.mincgoptimize(state, Gradient, null, new OptimizationInfo(data, reduceDimensions));
    44       alglib.mincgresults(state, out matrix, out rep);
    45 
    46       var transformationMatrix = new double[attributes, reduceDimensions];
    47       var counter = 0;
    48       for (var i = 0; i < attributes; i++)
    49         for (var j = 0; j < reduceDimensions; j++)
    50           transformationMatrix[i, j] = matrix[counter++];
    51 
    52       var transformedTrainingset = new double[instances, reduceDimensions];
    53       var rowCount = 0;
    54       foreach (var r in data.TrainingIndices) {
    55         var i = 0;
    56         foreach (var v in data.AllowedInputVariables) {
    57           var val = data.Dataset.GetDoubleValue(v, r);
    58           for (var j = 0; j < reduceDimensions; j++)
    59             transformedTrainingset[rowCount, j] += val * transformationMatrix[i, j];
    60           i++;
    61         }
    62         rowCount++;
    63       }
    64 
    65       return new NCAModel(transformedTrainingset, transformationMatrix, k, data.TargetVariable, data.AllowedInputVariables,
    66         data.Dataset.GetDoubleValues(data.TargetVariable)
    67           .Select((v, i) => new { I = i, V = v })
    68           .Where(x => x.I >= data.TrainingPartition.Start && x.I < data.TrainingPartition.End
    69               && !(x.I >= data.TestPartition.Start && x.I < data.TestPartition.End))
    70           .Select(x => x.V).ToArray());
     78      Problem = new ClassificationProblem();
    7179    }
    7280
    73     private static void Gradient(double[] A, ref double func, double[] grad, object obj) {
    74       var info = (OptimizationInfo)obj;
    75       var instances = info.ProblemData.TrainingIndices.ToArray();
    76       var attributes = info.ProblemData.AllowedInputVariables.Count();
    77       var AMatrix = new Matrix(A, A.Length / info.ReduceDimensions, info.ReduceDimensions);
    78 
    79       alglib.sparsematrix probabilities;
    80       alglib.sparsecreate(instances.Length, instances.Length, out probabilities);
    81       var distances = new double[instances.Length];
    82       for (int i = 0; i < instances.Length - 1; i++) {
    83         var iVector = new Matrix(GetRow(info.ProblemData, instances[i]));
    84         var denom = 0.0;
    85         for (int k = 0; k < instances.Length; k++) {
    86           if (k == i) continue;
    87           var kVector = new Matrix(GetRow(info.ProblemData, instances[k]));
    88           distances[k] = iVector.Multiply(AMatrix).Subtract(kVector.Multiply(AMatrix)).Length();
    89           denom += Math.Exp(-(distances[k] * distances[k]));
    90         }
    91         if (denom > 0) {
    92           for (int j = i + 1; j < instances.Length; j++) {
    93             if (i == j) continue;
    94             var v = Math.Exp(-(distances[j] * distances[j])) / denom;
    95             alglib.sparseset(probabilities, i, j, v);
    96             alglib.sparseset(probabilities, j, i, v);
    97           }
    98         }
    99       }
    100       alglib.sparseconverttocrs(probabilities); // needed to enumerate in order (top-down and left-right)
    101 
    102       int t0 = 0, t1 = 0, r, c;
    103       double val;
    104       var classes = info.ProblemData.Dataset.GetDoubleValues(info.ProblemData.TargetVariable, instances).ToArray();
    105       var pi = new double[instances.Length];
    106       while (alglib.sparseenumerate(probabilities, ref t0, ref t1, out r, out c, out val)) {
    107         if (classes[r].IsAlmost(classes[c]))
    108           pi[r] += val;
    109       }
    110 
    111       var innerSum = new double[attributes, attributes];
    112       while (alglib.sparseenumerate(probabilities, ref t0, ref t1, out r, out c, out val)) {
    113         var vector = new Matrix(GetRow(info.ProblemData, instances[r])).Subtract(new Matrix(GetRow(info.ProblemData, instances[c]))).Apply();
    114         vector.OuterProduct(vector).Multiply(val * pi[r]).AddTo(innerSum);
    115 
    116         if (classes[r].IsAlmost(classes[c])) {
    117           vector.OuterProduct(vector).Multiply(-val).AddTo(innerSum);
    118         }
    119       }
    120 
    121       func = -pi.Sum();
    122 
    123       grad = AMatrix.Multiply(-2.0).Transpose().Multiply(new Matrix(innerSum)).Transpose().ToArray();
     81    public override IDeepCloneable Clone(Cloner cloner) {
     82      return new NeighborhoodComponentsAnalysis(this, cloner);
    12483    }
    12584
    126     private static IEnumerable<double> GetRow(IClassificationProblemData data, int row) {
    127       return data.AllowedInputVariables.Select(v => data.Dataset.GetDoubleValue(v, row));
     85    public override void Prepare() {
     86      if (Problem != null) base.Prepare();
    12887    }
    12988
    130     public static NCAClassificationSolution CreateNCASolution(IClassificationProblemData problemData, int k, int reduceDimensions, INCAInitializer initializer) {
    131       return new NCAClassificationSolution(problemData, Train(problemData, k, reduceDimensions, initializer));
    132     }
     89    protected override void Run() {
     90      var k = K.Value;
     91      var dimensions = ReduceDimensions.Value;
     92      var initializer = InitializationParameter.Value;
    13393
    134     private class OptimizationInfo {
    135       public IClassificationProblemData ProblemData { get; set; }
    136       public int ReduceDimensions { get; set; }
    137       public OptimizationInfo(IClassificationProblemData problem, int reduceDimensions) {
    138         this.ProblemData = problem;
    139         this.ReduceDimensions = reduceDimensions;
    140       }
     94      var clonedProblem = (IClassificationProblemData)Problem.ProblemData.Clone();
     95      var classification = new NCAClassificationSolution(clonedProblem, Auxiliary.Train(clonedProblem, k, dimensions, initializer));
     96      Results.Add(new Result("ClassificationSolution", "The classification solution.", classification));
     97      // TODO: result that shows the LOO performance
    14198    }
    14299  }
Note: See TracChangeset for help on using the changeset viewer.