Free cookie consent management tool by TermsFeed Policy Generator

Changeset 8437


Ignore:
Timestamp:
08/08/12 16:46:53 (12 years ago)
Author:
abeham
Message:

#1913:

  • Improved speed of NCA
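  • Reorganized things (e.g. Matrix.Length renamed to VectorLength, INCAModel now exposes Reduce instead of TransformationMatrix, NCADimensionalityReductionView renamed to NCADimensionReductionView)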
Location:
branches/NCA
Files:
5 added
4 deleted
10 edited

Legend:

Unmodified
Added
Removed
  • branches/NCA/HeuristicLab.Algorithms.NCA.Tests/MatrixTest.cs

    r8424 r8437  
    122122    public void LengthTest() {
    123123      var vector = new double[] { 5, 4, 2, 2 };
    124       Assert.AreEqual(new Matrix_Accessor(vector, vector.Length).Length(), 7);
     124      Assert.AreEqual(new Matrix_Accessor(vector, vector.Length).VectorLength(), 7);
    125125      vector = new double[] { 5, -4, 2, 2 };
    126       Assert.AreEqual(new Matrix_Accessor(vector, vector.Length).Length(), 7);
     126      Assert.AreEqual(new Matrix_Accessor(vector, vector.Length).VectorLength(), 7);
    127127      vector = new double[] { 3, 2, 1, 5 };
    128       Assert.IsTrue(new Matrix_Accessor(vector, vector.Length).Length().IsAlmost(6.2449979983983982058468931209398));
     128      Assert.IsTrue(new Matrix_Accessor(vector, vector.Length).VectorLength().IsAlmost(6.2449979983983982058468931209398));
    129129    }
    130130
  • branches/NCA/HeuristicLab.Algorithms.NCA.Views/3.3/HeuristicLab.Algorithms.NCA.Views-3.3.csproj

    r8427 r8437  
    168168  <ItemGroup>
    169169    <None Include="Plugin.cs.frame" />
    170     <Compile Include="NCADimensionalityReductionView.cs">
     170    <Compile Include="NCADimensionReductionView.cs">
    171171      <SubType>UserControl</SubType>
    172172    </Compile>
    173     <Compile Include="NCADimensionalityReductionView.Designer.cs">
    174       <DependentUpon>NCADimensionalityReductionView.cs</DependentUpon>
     173    <Compile Include="NCADimensionReductionView.Designer.cs">
     174      <DependentUpon>NCADimensionReductionView.cs</DependentUpon>
    175175    </Compile>
    176176    <Compile Include="Plugin.cs" />
     
    188188  </ItemGroup>
    189189  <ItemGroup>
    190     <EmbeddedResource Include="NCADimensionalityReductionView.resx">
    191       <DependentUpon>NCADimensionalityReductionView.cs</DependentUpon>
     190    <EmbeddedResource Include="NCADimensionReductionView.resx">
     191      <DependentUpon>NCADimensionReductionView.cs</DependentUpon>
    192192    </EmbeddedResource>
    193193  </ItemGroup>
  • branches/NCA/HeuristicLab.Algorithms.NCA/3.3/HeuristicLab.Algorithms.NCA-3.3.csproj

    r8425 r8437  
    9999  </ItemGroup>
    100100  <ItemGroup>
     101    <Compile Include="INCAClassificationSolution.cs" />
    101102    <Compile Include="Initialization\INCAInitializer.cs" />
    102103    <Compile Include="INCAModel.cs" />
     
    105106    <Compile Include="Initialization\RandomInitializer.cs" />
    106107    <Compile Include="Matrix.cs" />
    107     <Compile Include="NCA.cs" />
     108    <Compile Include="NeighborhoodComponentsAnalysis.cs" />
    108109    <Compile Include="NCAClassificationSolution.cs" />
    109110    <Compile Include="NCAModel.cs" />
    110     <Compile Include="NeighborhoodComponentsAnalysis.cs" />
     111    <Compile Include="Auxiliary.cs" />
    111112    <Compile Include="Plugin.cs" />
    112113    <Compile Include="Properties\AssemblyInfo.cs" />
  • branches/NCA/HeuristicLab.Algorithms.NCA/3.3/INCAModel.cs

    r8427 r8437  
    2020#endregion
    2121
     22using System.Collections.Generic;
    2223using HeuristicLab.Problems.DataAnalysis;
    2324
    2425namespace HeuristicLab.Algorithms.NCA {
    2526  public interface INCAModel : IClassificationModel {
    26     double[,] TransformationMatrix { get; }
     27    double[,] Reduce(Dataset dataset, IEnumerable<int> rows);
    2728  }
    2829}
  • branches/NCA/HeuristicLab.Algorithms.NCA/3.3/Initialization/RandomInitializer.cs

    r8425 r8437  
    7474      var matrix = new double[attributes * dimensions];
    7575      for (int i = 0; i < matrix.Length; i++)
    76         matrix[i] = random.NextDouble() * range[i / dimensions];
     76        matrix[i] = random.NextDouble() / range[i / dimensions];
    7777
    7878      return matrix;
  • branches/NCA/HeuristicLab.Algorithms.NCA/3.3/Matrix.cs

    r8424 r8437  
    125125    }
    126126
    127     public double Length() {
     127    public double VectorLength() {
     128      return Math.Sqrt(SquaredVectorLength());
     129    }
     130
     131    public double SquaredVectorLength() {
    128132      if (Rows != 1) throw new ArgumentException("Length only works on vectors.");
    129       return Math.Sqrt(values.Sum(x => x * x));
     133      return values.Sum(x => x * x);
    130134    }
    131135
  • branches/NCA/HeuristicLab.Algorithms.NCA/3.3/NCAClassificationSolution.cs

    r8412 r8437  
    2828  [Item("NCAClassificationSolution", "")]
    2929  [StorableClass]
    30   public class NCAClassificationSolution : ClassificationSolution {
     30  public class NCAClassificationSolution : ClassificationSolution, INCAClassificationSolution {
    3131
    3232    public new INCAModel Model {
  • branches/NCA/HeuristicLab.Algorithms.NCA/3.3/NCAModel.cs

    r8420 r8437  
    129129      return CreateClassificationSolution(problemData);
    130130    }
     131
     132    public double[,] Reduce(Dataset dataset, IEnumerable<int> rows) {
     133      var result = new double[rows.Count(), transformationMatrix.GetLength(1)];
     134      int v = 0;
     135      foreach (var r in rows) {
     136        int i = 0;
     137        foreach (var variable in allowedInputVariables) {
     138          double val = dataset.GetDoubleValue(variable, r);
     139          for (int j = 0; j < result.GetLength(1); j++)
     140            result[v, j] += val * transformationMatrix[i, j];
     141          i++;
     142        }
     143        v++;
     144      }
     145      return result;
     146    }
    131147  }
    132148}
  • branches/NCA/HeuristicLab.Algorithms.NCA/3.3/NeighborhoodComponentsAnalysis.cs

    r8425 r8437  
    1 #region License Information
     1#region License Information
    22/* HeuristicLab
    33 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     
    2020#endregion
    2121
    22 using System;
    23 using System.Collections.Generic;
    2422using System.Linq;
     23using HeuristicLab.Algorithms.DataAnalysis;
    2524using HeuristicLab.Common;
     25using HeuristicLab.Core;
     26using HeuristicLab.Data;
     27using HeuristicLab.Optimization;
     28using HeuristicLab.Parameters;
     29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
     30using HeuristicLab.PluginInfrastructure;
    2631using HeuristicLab.Problems.DataAnalysis;
    2732
    2833namespace HeuristicLab.Algorithms.NCA {
    29   public class NeighborhoodComponentsAnalysis {
     34  /// <summary>
     35  /// Neighborhood Components Analysis
     36  /// </summary>
     37  [Item("Neighborhood Components Analysis", "NCA is described in J. Goldberger, S. Roweis, G. Hinton, R. Salakhutdinov. 2005. Neighbourhood Component Analysis. Advances in Neural Information Processing Systems, 17. pp. 513-520.")]
     38  [Creatable("Data Analysis")]
     39  [StorableClass]
     40  public sealed class NeighborhoodComponentsAnalysis : FixedDataAnalysisAlgorithm<IClassificationProblem> {
     41    #region Parameter Properties
     42    public IValueLookupParameter<IntValue> KParameter {
     43      get { return (IValueLookupParameter<IntValue>)Parameters["k"]; }
     44    }
     45    public IValueLookupParameter<IntValue> ReduceDimensionsParameter {
     46      get { return (IValueLookupParameter<IntValue>)Parameters["ReduceDimensions"]; }
     47    }
     48    private IConstrainedValueParameter<INCAInitializer> InitializationParameter {
     49      get { return (IConstrainedValueParameter<INCAInitializer>)Parameters["Initialization"]; }
     50    }
     51    #endregion
    3052
    31     public static INCAModel Train(IClassificationProblemData data, int k, int reduceDimensions, INCAInitializer initializer) {
    32       var instances = data.TrainingIndices.Count();
    33       var attributes = data.AllowedInputVariables.Count();
     53    #region Properties
     54    public IntValue K {
     55      get { return KParameter.Value; }
     56    }
     57    public IntValue ReduceDimensions {
     58      get { return ReduceDimensionsParameter.Value; }
     59    }
     60    #endregion
    3461
    35       double[] matrix = initializer.Initialize(data, reduceDimensions);
     62    [StorableConstructor]
     63    private NeighborhoodComponentsAnalysis(bool deserializing) : base(deserializing) { }
     64    private NeighborhoodComponentsAnalysis(NeighborhoodComponentsAnalysis original, Cloner cloner) : base(original, cloner) { }
     65    public NeighborhoodComponentsAnalysis()
     66      : base() {
     67      Parameters.Add(new ValueLookupParameter<IntValue>("k", "The k for the nearest neighbor.", new IntValue(1)));
     68      Parameters.Add(new ValueLookupParameter<IntValue>("ReduceDimensions", "The number of dimensions that NCA should reduce the data to.", new IntValue(2)));
     69      Parameters.Add(new ConstrainedValueParameter<INCAInitializer>("Initialization", "Which method should be used to initialize the matrix. Typically LDA (linear discriminant analysis) should provide a good estimate."));
    3670
    37       alglib.mincgstate state;
    38       alglib.mincgreport rep;
     71      INCAInitializer defaultInitializer = null;
     72      foreach (var initializer in ApplicationManager.Manager.GetInstances<INCAInitializer>().OrderBy(x => x.ItemName)) {
     73        if (initializer is LDAInitializer) defaultInitializer = initializer;
     74        InitializationParameter.ValidValues.Add(initializer);
     75      }
     76      if (defaultInitializer != null) InitializationParameter.Value = defaultInitializer;
    3977
    40       // first run
    41       alglib.mincgcreate(matrix, out state);
    42       alglib.mincgsetcond(state, 0.0000000001, 0, 0, 0);
    43       alglib.mincgoptimize(state, Gradient, null, new OptimizationInfo(data, reduceDimensions));
    44       alglib.mincgresults(state, out matrix, out rep);
    45 
    46       var transformationMatrix = new double[attributes, reduceDimensions];
    47       var counter = 0;
    48       for (var i = 0; i < attributes; i++)
    49         for (var j = 0; j < reduceDimensions; j++)
    50           transformationMatrix[i, j] = matrix[counter++];
    51 
    52       var transformedTrainingset = new double[instances, reduceDimensions];
    53       var rowCount = 0;
    54       foreach (var r in data.TrainingIndices) {
    55         var i = 0;
    56         foreach (var v in data.AllowedInputVariables) {
    57           var val = data.Dataset.GetDoubleValue(v, r);
    58           for (var j = 0; j < reduceDimensions; j++)
    59             transformedTrainingset[rowCount, j] += val * transformationMatrix[i, j];
    60           i++;
    61         }
    62         rowCount++;
    63       }
    64 
    65       return new NCAModel(transformedTrainingset, transformationMatrix, k, data.TargetVariable, data.AllowedInputVariables,
    66         data.Dataset.GetDoubleValues(data.TargetVariable)
    67           .Select((v, i) => new { I = i, V = v })
    68           .Where(x => x.I >= data.TrainingPartition.Start && x.I < data.TrainingPartition.End
    69               && !(x.I >= data.TestPartition.Start && x.I < data.TestPartition.End))
    70           .Select(x => x.V).ToArray());
     78      Problem = new ClassificationProblem();
    7179    }
    7280
    73     private static void Gradient(double[] A, ref double func, double[] grad, object obj) {
    74       var info = (OptimizationInfo)obj;
    75       var instances = info.ProblemData.TrainingIndices.ToArray();
    76       var attributes = info.ProblemData.AllowedInputVariables.Count();
    77       var AMatrix = new Matrix(A, A.Length / info.ReduceDimensions, info.ReduceDimensions);
    78 
    79       alglib.sparsematrix probabilities;
    80       alglib.sparsecreate(instances.Length, instances.Length, out probabilities);
    81       var distances = new double[instances.Length];
    82       for (int i = 0; i < instances.Length - 1; i++) {
    83         var iVector = new Matrix(GetRow(info.ProblemData, instances[i]));
    84         var denom = 0.0;
    85         for (int k = 0; k < instances.Length; k++) {
    86           if (k == i) continue;
    87           var kVector = new Matrix(GetRow(info.ProblemData, instances[k]));
    88           distances[k] = iVector.Multiply(AMatrix).Subtract(kVector.Multiply(AMatrix)).Length();
    89           denom += Math.Exp(-(distances[k] * distances[k]));
    90         }
    91         if (denom > 0) {
    92           for (int j = i + 1; j < instances.Length; j++) {
    93             if (i == j) continue;
    94             var v = Math.Exp(-(distances[j] * distances[j])) / denom;
    95             alglib.sparseset(probabilities, i, j, v);
    96             alglib.sparseset(probabilities, j, i, v);
    97           }
    98         }
    99       }
    100       alglib.sparseconverttocrs(probabilities); // needed to enumerate in order (top-down and left-right)
    101 
    102       int t0 = 0, t1 = 0, r, c;
    103       double val;
    104       var classes = info.ProblemData.Dataset.GetDoubleValues(info.ProblemData.TargetVariable, instances).ToArray();
    105       var pi = new double[instances.Length];
    106       while (alglib.sparseenumerate(probabilities, ref t0, ref t1, out r, out c, out val)) {
    107         if (classes[r].IsAlmost(classes[c]))
    108           pi[r] += val;
    109       }
    110 
    111       var innerSum = new double[attributes, attributes];
    112       while (alglib.sparseenumerate(probabilities, ref t0, ref t1, out r, out c, out val)) {
    113         var vector = new Matrix(GetRow(info.ProblemData, instances[r])).Subtract(new Matrix(GetRow(info.ProblemData, instances[c]))).Apply();
    114         vector.OuterProduct(vector).Multiply(val * pi[r]).AddTo(innerSum);
    115 
    116         if (classes[r].IsAlmost(classes[c])) {
    117           vector.OuterProduct(vector).Multiply(-val).AddTo(innerSum);
    118         }
    119       }
    120 
    121       func = -pi.Sum();
    122 
    123       grad = AMatrix.Multiply(-2.0).Transpose().Multiply(new Matrix(innerSum)).Transpose().ToArray();
     81    public override IDeepCloneable Clone(Cloner cloner) {
     82      return new NeighborhoodComponentsAnalysis(this, cloner);
    12483    }
    12584
    126     private static IEnumerable<double> GetRow(IClassificationProblemData data, int row) {
    127       return data.AllowedInputVariables.Select(v => data.Dataset.GetDoubleValue(v, row));
     85    public override void Prepare() {
     86      if (Problem != null) base.Prepare();
    12887    }
    12988
    130     public static NCAClassificationSolution CreateNCASolution(IClassificationProblemData problemData, int k, int reduceDimensions, INCAInitializer initializer) {
    131       return new NCAClassificationSolution(problemData, Train(problemData, k, reduceDimensions, initializer));
    132     }
     89    protected override void Run() {
     90      var k = K.Value;
     91      var dimensions = ReduceDimensions.Value;
     92      var initializer = InitializationParameter.Value;
    13393
    134     private class OptimizationInfo {
    135       public IClassificationProblemData ProblemData { get; set; }
    136       public int ReduceDimensions { get; set; }
    137       public OptimizationInfo(IClassificationProblemData problem, int reduceDimensions) {
    138         this.ProblemData = problem;
    139         this.ReduceDimensions = reduceDimensions;
    140       }
     94      var clonedProblem = (IClassificationProblemData)Problem.ProblemData.Clone();
     95      var classification = new NCAClassificationSolution(clonedProblem, Auxiliary.Train(clonedProblem, k, dimensions, initializer));
     96      Results.Add(new Result("ClassificationSolution", "The classification solution.", classification));
     97      // TODO: result that shows the LOO performance
    14198    }
    14299  }
  • branches/NCA/NCA.sln

    r8422 r8437  
    33# Visual Studio 2010
    44Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Algorithms.NCA-3.3", "HeuristicLab.Algorithms.NCA\3.3\HeuristicLab.Algorithms.NCA-3.3.csproj", "{FD81A5D6-051D-4BD1-B2D8-668A820FAD6E}"
    5 EndProject
    6 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Algorithms.NCA.Views-3.3", "HeuristicLab.Algorithms.NCA.Views\3.3\HeuristicLab.Algorithms.NCA.Views-3.3.csproj", "{07654A26-5964-4079-B023-5548B1EB1D1E}"
    75EndProject
    86Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{30CDFA65-3AD8-42A4-801E-0F785CE9DF16}"
     
    1513EndProject
    1614Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Algorithms.NCA.Tests", "HeuristicLab.Algorithms.NCA.Tests\HeuristicLab.Algorithms.NCA.Tests.csproj", "{910C130C-378D-422B-96B7-0484F7CC66B7}"
     15EndProject
     16Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Algorithms.NCA.Views-3.3", "HeuristicLab.Algorithms.NCA.Views\3.3\HeuristicLab.Algorithms.NCA.Views-3.3.csproj", "{07654A26-5964-4079-B023-5548B1EB1D1E}"
    1717EndProject
    1818Global
     
    3737    {FD81A5D6-051D-4BD1-B2D8-668A820FAD6E}.Release|x64.ActiveCfg = Release|Any CPU
    3838    {FD81A5D6-051D-4BD1-B2D8-668A820FAD6E}.Release|x86.ActiveCfg = Release|Any CPU
     39    {910C130C-378D-422B-96B7-0484F7CC66B7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
     40    {910C130C-378D-422B-96B7-0484F7CC66B7}.Debug|Any CPU.Build.0 = Debug|Any CPU
     41    {910C130C-378D-422B-96B7-0484F7CC66B7}.Debug|x64.ActiveCfg = Debug|Any CPU
     42    {910C130C-378D-422B-96B7-0484F7CC66B7}.Debug|x86.ActiveCfg = Debug|Any CPU
     43    {910C130C-378D-422B-96B7-0484F7CC66B7}.Release|Any CPU.ActiveCfg = Release|Any CPU
     44    {910C130C-378D-422B-96B7-0484F7CC66B7}.Release|Any CPU.Build.0 = Release|Any CPU
     45    {910C130C-378D-422B-96B7-0484F7CC66B7}.Release|x64.ActiveCfg = Release|Any CPU
     46    {910C130C-378D-422B-96B7-0484F7CC66B7}.Release|x86.ActiveCfg = Release|Any CPU
    3947    {07654A26-5964-4079-B023-5548B1EB1D1E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
    4048    {07654A26-5964-4079-B023-5548B1EB1D1E}.Debug|Any CPU.Build.0 = Debug|Any CPU
     
    4957    {07654A26-5964-4079-B023-5548B1EB1D1E}.Release|x86.ActiveCfg = Release|x86
    5058    {07654A26-5964-4079-B023-5548B1EB1D1E}.Release|x86.Build.0 = Release|x86
    51     {910C130C-378D-422B-96B7-0484F7CC66B7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
    52     {910C130C-378D-422B-96B7-0484F7CC66B7}.Debug|Any CPU.Build.0 = Debug|Any CPU
    53     {910C130C-378D-422B-96B7-0484F7CC66B7}.Debug|x64.ActiveCfg = Debug|Any CPU
    54     {910C130C-378D-422B-96B7-0484F7CC66B7}.Debug|x86.ActiveCfg = Debug|Any CPU
    55     {910C130C-378D-422B-96B7-0484F7CC66B7}.Release|Any CPU.ActiveCfg = Release|Any CPU
    56     {910C130C-378D-422B-96B7-0484F7CC66B7}.Release|Any CPU.Build.0 = Release|Any CPU
    57     {910C130C-378D-422B-96B7-0484F7CC66B7}.Release|x64.ActiveCfg = Release|Any CPU
    58     {910C130C-378D-422B-96B7-0484F7CC66B7}.Release|x86.ActiveCfg = Release|Any CPU
    5959  EndGlobalSection
    6060  GlobalSection(SolutionProperties) = preSolution
Note: See TracChangeset for help on using the changeset viewer.