Free cookie consent management tool by TermsFeed Policy Generator

source: branches/NCA/HeuristicLab.Algorithms.NCA/3.3/NeighborhoodComponentsAnalysis.cs @ 8425

Last change on this file since 8425 was 8425, checked in by abeham, 12 years ago

#1913: Added several initialization methods (LDA, PCA, and Random)

File size: 6.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Problems.DataAnalysis;
27
28namespace HeuristicLab.Algorithms.NCA {
/// <summary>
/// Trains a neighborhood components analysis (NCA) model: a linear transformation of the
/// allowed input variables is optimized with ALGLIB's conjugate gradient optimizer and the
/// transformed training rows are handed to an <see cref="NCAModel"/>.
/// </summary>
public class NeighborhoodComponentsAnalysis {

  // Gradient-norm stopping threshold passed to alglib.mincgsetcond (all other criteria are left at 0).
  private const double GradientTolerance = 0.0000000001;

  /// <summary>
  /// Optimizes the transformation matrix on the training rows of <paramref name="data"/> and
  /// builds the resulting model.
  /// </summary>
  /// <param name="data">Problem data providing the dataset, training rows, input variables and target variable.</param>
  /// <param name="k">Passed through unchanged to the <see cref="NCAModel"/> constructor.</param>
  /// <param name="reduceDimensions">Number of columns of the transformation matrix (dimension of the transformed space); must be at least 1.</param>
  /// <param name="initializer">Produces the initial, flattened transformation matrix for the optimizer.</param>
  /// <returns>A model holding the transformed training set, the transformation matrix and the training class values.</returns>
  /// <exception cref="ArgumentNullException"><paramref name="data"/> or <paramref name="initializer"/> is null.</exception>
  /// <exception cref="ArgumentOutOfRangeException"><paramref name="reduceDimensions"/> is smaller than 1.</exception>
  public static INCAModel Train(IClassificationProblemData data, int k, int reduceDimensions, INCAInitializer initializer) {
    if (data == null) throw new ArgumentNullException("data");
    if (initializer == null) throw new ArgumentNullException("initializer");
    // Gradient divides by this value and it is used as an array dimension below.
    if (reduceDimensions < 1) throw new ArgumentOutOfRangeException("reduceDimensions", "The number of dimensions must be at least 1.");

    // Materialize once: both sequences are needed several times below.
    var trainingRows = data.TrainingIndices.ToArray();
    var inputVariables = data.AllowedInputVariables.ToArray();
    var instances = trainingRows.Length;
    var attributes = inputVariables.Length;

    double[] matrix = initializer.Initialize(data, reduceDimensions);

    alglib.mincgstate state;
    alglib.mincgreport rep;

    alglib.mincgcreate(matrix, out state);
    alglib.mincgsetcond(state, GradientTolerance, 0, 0, 0);
    alglib.mincgoptimize(state, Gradient, null, new OptimizationInfo(data, reduceDimensions));
    alglib.mincgresults(state, out matrix, out rep);

    // The optimizer works on a flat vector; unpack it row by row (attributes x reduceDimensions).
    var transformationMatrix = new double[attributes, reduceDimensions];
    var counter = 0;
    for (var i = 0; i < attributes; i++) {
      for (var j = 0; j < reduceDimensions; j++) {
        transformationMatrix[i, j] = matrix[counter++];
      }
    }

    // Project every training row: transformed[row, j] = sum_i x[row, i] * T[i, j].
    var transformedTrainingset = new double[instances, reduceDimensions];
    for (var row = 0; row < instances; row++) {
      for (var i = 0; i < attributes; i++) {
        var val = data.Dataset.GetDoubleValue(inputVariables[i], trainingRows[row]);
        for (var j = 0; j < reduceDimensions; j++) {
          transformedTrainingset[row, j] += val * transformationMatrix[i, j];
        }
      }
    }

    // Read the class values for exactly the rows used above so that row r of the
    // transformed training set and entry r of the class array always belong together.
    var trainingClasses = data.Dataset.GetDoubleValues(data.TargetVariable, trainingRows).ToArray();

    return new NCAModel(transformedTrainingset, transformationMatrix, k, data.TargetVariable, data.AllowedInputVariables, trainingClasses);
  }

  /// <summary>
  /// Objective/gradient callback handed to alglib.mincgoptimize.
  /// </summary>
  /// <remarks>
  /// For every training instance i a probability distribution over all other instances j is
  /// built as exp(-d_ij^2) / sum_{k != i} exp(-d_ik^2), where d is the distance between the
  /// transformed rows. <paramref name="func"/> receives the negated sum of the probabilities
  /// that fall on instances of the same class, and <paramref name="grad"/> is overwritten in
  /// place with the matching gradient.
  /// </remarks>
  /// <param name="A">Current flattened transformation matrix.</param>
  /// <param name="func">Receives the objective value (to be minimized).</param>
  /// <param name="grad">Buffer owned by the optimizer; filled element by element with the gradient.</param>
  /// <param name="obj">The <see cref="OptimizationInfo"/> passed to alglib.mincgoptimize.</param>
  private static void Gradient(double[] A, ref double func, double[] grad, object obj) {
    var info = (OptimizationInfo)obj;
    var instances = info.ProblemData.TrainingIndices.ToArray();
    var attributes = info.ProblemData.AllowedInputVariables.Count();
    var AMatrix = new Matrix(A, A.Length / info.ReduceDimensions, info.ReduceDimensions);

    alglib.sparsematrix probabilities;
    alglib.sparsecreate(instances.Length, instances.Length, out probabilities);
    var weights = new double[instances.Length];
    // Every row gets its own normalisation, so all rows (including the last one) are processed
    // and only entry (i, j) is written; (j, i) is filled when j becomes the outer index.
    for (int i = 0; i < instances.Length; i++) {
      var iVector = new Matrix(GetRow(info.ProblemData, instances[i]));
      var denom = 0.0;
      for (int k = 0; k < instances.Length; k++) {
        if (k == i) continue;
        var kVector = new Matrix(GetRow(info.ProblemData, instances[k]));
        double distance = iVector.Multiply(AMatrix).Subtract(kVector.Multiply(AMatrix)).Length();
        weights[k] = Math.Exp(-(distance * distance));
        denom += weights[k];
      }
      if (denom > 0) {
        for (int j = 0; j < instances.Length; j++) {
          if (i == j) continue;
          alglib.sparseset(probabilities, i, j, weights[j] / denom);
        }
      }
    }
    alglib.sparseconverttocrs(probabilities); // needed to enumerate in order (top-down and left-right)

    int t0 = 0, t1 = 0, r, c;
    double val;
    var classes = info.ProblemData.Dataset.GetDoubleValues(info.ProblemData.TargetVariable, instances).ToArray();

    // pi[r]: probability mass of row r that falls on instances of the same class.
    var pi = new double[instances.Length];
    while (alglib.sparseenumerate(probabilities, ref t0, ref t1, out r, out c, out val)) {
      if (classes[r].IsAlmost(classes[c])) {
        pi[r] += val;
      }
    }

    // Restart the enumeration explicitly instead of relying on the state the
    // counters were left in by the previous pass.
    t0 = 0;
    t1 = 0;
    var innerSum = new double[attributes, attributes];
    while (alglib.sparseenumerate(probabilities, ref t0, ref t1, out r, out c, out val)) {
      var vector = new Matrix(GetRow(info.ProblemData, instances[r])).Subtract(new Matrix(GetRow(info.ProblemData, instances[c]))).Apply();
      vector.OuterProduct(vector).Multiply(val * pi[r]).AddTo(innerSum);

      if (classes[r].IsAlmost(classes[c])) {
        vector.OuterProduct(vector).Multiply(-val).AddTo(innerSum);
      }
    }

    func = -pi.Sum();

    // grad is passed by value: assigning a new array to it would be invisible to the
    // optimizer, so the result has to be copied into the supplied buffer.
    double[] gradient = AMatrix.Multiply(-2.0).Transpose().Multiply(new Matrix(innerSum)).Transpose().ToArray();
    Array.Copy(gradient, grad, grad.Length);
  }

  /// <summary>
  /// Lazily yields the values of all allowed input variables for dataset row <paramref name="row"/>.
  /// </summary>
  private static IEnumerable<double> GetRow(IClassificationProblemData data, int row) {
    return data.AllowedInputVariables.Select(v => data.Dataset.GetDoubleValue(v, row));
  }

  /// <summary>
  /// Trains a model via <see cref="Train"/> and wraps it together with the problem data in a solution.
  /// </summary>
  public static NCAClassificationSolution CreateNCASolution(IClassificationProblemData problemData, int k, int reduceDimensions, INCAInitializer initializer) {
    return new NCAClassificationSolution(problemData, Train(problemData, k, reduceDimensions, initializer));
  }

  /// <summary>
  /// State object passed through alglib.mincgoptimize to <see cref="Gradient"/>.
  /// </summary>
  private sealed class OptimizationInfo {
    public IClassificationProblemData ProblemData { get; private set; }
    public int ReduceDimensions { get; private set; }
    public OptimizationInfo(IClassificationProblemData problem, int reduceDimensions) {
      this.ProblemData = problem;
      this.ReduceDimensions = reduceDimensions;
    }
  }
}
143}
Note: See TracBrowser for help on using the repository browser.