Free cookie consent management tool by TermsFeed Policy Generator

source: branches/NCA/HeuristicLab.Algorithms.NCA/3.3/NCAModel.cs @ 8464

Last change on this file since 8464 was 8454, checked in by abeham, 12 years ago

#1913:

  • Refactored NCAModel and NeighborhoodComponentsAnalysis algorithm
  • Model now includes NearestNeighborModel
  • The algorithm can now be canceled (this essentially re-creates the optimization loop of mincgoptimize)
  • Scaling should work properly now
File size: 6.6 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Algorithms.DataAnalysis;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;

namespace HeuristicLab.Algorithms.NCA {
  /// <summary>
  /// Classification model that scales the input variables, projects them with a
  /// transformation matrix and classifies the projected rows with a nearest
  /// neighbour model built on the projected training data.
  /// </summary>
  [Item("NCAModel", "")]
  [StorableClass]
  public class NCAModel : NamedItem, INCAModel {

    // Scaling handed to AlglibUtil.PrepareAndScaleInputMatrix before every projection.
    [Storable]
    private Scaling scaling;
    // Projection matrix; rows are indexed by input column, columns by output component.
    [Storable]
    private double[,] transformationMatrix;
    /// <summary>
    /// Gets a copy of the transformation matrix, so callers cannot modify the model's own instance.
    /// </summary>
    public double[,] TransformationMatrix {
      get { return (double[,])transformationMatrix.Clone(); }
    }
    [Storable]
    private string[] allowedInputVariables;
    [Storable]
    private string targetVariable;
    // Nearest neighbour model that operates on the projected data.
    [Storable]
    private INearestNeighbourModel nnModel;
    // Class index used by the nearest neighbour model -> original class value.
    [Storable]
    private Dictionary<double, double> nn2ncaClassMapping;
    // Original class value -> class index used by the nearest neighbour model.
    [Storable]
    private Dictionary<double, double> nca2nnClassMapping;

    [StorableConstructor]
    protected NCAModel(bool deserializing) : base(deserializing) { }

    /// <summary>
    /// Cloning constructor: arrays and dictionaries are copied, items are cloned through <paramref name="cloner"/>.
    /// </summary>
    protected NCAModel(NCAModel original, Cloner cloner)
      : base(original, cloner) {
      this.scaling = cloner.Clone(original.scaling);
      this.transformationMatrix = (double[,])original.transformationMatrix.Clone();
      this.allowedInputVariables = (string[])original.allowedInputVariables.Clone();
      this.targetVariable = original.targetVariable;
      this.nnModel = cloner.Clone(original.nnModel);
      this.nn2ncaClassMapping = new Dictionary<double, double>(original.nn2ncaClassMapping);
      this.nca2nnClassMapping = new Dictionary<double, double>(original.nca2nnClassMapping);
    }

    /// <summary>
    /// Creates the model from already scaled training data and builds the nearest neighbour
    /// model on the projected data.
    /// </summary>
    /// <param name="k">Passed on to the nearest neighbour model.</param>
    /// <param name="scaledData">Scaled training inputs, one row per sample.</param>
    /// <param name="scaling">Scaling that is applied to datasets passed to this model later on.</param>
    /// <param name="transformationMatrix">Projection matrix; a copy is stored.</param>
    /// <param name="targetVariable">Name of the target variable.</param>
    /// <param name="targetVector">Class value of every row in <paramref name="scaledData"/>.</param>
    /// <param name="allowedInputVariables">Names of the input variables.</param>
    /// <exception cref="ArgumentNullException">A required argument is null.</exception>
    /// <exception cref="ArgumentException">The number of target values differs from the number of data rows.</exception>
    public NCAModel(int k, double[,] scaledData, Scaling scaling, double[,] transformationMatrix, string targetVariable, IEnumerable<double> targetVector, IEnumerable<string> allowedInputVariables) {
      if (scaledData == null) throw new ArgumentNullException("scaledData");
      if (transformationMatrix == null) throw new ArgumentNullException("transformationMatrix");
      if (targetVector == null) throw new ArgumentNullException("targetVector");
      if (allowedInputVariables == null) throw new ArgumentNullException("allowedInputVariables");

      Name = ItemName;
      Description = ItemDescription;
      this.scaling = scaling;
      // Copy the matrix: the getter and the cloning constructor copy as well, so the
      // model never shares its matrix with outside code.
      this.transformationMatrix = (double[,])transformationMatrix.Clone();
      this.allowedInputVariables = allowedInputVariables.ToArray();
      this.targetVariable = targetVariable;

      // Materialize once; the sequence is needed for the class mapping and for the target column.
      var targets = targetVector.ToArray();

      // Distinct class values, in ascending order, are numbered 0, 1, 2, ...
      nca2nnClassMapping = new Dictionary<double, double>();
      nn2ncaClassMapping = new Dictionary<double, double>();
      foreach (var classValue in targets.Distinct().OrderBy(x => x)) {
        double classIndex = nca2nnClassMapping.Count;
        nca2nnClassMapping.Add(classValue, classIndex);
        nn2ncaClassMapping.Add(classIndex, classValue);
      }

      var transformedData = ReduceWithTarget(scaledData, targets.Select(x => nca2nnClassMapping[x]));

      // The last column of transformedData holds the class index, hence "columns - 1" inputs and 1 output.
      var kdtree = new alglib.nearestneighbor.kdtree();
      alglib.nearestneighbor.kdtreebuild(transformedData, transformedData.GetLength(0), transformedData.GetLength(1) - 1, 1, 2, kdtree);

      nnModel = new NearestNeighbourModel(kdtree, k, targetVariable,
        Enumerable.Range(0, this.transformationMatrix.GetLength(1)).Select(x => x.ToString()),
        nn2ncaClassMapping.Keys.ToArray());
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new NCAModel(this, cloner);
    }

    /// <summary>
    /// Estimates the class values for the given rows of <paramref name="dataset"/>.
    /// </summary>
    /// <remarks>
    /// The target variable is read from <paramref name="dataset"/>; class values that are not yet
    /// known to the model are appended to the class mappings, i.e. this call can modify the model.
    /// NOTE(review): because of that mutation concurrent calls are presumably not safe - confirm.
    /// </remarks>
    /// <param name="dataset">Dataset containing the input variables and the target variable.</param>
    /// <param name="rows">Row indices to estimate.</param>
    /// <returns>One estimated class value per row, expressed as original class values.</returns>
    public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
      // Materialize rows and targets once instead of re-querying them for every step.
      var rowIndices = rows.ToArray();
      var targets = dataset.GetDoubleValues(targetVariable, rowIndices).ToArray();

      // Register unseen class values in order of first appearance; each receives the next free index.
      foreach (var classValue in targets) {
        if (nca2nnClassMapping.ContainsKey(classValue)) continue;
        double classIndex = nca2nnClassMapping.Count;
        nca2nnClassMapping[classValue] = classIndex;
        nn2ncaClassMapping[classIndex] = classValue;
      }

      var transformedData = ReduceWithTarget(dataset, rowIndices, targets.Select(x => nca2nnClassMapping[x]));
      var ds = new Dataset(Enumerable.Range(0, transformationMatrix.GetLength(1)).Select(x => x.ToString()).Concat(targetVariable.ToEnumerable()), transformedData);
      return nnModel.GetEstimatedClassValues(ds, Enumerable.Range(0, ds.Rows)).Select(x => nn2ncaClassMapping[x]);
    }

    public NCAClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
      return new NCAClassificationSolution(problemData, this);
    }

    IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) {
      return CreateClassificationSolution(problemData);
    }

    /// <summary>
    /// Scales the given rows of <paramref name="dataset"/> and projects them with the transformation matrix.
    /// </summary>
    /// <returns>A matrix with one row per input row and one column per column of the transformation matrix.</returns>
    public double[,] Reduce(Dataset dataset, IEnumerable<int> rows) {
      var scaledData = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, scaling);
      return Reduce(scaledData);
    }

    // Projects already scaled data with the transformation matrix.
    private double[,] Reduce(double[,] scaledData) {
      return Project(scaledData, 0);
    }

    // Scales the given rows, projects them and appends the target values as last column.
    private double[,] ReduceWithTarget(Dataset dataset, IEnumerable<int> rows, IEnumerable<double> targetValues) {
      var scaledData = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, scaling);
      return ReduceWithTarget(scaledData, targetValues);
    }

    // Projects already scaled data and appends the target values as last column.
    // Throws ArgumentException if the number of target values differs from the number of rows.
    private double[,] ReduceWithTarget(double[,] scaledData, IEnumerable<double> targetValues) {
      var result = Project(scaledData, 1);
      int rowCount = result.GetLength(0);
      int targetColumn = transformationMatrix.GetLength(1);

      int row = 0;
      foreach (var target in targetValues) {
        if (row >= rowCount)
          throw new ArgumentException("The number of target values must match the number of data rows.");
        result[row++, targetColumn] = target;
      }
      // Too few targets would otherwise silently leave class index 0 in the remaining rows.
      if (row != rowCount)
        throw new ArgumentException("The number of target values must match the number of data rows.");

      return result;
    }

    // Multiplies scaledData with the transformation matrix. The result has extraColumns
    // additional, zero-initialized columns to the right of the projected components.
    private double[,] Project(double[,] scaledData, int extraColumns) {
      int rowCount = scaledData.GetLength(0);
      int inputCount = scaledData.GetLength(1);
      int componentCount = transformationMatrix.GetLength(1);

      var result = new double[rowCount, componentCount + extraColumns];
      for (int i = 0; i < rowCount; i++) {
        for (int j = 0; j < inputCount; j++) {
          for (int x = 0; x < componentCount; x++) {
            result[i, x] += scaledData[i, j] * transformationMatrix[j, x];
          }
        }
      }
      return result;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.