Free cookie consent management tool by TermsFeed Policy Generator

source: branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/PCA/PrincipleComponentAnalysisStatic.cs @ 15430

Last change on this file since 15430 was 15430, checked in by bwerth, 7 years ago

#2847 first implementation of M5'-regression

File size: 6.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Common;
25using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
26using HeuristicLab.Problems.DataAnalysis;
27
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Holds a principal component basis built by <c>alglib.pcabuildbasis</c> for a fixed
  /// set of dataset variables and projects data into, or back out of, that basis.
  /// Instances are obtained through <see cref="Create"/>.
  /// </summary>
  [StorableClass]
  public class PrincipleComponentAnalysisStatic : IDeepCloneable {
    #region Properties
    /// <summary>
    /// Basis matrix returned by <c>alglib.pcabuildbasis</c>. It is indexed as
    /// [input variable, component] by the projection methods of this class.
    /// </summary>
    [Storable]
    private double[,] Matrix { get; set; }

    /// <summary>Variance values returned by <c>alglib.pcabuildbasis</c>.</summary>
    [Storable]
    public double[] Variances { get; private set; }

    /// <summary>Names of the input variables the basis was built from.</summary>
    [Storable]
    public string[] Variables { get; private set; }

    /// <summary>
    /// Per-variable divisor applied before projection. When the basis was built with
    /// normalization this is the population standard deviation (replaced by 1 if it is
    /// almost zero), otherwise it is 1 for every variable.
    /// </summary>
    [Storable]
    private double[] Deviations { get; set; }

    /// <summary>
    /// Per-variable offset subtracted before projection. When the basis was built with
    /// normalization this is the variable's average, otherwise it is 0 for every variable.
    /// </summary>
    [Storable]
    private double[] Means { get; set; }

    /// <summary>
    /// Names of the projected columns: "pc0", "pc1", ... with one entry per input variable.
    /// </summary>
    public string[] Names {
      get { return Enumerable.Range(0, Variables.Length).Select(i => "pc" + i).ToArray(); }
    }
    #endregion

    #region HLConstructors
    [StorableConstructor]
    protected PrincipleComponentAnalysisStatic(bool deserializing) { }

    /// <summary>
    /// Copy constructor used by <see cref="Clone(Cloner)"/>. Every array of
    /// <paramref name="original"/> that is not null is copied so that the clone shares
    /// no array instance with it.
    /// </summary>
    protected PrincipleComponentAnalysisStatic(PrincipleComponentAnalysisStatic original, Cloner cloner) {
      if (original.Variances != null) Variances = original.Variances.ToArray();
      if (original.Variables != null) Variables = original.Variables.ToArray();
      // Means and Deviations are read by ProjectData and ReverseProjection, so a clone
      // without them would fail on first use.
      if (original.Deviations != null) Deviations = original.Deviations.ToArray();
      if (original.Means != null) Means = original.Means.ToArray();
      // Array.Clone on an array of doubles yields an independent copy of all elements.
      if (original.Matrix != null) Matrix = (double[,])original.Matrix.Clone();
    }

    private PrincipleComponentAnalysisStatic() { }

    /// <summary>Creates a copy of this instance through the copy constructor.</summary>
    public IDeepCloneable Clone(Cloner cloner) {
      return new PrincipleComponentAnalysisStatic(this, cloner);
    }

    /// <summary>Creates a copy of this instance using a fresh <see cref="Cloner"/>.</summary>
    public object Clone() {
      return new Cloner().Clone(this);
    }
    #endregion

    #region Static Interface
    /// <summary>
    /// Builds a principal component basis from the given rows and variables of a dataset.
    /// </summary>
    /// <param name="dataset">Dataset the values are read from.</param>
    /// <param name="rows">Row indices used to build the basis.</param>
    /// <param name="variables">Names of the double variables to include.</param>
    /// <param name="normalize">
    /// If true, each variable is centered by its average and divided by its population
    /// standard deviation before the basis is built; if false, the raw values are used.
    /// </param>
    /// <returns>A new instance holding the basis.</returns>
    /// <exception cref="System.InvalidOperationException">
    /// <c>alglib.pcabuildbasis</c> reported a negative return code.
    /// </exception>
    public static PrincipleComponentAnalysisStatic Create(IDataset dataset, IEnumerable<int> rows, IEnumerable<string> variables, bool normalize = false) {
      var res = new PrincipleComponentAnalysisStatic();
      res.BuildPca(dataset, rows, variables, normalize);
      return res;
    }
    #endregion

    /// <summary>
    /// Projects all indices of the given problem data and returns new regression problem
    /// data that uses the component columns as inputs and keeps the original target
    /// variable and the training/test partition bounds.
    /// </summary>
    public IRegressionProblemData ProjectProblem(IRegressionProblemData pd) {
      var projected = ProjectData(pd.Dataset, pd.AllIndices);
      return CreateProblemData(pd, projected);
    }

    /// <summary>
    /// Projects every row of <paramref name="data"/> and returns a new dataset that holds
    /// the component columns followed by the double variables that were not projected.
    /// </summary>
    public IDataset ProjectDataset(IDataset data) {
      var projected = ProjectData(data, Enumerable.Range(0, data.Rows));
      return CreateDataset(data, projected);
    }

    /// <summary>
    /// Projects the given rows into the component space.
    /// </summary>
    /// <param name="dataset">Dataset providing a value for each entry of <see cref="Variables"/>.</param>
    /// <param name="rows">Row indices to project.</param>
    /// <returns>
    /// A matrix with one row per requested row and one column per component.
    /// </returns>
    public double[,] ProjectData(IDataset dataset, IEnumerable<int> rows) {
      var instances = rows.ToArray();
      var result = new double[instances.Length, Variables.Length];
      for (var r = 0; r < instances.Length; r++) {
        for (var i = 0; i < Variables.Length; i++) {
          // Apply the same centering and scaling that was used when the basis was built.
          var val = (dataset.GetDoubleValue(Variables[i], instances[r]) - Means[i]) / Deviations[i];
          for (var j = 0; j < Variables.Length; j++)
            result[r, j] += val * Matrix[i, j];
        }
      }
      return result;
    }

    /// <summary>
    /// Maps component values back to the space of the original variables.
    /// </summary>
    /// <param name="dataset">Dataset holding the component values.</param>
    /// <param name="rows">Row indices to map back.</param>
    /// <param name="pcaComponents">
    /// Names of the dataset variables that hold component values. The i-th name is
    /// combined with column i of the basis matrix, so at most as many names as there
    /// are entries in <see cref="Variables"/> may be supplied.
    /// </param>
    /// <returns>
    /// A matrix with one row per requested row and one column per entry of <see cref="Variables"/>.
    /// </returns>
    public double[,] ReverseProjection(IDataset dataset, IEnumerable<int> rows, IEnumerable<string> pcaComponents) {
      var instances = rows.ToArray();
      var components = pcaComponents.ToArray();

      var result = new double[instances.Length, Variables.Length];
      for (var r = 0; r < instances.Length; r++) {
        // Rotate back first: accumulate the contribution of every supplied component.
        for (var i = 0; i < components.Length; i++) {
          var val = dataset.GetDoubleValue(components[i], instances[r]);
          for (var j = 0; j < Variables.Length; j++)
            result[r, j] += val * Matrix[j, i];
        }
        // Then undo the scaling and centering exactly once per variable. This is the
        // inverse of "(x - mean) / deviation" as applied in ProjectData.
        for (var j = 0; j < Variables.Length; j++)
          result[r, j] = result[r, j] * Deviations[j] + Means[j];
      }
      return result;
    }

    #region Helpers
    /// <summary>
    /// Wraps projected values in new regression problem data that copies the target
    /// variable and the partition bounds of <paramref name="pd"/>.
    /// </summary>
    private IRegressionProblemData CreateProblemData(IRegressionProblemData pd, double[,] pcs) {
      var data = CreateDataset(pd.Dataset, pcs);
      var res = new RegressionProblemData(data, Names, pd.TargetVariable);
      res.TestPartition.Start = pd.TestPartition.Start;
      res.TestPartition.End = pd.TestPartition.End;
      res.TrainingPartition.Start = pd.TrainingPartition.Start;
      res.TrainingPartition.End = pd.TrainingPartition.End;
      return res;
    }

    /// <summary>
    /// Builds a dataset whose first columns are the component values in
    /// <paramref name="pcs"/> (named by <see cref="Names"/>), followed by every double
    /// variable of <paramref name="data"/> that is not listed in <see cref="Variables"/>.
    /// </summary>
    private IDataset CreateDataset(IDataset data, double[,] pcs) {
      var componentNames = Names;
      // A set lookup avoids scanning the Variables array once per dataset variable.
      var projectedVariables = new HashSet<string>(Variables);
      var remainingVariables = data.DoubleVariables.Where(x => !projectedVariables.Contains(x)).ToArray();
      var rowCount = pcs.GetLength(0);

      var componentColumns = componentNames.Select((_, c) => Enumerable.Range(0, rowCount).Select(r => pcs[r, c]).ToList());
      var remainingColumns = remainingVariables.Select(x => data.GetDoubleValues(x).ToList());
      return new Dataset(componentNames.Concat(remainingVariables), componentColumns.Concat(remainingColumns));
    }

    /// <summary>
    /// Computes means and deviations for the selected variables, fills the centered and
    /// scaled data matrix and stores the basis returned by <c>alglib.pcabuildbasis</c>.
    /// </summary>
    /// <exception cref="System.InvalidOperationException">
    /// <c>alglib.pcabuildbasis</c> reported a negative return code.
    /// </exception>
    private void BuildPca(IDataset dataset, IEnumerable<int> rows, IEnumerable<string> variables, bool normalize) {
      var instances = rows.ToArray();
      var attributes = variables.ToArray();
      Means = normalize
        ? attributes.Select(v => dataset.GetDoubleValues(v, instances).Average()).ToArray()
        : attributes.Select(x => 0.0).ToArray();
      // An (almost) zero deviation is replaced by 1 so the division below stays defined.
      Deviations = normalize
        ? attributes.Select(v => dataset.GetDoubleValues(v, instances).StandardDeviationPop()).Select(x => x.IsAlmost(0.0) ? 1 : x).ToArray()
        : attributes.Select(x => 1.0).ToArray();

      var data = new double[instances.Length, attributes.Length];

      for (var j = 0; j < attributes.Length; j++) {
        var i = 0;
        foreach (var v in dataset.GetDoubleValues(attributes[j], instances)) {
          data[i, j] = (v - Means[j]) / Deviations[j];
          i++;
        }
      }

      int info;
      double[] variances;
      double[,] matrix;
      alglib.pcabuildbasis(data, instances.Length, attributes.Length, out info, out variances, out matrix);
      // NOTE(review): negative codes are treated as failure, following the ALGLIB
      // documentation of pcabuildbasis; confirm against the bundled ALGLIB version.
      if (info < 0)
        throw new System.InvalidOperationException("Principal component analysis failed (alglib.pcabuildbasis returned info = " + info + ").");

      Matrix = matrix;
      Variances = variances;
      Variables = attributes;
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.