source: trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VariableNetworks/GaussianProcessVariableNetwork.cs @ 14630

Last change on this file since 14630 was 14630, checked in by gkronber, 3 years ago

#2288: introduced base class for variable network instance description and implemented GRR and Linear variable networks as specific classes

File size: 5.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Core;
26using HeuristicLab.Problems.DataAnalysis;
27using HeuristicLab.Random;
28
29namespace HeuristicLab.Problems.Instances.DataAnalysis {
30  public sealed class GaussianProcessVariableNetwork : VariableNetwork {
31    private int numberOfFeatures;
32    private double noiseRatio;
33
34    public override string Name { get { return string.Format("GaussianProcessVariableNetwork-{0:0%} ({1} dim)", noiseRatio, numberOfFeatures); } }
35
36    public GaussianProcessVariableNetwork(int numberOfFeatures, double noiseRatio,
37      IRandom rand)
38      : base(250, 250, numberOfFeatures, noiseRatio, rand) {
39      this.noiseRatio = noiseRatio;
40      this.numberOfFeatures = numberOfFeatures;
41    }
42
43    // sample the input variables that are actually used and sample from a Gaussian process
44    protected override IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, out string[] selectedVarNames, out double[] relevance) {
45      int nl = SampleNumberOfVariables(rand, xs.Count);
46
47      var selectedIdx = Enumerable.Range(0, xs.Count).Shuffle(rand)
48        .Take(nl).ToArray();
49
50      var selectedVars = selectedIdx.Select(i => xs[i]).ToArray();
51      selectedVarNames = selectedIdx.Select(i => VariableNames[i]).ToArray();
52      return SampleGaussianProcess(rand, selectedVars, out relevance);
53    }
54
55    private IEnumerable<double> SampleGaussianProcess(IRandom rand, List<double>[] xs, out double[] relevance) {
56      int nl = xs.Length;
57      int nRows = xs.First().Count;
58
59      // sample u iid ~ N(0, 1)
60      var u = Enumerable.Range(0, nRows).Select(_ => NormalDistributedRandom.NextDouble(rand, 0, 1)).ToArray();
61
62      // sample actual length-scales
63      var l = Enumerable.Range(0, nl)
64        .Select(_ => rand.NextDouble() * 2 + 0.5)
65        .ToArray();
66
67      double[,] K = CalculateCovariance(xs, l);
68
69      // decompose
70      alglib.trfac.spdmatrixcholesky(ref K, nRows, false);
71
72
73      // calc y = Lu
74      var y = new double[u.Length];
75      alglib.ablas.rmatrixmv(nRows, nRows, K, 0, 0, 0, u, 0, ref y, 0);
76
77      // calculate relevance by removing dimensions
78      relevance = CalculateRelevance(y, u, xs, l);
79
80      return y;
81    }
82
83    // calculate variable relevance based on removal of variables
84    //  1) to remove a variable we set it's length scale to infinity (no relation of the variable value to the target)
85    //  2) calculate MSE of the original target values (y) to the updated targes y' (after variable removal)
86    //  3) relevance is larger if MSE(y,y') is large
87    //  4) scale impacts so that the most important variable has impact = 1
88    private double[] CalculateRelevance(double[] y, double[] u, List<double>[] xs, double[] l) {
89      int nRows = xs.First().Count;
90      var changedL = new double[l.Length];
91      var relevance = new double[l.Length];
92      for(int i = 0; i < l.Length; i++) {
93        Array.Copy(l, changedL, changedL.Length);
94        changedL[i] = double.MaxValue;
95        var changedK = CalculateCovariance(xs, changedL);
96
97        var yChanged = new double[u.Length];
98        alglib.ablas.rmatrixmv(nRows, nRows, changedK, 0, 0, 0, u, 0, ref yChanged, 0);
99
100        OnlineCalculatorError error;
101        var mse = OnlineMeanSquaredErrorCalculator.Calculate(y, yChanged, out error);
102        if(error != OnlineCalculatorError.None) mse = double.MaxValue;
103        relevance[i] = mse;
104      }
105      // scale so that max relevance is 1.0
106      var maxRel = relevance.Max();
107      for(int i = 0; i < relevance.Length; i++) relevance[i] /= maxRel;
108      return relevance;
109    }
110
111    private double[,] CalculateCovariance(List<double>[] xs, double[] l) {
112      int nRows = xs.First().Count;
113      double[,] K = new double[nRows, nRows];
114      for(int r = 0; r < nRows; r++) {
115        double[] xi = xs.Select(x => x[r]).ToArray();
116        for(int c = 0; c <= r; c++) {
117          double[] xj = xs.Select(x => x[c]).ToArray();
118          double dSqr = xi.Zip(xj, (xik, xjk) => (xik - xjk))
119            .Select(dk => dk * dk)
120            .Zip(l, (dk, lk) => dk / lk)
121            .Sum();
122          K[r, c] = Math.Exp(-dSqr);
123        }
124      }
125      // add a small diagonal matrix for numeric stability
126      for(int i = 0; i < nRows; i++) {
127        K[i, i] += 1.0E-7;
128      }
129
130      return K;
131    }
132  }
133}
Note: See TracBrowser for help on using the repository browser.