1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 20022016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Collections.Generic;


24  using System.Linq;


25  using HeuristicLab.Core;


26  using HeuristicLab.Problems.DataAnalysis;


27  using HeuristicLab.Random;


28 


29  namespace HeuristicLab.Problems.Instances.DataAnalysis {


30  public sealed class GaussianProcessVariableNetwork : VariableNetwork {


31  private int numberOfFeatures;


32  private double noiseRatio;


33 


34  public override string Name { get { return string.Format("GaussianProcessVariableNetwork{0:0%} ({1} dim)", noiseRatio, numberOfFeatures); } }


35 


36  public GaussianProcessVariableNetwork(int numberOfFeatures, double noiseRatio,


37  IRandom rand)


38  : base(250, 250, numberOfFeatures, noiseRatio, rand) {


39  this.noiseRatio = noiseRatio;


40  this.numberOfFeatures = numberOfFeatures;


41  }


42 


43  // sample the input variables that are actually used and sample from a Gaussian process


44  protected override IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, out string[] selectedVarNames, out double[] relevance) {


45  int nl = SampleNumberOfVariables(rand, xs.Count);


46 


47  var selectedIdx = Enumerable.Range(0, xs.Count).Shuffle(rand)


48  .Take(nl).ToArray();


49 


50  var selectedVars = selectedIdx.Select(i => xs[i]).ToArray();


51  selectedVarNames = selectedIdx.Select(i => VariableNames[i]).ToArray();


52  return SampleGaussianProcess(rand, selectedVars, out relevance);


53  }


54 


55  private IEnumerable<double> SampleGaussianProcess(IRandom rand, List<double>[] xs, out double[] relevance) {


56  int nl = xs.Length;


57  int nRows = xs.First().Count;


58 


59  // sample u iid ~ N(0, 1)


60  var u = Enumerable.Range(0, nRows).Select(_ => NormalDistributedRandom.NextDouble(rand, 0, 1)).ToArray();


61 


62  // sample actual lengthscales


63  var l = Enumerable.Range(0, nl)


64  .Select(_ => rand.NextDouble() * 2 + 0.5)


65  .ToArray();


66 


67  double[,] K = CalculateCovariance(xs, l);


68 


69  // decompose


70  alglib.trfac.spdmatrixcholesky(ref K, nRows, false);


71 


72 


73  // calc y = Lu


74  var y = new double[u.Length];


75  alglib.ablas.rmatrixmv(nRows, nRows, K, 0, 0, 0, u, 0, ref y, 0);


76 


77  // calculate relevance by removing dimensions


78  relevance = CalculateRelevance(y, u, xs, l);


79 


80  return y;


81  }


82 


83  // calculate variable relevance based on removal of variables


84  // 1) to remove a variable we set it's length scale to infinity (no relation of the variable value to the target)


85  // 2) calculate MSE of the original target values (y) to the updated targes y' (after variable removal)


86  // 3) relevance is larger if MSE(y,y') is large


87  // 4) scale impacts so that the most important variable has impact = 1


88  private double[] CalculateRelevance(double[] y, double[] u, List<double>[] xs, double[] l) {


89  int nRows = xs.First().Count;


90  var changedL = new double[l.Length];


91  var relevance = new double[l.Length];


92  for(int i = 0; i < l.Length; i++) {


93  Array.Copy(l, changedL, changedL.Length);


94  changedL[i] = double.MaxValue;


95  var changedK = CalculateCovariance(xs, changedL);


96 


97  var yChanged = new double[u.Length];


98  alglib.ablas.rmatrixmv(nRows, nRows, changedK, 0, 0, 0, u, 0, ref yChanged, 0);


99 


100  OnlineCalculatorError error;


101  var mse = OnlineMeanSquaredErrorCalculator.Calculate(y, yChanged, out error);


102  if(error != OnlineCalculatorError.None) mse = double.MaxValue;


103  relevance[i] = mse;


104  }


105  // scale so that max relevance is 1.0


106  var maxRel = relevance.Max();


107  for(int i = 0; i < relevance.Length; i++) relevance[i] /= maxRel;


108  return relevance;


109  }


110 


111  private double[,] CalculateCovariance(List<double>[] xs, double[] l) {


112  int nRows = xs.First().Count;


113  double[,] K = new double[nRows, nRows];


114  for(int r = 0; r < nRows; r++) {


115  double[] xi = xs.Select(x => x[r]).ToArray();


116  for(int c = 0; c <= r; c++) {


117  double[] xj = xs.Select(x => x[c]).ToArray();


118  double dSqr = xi.Zip(xj, (xik, xjk) => (xik  xjk))


119  .Select(dk => dk * dk)


120  .Zip(l, (dk, lk) => dk / lk)


121  .Sum();


122  K[r, c] = Math.Exp(dSqr);


123  }


124  }


125  // add a small diagonal matrix for numeric stability


126  for(int i = 0; i < nRows; i++) {


127  K[i, i] += 1.0E7;


128  }


129 


130  return K;


131  }


132  }


133  }

