#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion


using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Random;


namespace HeuristicLab.Problems.Instances.DataAnalysis {
  /// <summary>
  /// Artificial regression data set for testing variable network analysis methods.
  /// About a third of the variables are independent N(0, 1) draws; every other variable is a
  /// random function (a Gaussian process sample) of previously generated variables plus
  /// normally distributed noise.
  /// </summary>
  public class VariableNetwork : ArtificialRegressionDataDescriptor {
    private readonly int nTrainingSamples;
    private readonly int nTestSamples;

    private readonly int numberOfFeatures;
    // Target share of the noise variance in the total variance of each dependent variable.
    // Expected to lie in [0, 1); the value is not validated here.
    private readonly double noiseRatio;
    private readonly IRandom random;

    public override string Name { get { return string.Format("VariableNetwork{0:0%} ({1} dim)", noiseRatio, numberOfFeatures); } }

    private string networkDefinition;
    /// <summary>
    /// Textual description of how each variable was produced, followed by a graphviz digraph of
    /// the dependencies. Null until <see cref="GenerateValues"/> has been called.
    /// </summary>
    public string NetworkDefinition { get { return networkDefinition; } }

    public override string Description {
      get {
        return "The data are generated specifically to test methods for variable network analysis.";
      }
    }

    /// <summary>
    /// Creates a descriptor with 250 training and 250 test samples.
    /// </summary>
    /// <param name="numberOfFeatures">Total number of variables to generate.</param>
    /// <param name="noiseRatio">Share of noise variance in each dependent variable.</param>
    /// <param name="rand">Random number generator used for all sampling.</param>
    public VariableNetwork(int numberOfFeatures, double noiseRatio,
      IRandom rand)
      : this(250, 250, numberOfFeatures, noiseRatio, rand) { }

    /// <summary>
    /// Creates a descriptor with explicit partition sizes.
    /// </summary>
    /// <param name="nTrainingSamples">Number of rows in the training partition.</param>
    /// <param name="nTestSamples">Number of rows in the test partition.</param>
    /// <param name="numberOfFeatures">Total number of variables to generate.</param>
    /// <param name="noiseRatio">Share of noise variance in each dependent variable.</param>
    /// <param name="rand">Random number generator used for all sampling.</param>
    public VariableNetwork(int nTrainingSamples, int nTestSamples,
      int numberOfFeatures, double noiseRatio, IRandom rand) {
      this.nTrainingSamples = nTrainingSamples;
      this.nTestSamples = nTestSamples;
      this.noiseRatio = noiseRatio;
      this.random = rand;
      this.numberOfFeatures = numberOfFeatures;
      // default variable names: X001, X002, ...
      variableNames = Enumerable.Range(1, numberOfFeatures)
        .Select(i => string.Format("X{0:000}", i))
        .ToArray();
    }

    private string[] variableNames;
    protected override string[] VariableNames {
      get {
        return variableNames;
      }
    }

    // there is no specific target variable in variable network analysis but we still need to specify one
    protected override string TargetVariable { get { return VariableNames.Last(); } }

    // all variables except the last one (which serves as the target)
    protected override string[] AllowedInputVariables {
      get {
        return VariableNames.Take(numberOfFeatures - 1).ToArray();
      }
    }

    protected override int TrainingPartitionStart { get { return 0; } }
    protected override int TrainingPartitionEnd { get { return nTrainingSamples; } }
    protected override int TestPartitionStart { get { return nTrainingSamples; } }
    protected override int TestPartitionEnd { get { return nTrainingSamples + nTestSamples; } }

    /// <summary>
    /// Generates all variable columns and rebuilds <see cref="NetworkDefinition"/>.
    /// </summary>
    /// <returns>One list of values per variable, ordered by (sorted) variable name.</returns>
    protected override List<List<double>> GenerateValues() {
      // variable names are shuffled in the beginning (and sorted at the end) so that the
      // position of a variable in the sorted data set does not reveal its level
      variableNames = variableNames.Shuffle(random).ToArray();

      // Level sizes. The rounded-up shares can add up to more than numberOfFeatures for some
      // small feature counts, so every level is clamped to the number of variables still missing.
      int numLvl0 = Math.Min((int)Math.Ceiling(numberOfFeatures * 0.33), numberOfFeatures);
      int numLvl1 = Math.Min((int)Math.Ceiling(numberOfFeatures * 0.33), numberOfFeatures - numLvl0);
      int numLvl2 = Math.Min((int)Math.Ceiling(numberOfFeatures * 0.2), numberOfFeatures - numLvl0 - numLvl1);
      int numLvl3 = numberOfFeatures - numLvl0 - numLvl1 - numLvl2;

      List<string> description = new List<string>(); // store information how the variable is actually produced
      List<string[]> inputVarNames = new List<string[]>(); // store information to produce graphviz file

      // a third of all variables are independent vars
      List<List<double>> lvl0 = new List<List<double>>();
      var nrand = new NormalDistributedRandom(random, 0, 1);
      for (int c = 0; c < numLvl0; c++) {
        var datai = Enumerable.Range(0, TestPartitionEnd).Select(_ => nrand.NextDouble()).ToList();
        inputVarNames.Add(new string[] { });
        description.Add("~ N(0, 1)");
        lvl0.Add(datai);
      }

      // lvl1 contains variables which are functions of vars in lvl0 (+ noise)
      List<List<double>> lvl1 = GenerateDependentLevel(numLvl1, lvl0, inputVarNames, description);

      // lvl2 contains variables which are functions of vars in lvl0 and lvl1 (+ noise)
      List<List<double>> lvl2 = GenerateDependentLevel(numLvl2, lvl0.Concat(lvl1).ToList(), inputVarNames, description);

      // lvl3 contains variables which are functions of vars in lvl0, lvl1 and lvl2 (+ noise)
      List<List<double>> lvl3 = GenerateDependentLevel(numLvl3, lvl0.Concat(lvl1).Concat(lvl2).ToList(), inputVarNames, description);

      // one line per variable: name followed by its generating distribution
      var definition = new StringBuilder();
      definition.Append(string.Join(Environment.NewLine, variableNames.Zip(description, (varName, varDesc) => varName + varDesc)));

      // for graphviz: one edge "input -> variable" per dependency, grouped by sorted variable name
      definition.Append(Environment.NewLine).Append("digraph G {");
      foreach (var edge in variableNames.Zip(inputVarNames, (varName, inputs) => Tuple.Create(varName, inputs)).OrderBy(e => e.Item1)) {
        var target = edge.Item1;
        foreach (var source in edge.Item2) {
          definition.Append(Environment.NewLine).Append(source).Append(" -> ").Append(target);
        }
      }
      definition.Append(Environment.NewLine).Append("}");
      networkDefinition = definition.ToString();

      // The columns were produced in shuffled-name order; return them sorted by name and
      // restore the sorted name array so that names and columns line up again.
      var allVars = lvl0.Concat(lvl1).Concat(lvl2).Concat(lvl3).ToList();
      var orderedVars = allVars.Zip(variableNames, (values, varName) => Tuple.Create(values, varName))
        .OrderBy(p => p.Item2)
        .Select(p => p.Item1)
        .ToList();
      variableNames = variableNames.OrderBy(s => s).ToArray();
      return orderedVars;
    }

    // Generates 'count' variables, each a random function of some of the 'inputs' columns plus
    // noise, and records the selected input names and a textual description for every new variable.
    private List<List<double>> GenerateDependentLevel(int count, List<List<double>> inputs,
      List<string[]> inputVarNames, List<string> description) {
      var level = new List<List<double>>();
      for (int c = 0; c < count; c++) {
        string[] selectedVarNames;
        var x = GenerateRandomFunction(random, inputs, out selectedVarNames);
        var sigma = x.StandardDeviation();
        // noise std. dev. chosen such that var(noise) / (var(x) + var(noise)) = noiseRatio
        var noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio)));
        level.Add(x.Select(t => t + noisePrng.NextDouble()).ToList());

        inputVarNames.Add(selectedVarNames);
        var desc = string.Format("f({0})", string.Join(",", selectedVarNames));
        description.Add(string.Format(" ~ N({0}, {1:N3})", desc, noisePrng.Sigma));
      }
      return level;
    }

    // sample the input variables that are actually used and sample from a Gaussian process
    // xs: candidate input columns; index i in xs corresponds to VariableNames[i]
    // selectedVarNames: receives the names of the columns that were picked as inputs
    private IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, out string[] selectedVarNames) {
      // inverse transform sampling: r is exponentially distributed with mean 2
      // (r >= 0 assuming rand.NextDouble() returns values in [0, 1))
      double r = -Math.Log(1.0 - rand.NextDouble()) * 2.0;
      int nl = (int)Math.Floor(1.5 + r); // number of selected vars is likely to be between three and four
      if (nl > xs.Count) nl = xs.Count; // limit max

      var selectedIdx = Enumerable.Range(0, xs.Count).Shuffle(rand)
        .Take(nl).ToArray();

      var selectedVars = selectedIdx.Select(i => xs[i]).ToArray();
      selectedVarNames = selectedIdx.Select(i => VariableNames[i]).ToArray();
      return SampleGaussianProcess(rand, selectedVars);
    }

    // Draws one sample path of a zero-mean Gaussian process evaluated at the rows of xs.
    // xs: the selected input columns (all of the same length); must contain at least one column
    private IEnumerable<double> SampleGaussianProcess(IRandom random, List<double>[] xs) {
      int nl = xs.Length;
      int nRows = xs.First().Count;
      double[,] K = new double[nRows, nRows];

      // sample one length-scale per input dimension
      var l = Enumerable.Range(0, nl)
        .Select(_ => random.NextDouble() * 2 + 0.5)
        .ToArray();

      // calculate covariance matrix (lower triangle only):
      // k(r, c) = exp(-sum_k (x_k[r] - x_k[c])^2 / l_k)
      for (int r = 0; r < nRows; r++) {
        for (int c = 0; c <= r; c++) {
          double dSqr = 0.0;
          for (int k = 0; k < nl; k++) {
            double d = xs[k][r] - xs[k][c];
            dSqr += d * d / l[k];
          }
          K[r, c] = Math.Exp(-dSqr);
        }
      }

      // add a small diagonal matrix for numeric stability
      for (int i = 0; i < nRows; i++) {
        K[i, i] += 1.0E-7;
      }

      // decompose K = L L^T in place (isupper = false: the lower triangle is used)
      alglib.trfac.spdmatrixcholesky(ref K, nRows, false);

      // sample u iid ~ N(0, 1)
      var u = Enumerable.Range(0, nRows).Select(_ => NormalDistributedRandom.NextDouble(random, 0, 1)).ToArray();

      // calc y = Lu
      var y = new double[u.Length];
      alglib.ablas.rmatrixmv(nRows, nRows, K, 0, 0, 0, u, 0, ref y, 0);

      return y;
    }
  }
}

