Changeset 14271


Ignore:
Timestamp:
09/02/16 13:40:59 (13 months ago)
Author:
gkronber
Message:

#2660: slightly refactored VariableNetwork instance and added calculation of variable relevance

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VariableNetworks/VariableNetwork.cs

    r14260 r14271  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Globalization;
    2425using System.Linq;
    2526using HeuristicLab.Common;
     
    6061        .Select(i => string.Format("X{0:000}", i))
    6162        .ToArray();
     63
     64      variableRelevances = new Dictionary<string, IEnumerable<KeyValuePair<string, double>>>();
    6265    }
    6366
     
    8386    protected override int TestPartitionEnd { get { return nTrainingSamples + nTestSamples; } }
    8487
     88    private Dictionary<string, IEnumerable<KeyValuePair<string, double>>> variableRelevances;
     89    public IEnumerable<KeyValuePair<string, double>> GetVariableRelevance(string targetVar) {
     90      return variableRelevances[targetVar];
     91    }
    8592
    8693    protected override List<List<double>> GenerateValues() {
     
    94101      List<string> description = new List<string>(); // store information how the variable is actually produced
    95102      List<string[]> inputVarNames = new List<string[]>(); // store information to produce graphviz file
     103      List<double[]> relevances = new List<double[]>(); // stores variable relevance information (same order as given in inputVarNames)
    96104
    97105      var nrand = new NormalDistributedRandom(random, 0, 1);
    98106      for (int c = 0; c < numLvl0; c++) {
    99         var datai = Enumerable.Range(0, TestPartitionEnd).Select(_ => nrand.NextDouble()).ToList();
    100107        inputVarNames.Add(new string[] { });
     108        relevances.Add(new double[] { });
    101109        description.Add(" ~ N(0, 1)");
    102         lvl0.Add(datai);
     110        lvl0.Add(Enumerable.Range(0, TestPartitionEnd).Select(_ => nrand.NextDouble()).ToList());
    103111      }
    104112
    105113      // lvl1 contains variables which are functions of vars in lvl0 (+ noise)
    106       List<List<double>> lvl1 = new List<List<double>>();
    107114      int numLvl1 = (int)Math.Ceiling(numberOfFeatures * 0.33);
    108       for (int c = 0; c < numLvl1; c++) {
    109         string[] selectedVarNames;
    110         var x = GenerateRandomFunction(random, lvl0, out selectedVarNames);
    111         var sigma = x.StandardDeviation();
    112         var noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio)));
    113         lvl1.Add(x.Select(t => t + noisePrng.NextDouble()).ToList());
    114         Array.Sort(selectedVarNames);
    115         inputVarNames.Add(selectedVarNames);
    116         var desc = string.Format("f({0})", string.Join(",", selectedVarNames));
    117         description.Add(string.Format(" ~ N({0}, {1:N3})", desc, noisePrng.Sigma));
    118       }
     115      List<List<double>> lvl1 = CreateVariables(lvl0, numLvl1, inputVarNames, description, relevances);
    119116
    120117      // lvl2 contains variables which are functions of vars in lvl0 and lvl1 (+ noise)
    121       List<List<double>> lvl2 = new List<List<double>>();
    122118      int numLvl2 = (int)Math.Ceiling(numberOfFeatures * 0.2);
    123       for (int c = 0; c < numLvl2; c++) {
    124         string[] selectedVarNames;
    125         var x = GenerateRandomFunction(random, lvl0.Concat(lvl1).ToList(), out selectedVarNames);
    126         var sigma = x.StandardDeviation();
    127         var noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio)));
    128         lvl2.Add(x.Select(t => t + noisePrng.NextDouble()).ToList());
    129         Array.Sort(selectedVarNames);
    130         inputVarNames.Add(selectedVarNames);
    131         var desc = string.Format("f({0})", string.Join(",", selectedVarNames));
    132         description.Add(string.Format(" ~ N({0}, {1:N3})", desc, noisePrng.Sigma));
    133       }
     119      List<List<double>> lvl2 = CreateVariables(lvl0.Concat(lvl1).ToList(), numLvl2, inputVarNames, description, relevances);
    134120
    135121      // lvl3 contains variables which are functions of vars in lvl0, lvl1 and lvl2 (+ noise)
    136       List<List<double>> lvl3 = new List<List<double>>();
    137122      int numLvl3 = numberOfFeatures - numLvl0 - numLvl1 - numLvl2;
    138       for (int c = 0; c < numLvl3; c++) {
    139         string[] selectedVarNames;
    140         var x = GenerateRandomFunction(random, lvl0.Concat(lvl1).Concat(lvl2).ToList(), out selectedVarNames);
    141         var sigma = x.StandardDeviation();
    142         var noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio)));
    143         lvl3.Add(x.Select(t => t + noisePrng.NextDouble()).ToList());
    144         Array.Sort(selectedVarNames);
    145         inputVarNames.Add(selectedVarNames);
    146         var desc = string.Format("f({0})", string.Join(",", selectedVarNames));
    147         description.Add(string.Format(" ~ N({0}, {1:N3})", desc, noisePrng.Sigma));
    148       }
     123      List<List<double>> lvl3 = CreateVariables(lvl0.Concat(lvl1).Concat(lvl2).ToList(), numLvl3, inputVarNames, description, relevances);
     124
     125      this.variableRelevances.Clear();
     126      for (int i = 0; i < variableNames.Length; i++) {
     127        var targetVarName = variableNames[i];
     128        var targetRelevantInputs =
     129          inputVarNames[i].Zip(relevances[i], (inputVar, rel) => new KeyValuePair<string, double>(inputVar, rel))
     130            .ToArray();
     131        variableRelevances.Add(targetVarName, targetRelevantInputs);
     132      }
     133
    149134      networkDefinition = string.Join(Environment.NewLine, variableNames.Zip(description, (n, d) => n + d).OrderBy(x => x));
    150135      // for graphviz
    151136      networkDefinition += Environment.NewLine + "digraph G {";
    152       foreach (var t in variableNames.Zip(inputVarNames, Tuple.Create).OrderBy(t => t.Item1)) {
    153         var name = t.Item1;
    154         var selectedVarNames = t.Item2;
    155         foreach (var selectedVarName in selectedVarNames) {
    156           networkDefinition += Environment.NewLine + selectedVarName + " -> " + name;
     137      for (int i = 0; i < variableNames.Length; i++) {
     138        var name = variableNames[i];
     139        var selectedVarNames = inputVarNames[i];
     140        var selectedRelevances = relevances[i];
     141        for (int j = 0; j < selectedVarNames.Length; j++) {
     142          var selectedVarName = selectedVarNames[j];
     143          var selectedRelevance = selectedRelevances[j];
     144          networkDefinition += Environment.NewLine + selectedVarName + " -> " + name +
     145            string.Format(CultureInfo.InvariantCulture, " [label={0:N3}]", selectedRelevance);
    157146        }
    158147      }
    159148      networkDefinition += Environment.NewLine + "}";
    160149
    161       // return a random permutation of all variables
     150      // return a random permutation of all variables (to mix lvl0, lvl1, ... variables)
    162151      var allVars = lvl0.Concat(lvl1).Concat(lvl2).Concat(lvl3).ToList();
    163152      var orderedVars = allVars.Zip(variableNames, Tuple.Create).OrderBy(t => t.Item2).Select(t => t.Item1).ToList();
     
    166155    }
    167156
     157    private List<List<double>> CreateVariables(List<List<double>> allowedInputs, int numVars, List<string[]> inputVarNames, List<string> description, List<double[]> relevances) {
     158      var res = new List<List<double>>();
     159      for (int c = 0; c < numVars; c++) {
     160        string[] selectedVarNames;
     161        double[] relevance;
     162        var x = GenerateRandomFunction(random, allowedInputs, out selectedVarNames, out relevance);
     163        var sigma = x.StandardDeviation();
     164        var noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio)));
     165        res.Add(x.Select(t => t + noisePrng.NextDouble()).ToList());
     166        Array.Sort(selectedVarNames, relevance);
     167        inputVarNames.Add(selectedVarNames);
     168        relevances.Add(relevance);
     169        var desc = string.Format("f({0})", string.Join(",", selectedVarNames));
     170        // for the relevance information order variables by decreasing relevance
     171        var relevanceStr = string.Join(", ",
     172          selectedVarNames.Zip(relevance, Tuple.Create)
     173          .OrderByDescending(t => t.Item2)
     174          .Select(t => string.Format(CultureInfo.InvariantCulture, "{0}: {1:N3}", t.Item1, t.Item2)));
     175        description.Add(string.Format(" ~ N({0}, {1:N3}) [Relevances: {2}]", desc, noisePrng.Sigma, relevanceStr));
     176      }
     177      return res;
     178    }
     179
    168180    // sample the input variables that are actually used and sample from a Gaussian process
    169     private IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, out string[] selectedVarNames) {
     181    private IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, out string[] selectedVarNames, out double[] relevance) {
    170182      double r = -Math.Log(1.0 - rand.NextDouble()) * 2.0; // r is exponentially distributed with mean 2 (i.e. rate lambda = 0.5)
    171183      int nl = (int)Math.Floor(1.5 + r); // number of selected vars is likely to be between three and four
     
    177189      var selectedVars = selectedIdx.Select(i => xs[i]).ToArray();
    178190      selectedVarNames = selectedIdx.Select(i => VariableNames[i]).ToArray();
    179       return SampleGaussianProcess(random, selectedVars);
    180     }
    181 
    182     private IEnumerable<double> SampleGaussianProcess(IRandom random, List<double>[] xs) {
     191      return SampleGaussianProcess(random, selectedVars, out relevance);
     192    }
     193
     194    private IEnumerable<double> SampleGaussianProcess(IRandom random, List<double>[] xs, out double[] relevance) {
    183195      int nl = xs.Length;
    184196      int nRows = xs.First().Count;
     
    217229      alglib.ablas.rmatrixmv(nRows, nRows, K, 0, 0, 0, u, 0, ref y, 0);
    218230
     231      // calculate variable relevance
     232      // as per Rasmussen and Williams "Gaussian Processes for Machine Learning" page 106:
     233      // "For the squared exponential covariance function [...] the l1, ..., lD hyperparameters
     234      // play the role of characteristic length scales [...]. Such a covariance function implements
     235      // automatic relevance determination (ARD) [Neal, 1996], since the inverse of the length-scale
     236      // determines how relevant an input is: if the length-scale has a very large value, the covariance
     237      // will become almost independent of that input, effectively removing it from inference."
     238      relevance = l.Select(li => 1.0 / li).ToArray();
     239
    219240      return y;
    220241    }
Note: See TracChangeset for help on using the changeset viewer.