Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
03/16/17 07:56:01 (7 years ago)
Author:
gkronber
Message:

#2650: merged r14597:14737 from trunk to branch

Location:
branches/symbreg-factors-2650
Files:
6 edited
2 copied

Legend:

Unmodified
Added
Removed
  • branches/symbreg-factors-2650

  • branches/symbreg-factors-2650/HeuristicLab.Problems.Instances.DataAnalysis

  • branches/symbreg-factors-2650/HeuristicLab.Problems.Instances.DataAnalysis/3.3/HeuristicLab.Problems.Instances.DataAnalysis-3.3.csproj

    r14229 r14751  
    157157    <Compile Include="Regression\FeatureSelection\FeatureSelection.cs" />
    158158    <Compile Include="Regression\FeatureSelection\FeatureSelectionInstanceProvider.cs" />
     159    <Compile Include="Regression\VariableNetworks\LinearVariableNetwork.cs" />
     160    <Compile Include="Regression\VariableNetworks\GaussianProcessVariableNetwork.cs" />
    159161    <Compile Include="Regression\VariableNetworks\VariableNetwork.cs" />
    160162    <Compile Include="Regression\VariableNetworks\VariableNetworkInstanceProvider.cs" />
  • branches/symbreg-factors-2650/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/Friedman/FriedmanRandomFunction.cs

    r14185 r14751  
    9494    }
    9595
    96     // as described in Greedy Function Approxination paper
     96    // as described in Greedy Function Approximation paper
    9797    private IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, int nTerms = 20) {
    9898      int nRows = xs.First().Count;
  • branches/symbreg-factors-2650/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VariableNetworks/VariableNetwork.cs

    r14330 r14751  
    3030
    3131namespace HeuristicLab.Problems.Instances.DataAnalysis {
    32   public class VariableNetwork : ArtificialRegressionDataDescriptor {
     32  public abstract class VariableNetwork : ArtificialRegressionDataDescriptor {
    3333    private int nTrainingSamples;
    3434    private int nTestSamples;
     
    3838    private IRandom random;
    3939
    40     public override string Name { get { return string.Format("VariableNetwork-{0:0%} ({1} dim)", noiseRatio, numberOfFeatures); } }
    4140    private string networkDefinition;
    4241    public string NetworkDefinition { get { return networkDefinition; } }
     
    4746    }
    4847
    49     public VariableNetwork(int numberOfFeatures, double noiseRatio,
    50       IRandom rand)
    51       : this(250, 250, numberOfFeatures, noiseRatio, rand) { }
    52 
    53     public VariableNetwork(int nTrainingSamples, int nTestSamples,
     48    protected VariableNetwork(int nTrainingSamples, int nTestSamples,
    5449      int numberOfFeatures, double noiseRatio, IRandom rand) {
    5550      this.nTrainingSamples = nTrainingSamples;
     
    105100
    106101      var nrand = new NormalDistributedRandom(random, 0, 1);
    107       for (int c = 0; c < numLvl0; c++) {
     102      for(int c = 0; c < numLvl0; c++) {
    108103        inputVarNames.Add(new string[] { });
    109104        relevances.Add(new double[] { });
    110         description.Add(" ~ N(0, 1)");
    111         lvl0.Add(Enumerable.Range(0, TestPartitionEnd).Select(_ => nrand.NextDouble()).ToList());
     105        description.Add(" ~ N(0, 1 + noiseLvl)");
     106        // use same generation procedure for all variables
     107        var x = Enumerable.Range(0, TestPartitionEnd).Select(_ => nrand.NextDouble()).ToList();
     108        var sigma = x.StandardDeviationPop();
     109        var mean = x.Average();
     110        for(int i = 0; i < x.Count; i++) x[i] = (x[i] - mean) / sigma;
     111        var noisePrng = new NormalDistributedRandom(random, 0, Math.Sqrt(noiseRatio / (1.0 - noiseRatio)));
     112        lvl0.Add(x.Select(t => t + noisePrng.NextDouble()).ToList());
    112113      }
    113114
     
    125126
    126127      this.variableRelevances.Clear();
    127       for (int i = 0; i < variableNames.Length; i++) {
     128      for(int i = 0; i < variableNames.Length; i++) {
    128129        var targetVarName = variableNames[i];
    129130        var targetRelevantInputs =
     
    136137      // for graphviz
    137138      networkDefinition += Environment.NewLine + "digraph G {";
    138       for (int i = 0; i < variableNames.Length; i++) {
     139      for(int i = 0; i < variableNames.Length; i++) {
    139140        var name = variableNames[i];
    140141        var selectedVarNames = inputVarNames[i];
    141142        var selectedRelevances = relevances[i];
    142         for (int j = 0; j < selectedVarNames.Length; j++) {
     143        for(int j = 0; j < selectedVarNames.Length; j++) {
    143144          var selectedVarName = selectedVarNames[j];
    144145          var selectedRelevance = selectedRelevances[j];
     
    157158
    158159    private List<List<double>> CreateVariables(List<List<double>> allowedInputs, int numVars, List<string[]> inputVarNames, List<string> description, List<double[]> relevances) {
    159       var res = new List<List<double>>();
    160       for (int c = 0; c < numVars; c++) {
     160      var newVariables = new List<List<double>>();
     161      for(int c = 0; c < numVars; c++) {
    161162        string[] selectedVarNames;
    162163        double[] relevance;
    163         var x = GenerateRandomFunction(random, allowedInputs, out selectedVarNames, out relevance);
     164        var x = GenerateRandomFunction(random, allowedInputs, out selectedVarNames, out relevance).ToArray();
     165        // standardize x
    164166        var sigma = x.StandardDeviation();
    165         var noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio)));
    166         res.Add(x.Select(t => t + noisePrng.NextDouble()).ToList());
     167        var mean = x.Average();
     168        for(int i = 0; i < x.Length; i++) x[i] = (x[i] - mean) / sigma;
     169
     170        var noisePrng = new NormalDistributedRandom(random, 0, Math.Sqrt(noiseRatio / (1.0 - noiseRatio)));
     171        newVariables.Add(x.Select(t => t + noisePrng.NextDouble()).ToList());
    167172        Array.Sort(selectedVarNames, relevance);
    168173        inputVarNames.Add(selectedVarNames);
     
    176181        description.Add(string.Format(" ~ N({0}, {1:N3}) [Relevances: {2}]", desc, noisePrng.Sigma, relevanceStr));
    177182      }
    178       return res;
     183      return newVariables;
    179184    }
    180185
    181     // sample the input variables that are actually used and sample from a Gaussian process
    182     private IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, out string[] selectedVarNames, out double[] relevance) {
     186    public int SampleNumberOfVariables(IRandom rand, int maxNumberOfVariables) {
    183187      double r = -Math.Log(1.0 - rand.NextDouble()) * 2.0; // r is exponentially distributed with lambda = 2
    184188      int nl = (int)Math.Floor(1.5 + r); // number of selected vars is likely to be between three and four
    185       if (nl > xs.Count) nl = xs.Count; // limit max
    186 
    187       var selectedIdx = Enumerable.Range(0, xs.Count).Shuffle(random)
    188         .Take(nl).ToArray();
    189 
    190       var selectedVars = selectedIdx.Select(i => xs[i]).ToArray();
    191       selectedVarNames = selectedIdx.Select(i => VariableNames[i]).ToArray();
    192       return SampleGaussianProcess(random, selectedVars, out relevance);
     189      return Math.Min(maxNumberOfVariables, nl);
    193190    }
    194191
    195     private IEnumerable<double> SampleGaussianProcess(IRandom random, List<double>[] xs, out double[] relevance) {
    196       int nl = xs.Length;
    197       int nRows = xs.First().Count;
    198 
    199       // sample u iid ~ N(0, 1)
    200       var u = Enumerable.Range(0, nRows).Select(_ => NormalDistributedRandom.NextDouble(random, 0, 1)).ToArray();
    201 
    202       // sample actual length-scales
    203       var l = Enumerable.Range(0, nl)
    204         .Select(_ => random.NextDouble() * 2 + 0.5)
    205         .ToArray();
    206 
    207       double[,] K = CalculateCovariance(xs, l);
    208 
    209       // decompose
    210       alglib.trfac.spdmatrixcholesky(ref K, nRows, false);
    211 
    212 
    213       // calc y = Lu
    214       var y = new double[u.Length];
    215       alglib.ablas.rmatrixmv(nRows, nRows, K, 0, 0, 0, u, 0, ref y, 0);
    216 
    217       // calculate relevance by removing dimensions
    218       relevance = CalculateRelevance(y, u, xs, l);
    219 
    220 
    221       // calculate variable relevance
    222       // as per Rasmussen and Williams "Gaussian Processes for Machine Learning" page 106:
    223       // ,,For the squared exponential covariance function [...] the l1, ..., lD hyperparameters
    224       // play the role of characteristic length scales [...]. Such a covariance function implements
    225       // automatic relevance determination (ARD) [Neal, 1996], since the inverse of the length-scale
    226       // determines how relevant an input is: if the length-scale has a very large value, the covariance
    227       // will become almost independent of that input, effectively removing it from inference.''
    228       // relevance = l.Select(li => 1.0 / li).ToArray();
    229 
    230       return y;
    231     }
    232 
    233     // calculate variable relevance based on removal of variables
    234     //  1) to remove a variable we set it's length scale to infinity (no relation of the variable value to the target)
    235     //  2) calculate MSE of the original target values (y) to the updated targes y' (after variable removal)
    236     //  3) relevance is larger if MSE(y,y') is large
    237     //  4) scale impacts so that the most important variable has impact = 1
    238     private double[] CalculateRelevance(double[] y, double[] u, List<double>[] xs, double[] l) {
    239       int nRows = xs.First().Count;
    240       var changedL = new double[l.Length];
    241       var relevance = new double[l.Length];
    242       for (int i = 0; i < l.Length; i++) {
    243         Array.Copy(l, changedL, changedL.Length);
    244         changedL[i] = double.MaxValue;
    245         var changedK = CalculateCovariance(xs, changedL);
    246 
    247         var yChanged = new double[u.Length];
    248         alglib.ablas.rmatrixmv(nRows, nRows, changedK, 0, 0, 0, u, 0, ref yChanged, 0);
    249 
    250         OnlineCalculatorError error;
    251         var mse = OnlineMeanSquaredErrorCalculator.Calculate(y, yChanged, out error);
    252         if (error != OnlineCalculatorError.None) mse = double.MaxValue;
    253         relevance[i] = mse;
    254       }
    255       // scale so that max relevance is 1.0
    256       var maxRel = relevance.Max();
    257       for (int i = 0; i < relevance.Length; i++) relevance[i] /= maxRel;
    258       return relevance;
    259     }
    260 
    261     private double[,] CalculateCovariance(List<double>[] xs, double[] l) {
    262       int nRows = xs.First().Count;
    263       double[,] K = new double[nRows, nRows];
    264       for (int r = 0; r < nRows; r++) {
    265         double[] xi = xs.Select(x => x[r]).ToArray();
    266         for (int c = 0; c <= r; c++) {
    267           double[] xj = xs.Select(x => x[c]).ToArray();
    268           double dSqr = xi.Zip(xj, (xik, xjk) => (xik - xjk))
    269             .Select(dk => dk * dk)
    270             .Zip(l, (dk, lk) => dk / lk)
    271             .Sum();
    272           K[r, c] = Math.Exp(-dSqr);
    273         }
    274       }
    275       // add a small diagonal matrix for numeric stability
    276       for (int i = 0; i < nRows; i++) {
    277         K[i, i] += 1.0E-7;
    278       }
    279 
    280       return K;
    281     }
     192    // sample a random function and calculate the variable relevances
     193    protected abstract IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, out string[] selectedVarNames, out double[] relevance);
    282194  }
    283195}
  • branches/symbreg-factors-2650/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VariableNetworks/VariableNetworkInstanceProvider.cs

    r14277 r14751  
    4949    public override IEnumerable<IDataDescriptor> GetDataDescriptors() {
    5050      var numVariables = new int[] { 10, 20, 50, 100 };
    51       var noiseRatios = new double[] { 0, 0.01, 0.05, 0.1 };
     51      var noiseRatios = new double[] { 0, 0.01, 0.05, 0.1, 0.2 };
    5252      var rand = new MersenneTwister((uint)Seed); // use fixed seed for deterministic problem generation
    53       return (from size in numVariables
    54               from noiseRatio in noiseRatios
    55               select new VariableNetwork(size, noiseRatio, new MersenneTwister((uint)rand.Next())))
    56               .Cast<IDataDescriptor>()
    57               .ToList();
     53      var lr = (from size in numVariables
     54                from noiseRatio in noiseRatios
     55                select new LinearVariableNetwork(size, noiseRatio, new MersenneTwister((uint)rand.Next())))
     56                .Cast<IDataDescriptor>()
     57                .ToList();
     58      var gp = (from size in numVariables
     59                from noiseRatio in noiseRatios
     60                select new GaussianProcessVariableNetwork(size, noiseRatio, new MersenneTwister((uint)rand.Next())))
     61                .Cast<IDataDescriptor>()
     62                .ToList();
     63      return lr.Concat(gp);
    5864    }
    5965
Note: See TracChangeset for help on using the changeset viewer.