Changeset 15195 for stable/HeuristicLab.Problems.Instances.DataAnalysis
- Timestamp:
- 07/11/17 12:55:52 (8 years ago)
- Location:
- stable/HeuristicLab.Problems.Instances.DataAnalysis/3.3
- Files:
-
- 4 edited
- 2 copied
Legend:
- Unmodified
- Added
- Removed
-
stable/HeuristicLab.Problems.Instances.DataAnalysis/3.3/HeuristicLab.Problems.Instances.DataAnalysis-3.3.csproj
r14305 r15195 157 157 <Compile Include="Regression\FeatureSelection\FeatureSelection.cs" /> 158 158 <Compile Include="Regression\FeatureSelection\FeatureSelectionInstanceProvider.cs" /> 159 <Compile Include="Regression\VariableNetworks\LinearVariableNetwork.cs" /> 160 <Compile Include="Regression\VariableNetworks\GaussianProcessVariableNetwork.cs" /> 159 161 <Compile Include="Regression\VariableNetworks\VariableNetwork.cs" /> 160 162 <Compile Include="Regression\VariableNetworks\VariableNetworkInstanceProvider.cs" /> -
stable/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/Friedman/FriedmanRandomFunction.cs
r14186 r15195 94 94 } 95 95 96 // as described in Greedy Function Approxi nation paper 96 // as described in Greedy Function Approximation paper 97 97 private IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, int nTerms = 20) { 98 98 int nRows = xs.First().Count; -
stable/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VariableNetworks/VariableNetwork.cs
r14186 r15195 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Globalization; 24 25 using System.Linq; 25 26 using HeuristicLab.Common; 26 27 using HeuristicLab.Core; 28 using HeuristicLab.Problems.DataAnalysis; 27 29 using HeuristicLab.Random; 28 30 29 31 namespace HeuristicLab.Problems.Instances.DataAnalysis { 30 public class VariableNetwork : ArtificialRegressionDataDescriptor {32 public abstract class VariableNetwork : ArtificialRegressionDataDescriptor { 31 33 private int nTrainingSamples; 32 34 private int nTestSamples; … … 36 38 private IRandom random; 37 39 38 public override string Name { get { return string.Format("VariableNetwork-{0:0%} ({1} dim)", noiseRatio, numberOfFeatures); } }39 40 private string networkDefinition; 40 41 public string NetworkDefinition { get { return networkDefinition; } } … … 45 46 } 46 47 47 public VariableNetwork(int numberOfFeatures, double noiseRatio, 48 IRandom rand) 49 : this(250, 250, numberOfFeatures, noiseRatio, rand) { } 50 51 public VariableNetwork(int nTrainingSamples, int nTestSamples, 48 protected VariableNetwork(int nTrainingSamples, int nTestSamples, 52 49 int numberOfFeatures, double noiseRatio, IRandom rand) { 53 50 this.nTrainingSamples = nTrainingSamples; … … 60 57 .Select(i => string.Format("X{0:000}", i)) 61 58 .ToArray(); 59 60 variableRelevances = new Dictionary<string, IEnumerable<KeyValuePair<string, double>>>(); 62 61 } 63 62 … … 83 82 protected override int TestPartitionEnd { get { return nTrainingSamples + nTestSamples; } } 84 83 84 private Dictionary<string, IEnumerable<KeyValuePair<string, double>>> variableRelevances; 85 public IEnumerable<KeyValuePair<string, double>> GetVariableRelevance(string targetVar) { 86 return variableRelevances[targetVar]; 87 } 85 88 86 89 protected override List<List<double>> GenerateValues() { … … 94 97 List<string> description = new List<string>(); // store information how the variable is actually produced 95 98 List<string[]> inputVarNames = 
new List<string[]>(); // store information to produce graphviz file 99 List<double[]> relevances = new List<double[]>(); // stores variable relevance information (same order as given in inputVarNames) 96 100 97 101 var nrand = new NormalDistributedRandom(random, 0, 1); 98 for (int c = 0; c < numLvl0; c++) { 99 var datai = Enumerable.Range(0, TestPartitionEnd).Select(_ => nrand.NextDouble()).ToList(); 102 for(int c = 0; c < numLvl0; c++) { 100 103 inputVarNames.Add(new string[] { }); 101 description.Add("~ N(0, 1)"); 102 lvl0.Add(datai); 104 relevances.Add(new double[] { }); 105 description.Add(" ~ N(0, 1 + noiseLvl)"); 106 // use same generation procedure for all variables 107 var x = Enumerable.Range(0, TestPartitionEnd).Select(_ => nrand.NextDouble()).ToList(); 108 var sigma = x.StandardDeviationPop(); 109 var mean = x.Average(); 110 for(int i = 0; i < x.Count; i++) x[i] = (x[i] - mean) / sigma; 111 var noisePrng = new NormalDistributedRandom(random, 0, Math.Sqrt(noiseRatio / (1.0 - noiseRatio))); 112 lvl0.Add(x.Select(t => t + noisePrng.NextDouble()).ToList()); 103 113 } 104 114 105 115 // lvl1 contains variables which are functions of vars in lvl0 (+ noise) 106 List<List<double>> lvl1 = new List<List<double>>();107 116 int numLvl1 = (int)Math.Ceiling(numberOfFeatures * 0.33); 108 for (int c = 0; c < numLvl1; c++) { 109 string[] selectedVarNames; 110 var x = GenerateRandomFunction(random, lvl0, out selectedVarNames); 111 var sigma = x.StandardDeviation(); 112 var noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio))); 113 lvl1.Add(x.Select(t => t + noisePrng.NextDouble()).ToList()); 117 List<List<double>> lvl1 = CreateVariables(lvl0, numLvl1, inputVarNames, description, relevances); 114 118 115 inputVarNames.Add(selectedVarNames); 116 var desc = string.Format("f({0})", string.Join(",", selectedVarNames)); 117 description.Add(string.Format(" ~ N({0}, {1:N3})", desc, noisePrng.Sigma)); 119 // lvl2 contains variables 
which are functions of vars in lvl0 and lvl1 (+ noise) 120 int numLvl2 = (int)Math.Ceiling(numberOfFeatures * 0.2); 121 List<List<double>> lvl2 = CreateVariables(lvl0.Concat(lvl1).ToList(), numLvl2, inputVarNames, description, relevances); 122 123 // lvl3 contains variables which are functions of vars in lvl0, lvl1 and lvl2 (+ noise) 124 int numLvl3 = numberOfFeatures - numLvl0 - numLvl1 - numLvl2; 125 List<List<double>> lvl3 = CreateVariables(lvl0.Concat(lvl1).Concat(lvl2).ToList(), numLvl3, inputVarNames, description, relevances); 126 127 this.variableRelevances.Clear(); 128 for(int i = 0; i < variableNames.Length; i++) { 129 var targetVarName = variableNames[i]; 130 var targetRelevantInputs = 131 inputVarNames[i].Zip(relevances[i], (inputVar, rel) => new KeyValuePair<string, double>(inputVar, rel)) 132 .ToArray(); 133 variableRelevances.Add(targetVarName, targetRelevantInputs); 118 134 } 119 135 120 // lvl2 contains variables which are functions of vars in lvl0 and lvl1 (+ noise) 121 List<List<double>> lvl2 = new List<List<double>>(); 122 int numLvl2 = (int)Math.Ceiling(numberOfFeatures * 0.2); 123 for (int c = 0; c < numLvl2; c++) { 124 string[] selectedVarNames; 125 var x = GenerateRandomFunction(random, lvl0.Concat(lvl1).ToList(), out selectedVarNames); 126 var sigma = x.StandardDeviation(); 127 var noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio))); 128 lvl2.Add(x.Select(t => t + noisePrng.NextDouble()).ToList()); 129 130 inputVarNames.Add(selectedVarNames); 131 var desc = string.Format("f({0})", string.Join(",", selectedVarNames)); 132 description.Add(string.Format(" ~ N({0}, {1:N3})", desc, noisePrng.Sigma)); 133 } 134 135 // lvl3 contains variables which are functions of vars in lvl0, lvl1 and lvl2 (+ noise) 136 List<List<double>> lvl3 = new List<List<double>>(); 137 int numLvl3 = numberOfFeatures - numLvl0 - numLvl1 - numLvl2; 138 for (int c = 0; c < numLvl3; c++) { 139 string[] selectedVarNames; 140 
var x = GenerateRandomFunction(random, lvl0.Concat(lvl1).Concat(lvl2).ToList(), out selectedVarNames); 141 var sigma = x.StandardDeviation(); 142 var noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio))); 143 lvl3.Add(x.Select(t => t + noisePrng.NextDouble()).ToList()); 144 145 inputVarNames.Add(selectedVarNames); 146 var desc = string.Format("f({0})", string.Join(",", selectedVarNames)); 147 description.Add(string.Format(" ~ N({0}, {1:N3})", desc, noisePrng.Sigma)); 148 } 149 150 networkDefinition = string.Join(Environment.NewLine, variableNames.Zip(description, (n, d) => n + d)); 136 networkDefinition = string.Join(Environment.NewLine, variableNames.Zip(description, (n, d) => n + d).OrderBy(x => x)); 151 137 // for graphviz 152 138 networkDefinition += Environment.NewLine + "digraph G {"; 153 foreach (var t in variableNames.Zip(inputVarNames, Tuple.Create).OrderBy(t => t.Item1)) { 154 var name = t.Item1; 155 var selectedVarNames = t.Item2; 156 foreach (var selectedVarName in selectedVarNames) { 157 networkDefinition += Environment.NewLine + selectedVarName + " -> " + name; 139 for(int i = 0; i < variableNames.Length; i++) { 140 var name = variableNames[i]; 141 var selectedVarNames = inputVarNames[i]; 142 var selectedRelevances = relevances[i]; 143 for(int j = 0; j < selectedVarNames.Length; j++) { 144 var selectedVarName = selectedVarNames[j]; 145 var selectedRelevance = selectedRelevances[j]; 146 networkDefinition += Environment.NewLine + selectedVarName + " -> " + name + 147 string.Format(CultureInfo.InvariantCulture, " [label={0:N3}]", selectedRelevance); 158 148 } 159 149 } 160 150 networkDefinition += Environment.NewLine + "}"; 161 151 162 // return a random permutation of all variables 152 // return a random permutation of all variables (to mix lvl0, lvl1, ... 
variables) 163 153 var allVars = lvl0.Concat(lvl1).Concat(lvl2).Concat(lvl3).ToList(); 164 154 var orderedVars = allVars.Zip(variableNames, Tuple.Create).OrderBy(t => t.Item2).Select(t => t.Item1).ToList(); … … 167 157 } 168 158 169 // sample the input variables that are actually used and sample from a Gaussian process 170 private IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, out string[] selectedVarNames) { 159 private List<List<double>> CreateVariables(List<List<double>> allowedInputs, int numVars, List<string[]> inputVarNames, List<string> description, List<double[]> relevances) { 160 var newVariables = new List<List<double>>(); 161 for(int c = 0; c < numVars; c++) { 162 string[] selectedVarNames; 163 double[] relevance; 164 var x = GenerateRandomFunction(random, allowedInputs, out selectedVarNames, out relevance).ToArray(); 165 // standardize x 166 var sigma = x.StandardDeviation(); 167 var mean = x.Average(); 168 for(int i = 0; i < x.Length; i++) x[i] = (x[i] - mean) / sigma; 169 170 var noisePrng = new NormalDistributedRandom(random, 0, Math.Sqrt(noiseRatio / (1.0 - noiseRatio))); 171 newVariables.Add(x.Select(t => t + noisePrng.NextDouble()).ToList()); 172 Array.Sort(selectedVarNames, relevance); 173 inputVarNames.Add(selectedVarNames); 174 relevances.Add(relevance); 175 var desc = string.Format("f({0})", string.Join(",", selectedVarNames)); 176 // for the relevance information order variables by decreasing relevance 177 var relevanceStr = string.Join(", ", 178 selectedVarNames.Zip(relevance, Tuple.Create) 179 .OrderByDescending(t => t.Item2) 180 .Select(t => string.Format(CultureInfo.InvariantCulture, "{0}: {1:N3}", t.Item1, t.Item2))); 181 description.Add(string.Format(" ~ N({0}, {1:N3}) [Relevances: {2}]", desc, noisePrng.Sigma, relevanceStr)); 182 } 183 return newVariables; 184 } 185 186 public int SampleNumberOfVariables(IRandom rand, int maxNumberOfVariables) { 171 187 double r = -Math.Log(1.0 - rand.NextDouble()) * 
2.0; // r is exponentially distributed with lambda = 2 172 188 int nl = (int)Math.Floor(1.5 + r); // number of selected vars is likely to be between three and four 173 if (nl > xs.Count) nl = xs.Count; // limit max 174 175 var selectedIdx = Enumerable.Range(0, xs.Count).Shuffle(random) 176 .Take(nl).ToArray(); 177 178 var selectedVars = selectedIdx.Select(i => xs[i]).ToArray(); 179 selectedVarNames = selectedIdx.Select(i => VariableNames[i]).ToArray(); 180 return SampleGaussianProcess(random, selectedVars); 189 return Math.Min(maxNumberOfVariables, nl); 181 190 } 182 191 183 private IEnumerable<double> SampleGaussianProcess(IRandom random, List<double>[] xs) { 184 int nl = xs.Length; 185 int nRows = xs.First().Count; 186 double[,] K = new double[nRows, nRows]; 187 188 // sample length-scales 189 var l = Enumerable.Range(0, nl) 190 .Select(_ => random.NextDouble() * 2 + 0.5) 191 .ToArray(); 192 // calculate covariance matrix 193 for (int r = 0; r < nRows; r++) { 194 double[] xi = xs.Select(x => x[r]).ToArray(); 195 for (int c = 0; c <= r; c++) { 196 double[] xj = xs.Select(x => x[c]).ToArray(); 197 double dSqr = xi.Zip(xj, (xik, xjk) => (xik - xjk)) 198 .Select(dk => dk * dk) 199 .Zip(l, (dk, lk) => dk / lk) 200 .Sum(); 201 K[r, c] = Math.Exp(-dSqr); 202 } 203 } 204 205 // add a small diagonal matrix for numeric stability 206 for (int i = 0; i < nRows; i++) { 207 K[i, i] += 1.0E-7; 208 } 209 210 // decompose 211 alglib.trfac.spdmatrixcholesky(ref K, nRows, false); 212 213 // sample u iid ~ N(0, 1) 214 var u = Enumerable.Range(0, nRows).Select(_ => NormalDistributedRandom.NextDouble(random, 0, 1)).ToArray(); 215 216 // calc y = Lu 217 var y = new double[u.Length]; 218 alglib.ablas.rmatrixmv(nRows, nRows, K, 0, 0, 0, u, 0, ref y, 0); 219 220 return y; 221 } 192 // sample a random function and calculate the variable relevances 193 protected abstract IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, out string[] selectedVarNames, out 
double[] relevance); 222 194 } 223 195 } -
stable/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VariableNetworks/VariableNetworkInstanceProvider.cs
r14305 r15195 32 32 } 33 33 public override string Description { 34 get { return "A set of regression benchmark instances for variable network analysis "; } 34 get { return "A set of regression benchmark instances for variable network analysis. The data for these instances are randomly generated as described in the reference publication."; } 35 35 } 36 36 public override Uri WebLink { … … 38 38 } 39 39 public override string ReferencePublication { 40 get { return " "; } 40 get { return "G. Kronberger, B. Burlacu, M. Kommenda, S. Winkler, M. Affenzeller. Measures for the Evaluation and Comparison of Graphical Model Structures. to appear in Computer Aided Systems Theory - EUROCAST 2017, Springer 2018"; } 41 41 } 42 42 public int Seed { get; private set; } … … 49 49 public override IEnumerable<IDataDescriptor> GetDataDescriptors() { 50 50 var numVariables = new int[] { 10, 20, 50, 100 }; 51 var noiseRatios = new double[] { 0.01, 0.05, 0.1}; 51 var noiseRatios = new double[] { 0, 0.01, 0.05, 0.1, 0.2 }; 52 52 var rand = new MersenneTwister((uint)Seed); // use fixed seed for deterministic problem generation 53 return (from size in numVariables 54 from noiseRatio in noiseRatios 55 select new VariableNetwork(size, noiseRatio, new MersenneTwister((uint)rand.Next()))) 56 .Cast<IDataDescriptor>() 57 .ToList(); 53 var lr = (from size in numVariables 54 from noiseRatio in noiseRatios 55 select new LinearVariableNetwork(size, noiseRatio, new MersenneTwister((uint)rand.Next()))) 56 .Cast<IDataDescriptor>() 57 .ToList(); 58 var gp = (from size in numVariables 59 from noiseRatio in noiseRatios 60 select new GaussianProcessVariableNetwork(size, noiseRatio, new MersenneTwister((uint)rand.Next()))) 61 .Cast<IDataDescriptor>() 62 .ToList(); 63 return lr.Concat(gp); 58 64 } 59 65
Note: See TracChangeset
for help on using the changeset viewer.