Changeset 14271 for trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VariableNetworks
- Timestamp: 09/02/16 13:40:59 (8 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VariableNetworks/VariableNetwork.cs
r14260 r14271 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Globalization; 24 25 using System.Linq; 25 26 using HeuristicLab.Common; … … 60 61 .Select(i => string.Format("X{0:000}", i)) 61 62 .ToArray(); 63 64 variableRelevances = new Dictionary<string, IEnumerable<KeyValuePair<string, double>>>(); 62 65 } 63 66 … … 83 86 protected override int TestPartitionEnd { get { return nTrainingSamples + nTestSamples; } } 84 87 88 private Dictionary<string, IEnumerable<KeyValuePair<string, double>>> variableRelevances; 89 public IEnumerable<KeyValuePair<string, double>> GetVariableRelevance(string targetVar) { 90 return variableRelevances[targetVar]; 91 } 85 92 86 93 protected override List<List<double>> GenerateValues() { … … 94 101 List<string> description = new List<string>(); // store information how the variable is actually produced 95 102 List<string[]> inputVarNames = new List<string[]>(); // store information to produce graphviz file 103 List<double[]> relevances = new List<double[]>(); // stores variable relevance information (same order as given in inputVarNames) 96 104 97 105 var nrand = new NormalDistributedRandom(random, 0, 1); 98 106 for (int c = 0; c < numLvl0; c++) { 99 var datai = Enumerable.Range(0, TestPartitionEnd).Select(_ => nrand.NextDouble()).ToList();100 107 inputVarNames.Add(new string[] { }); 108 relevances.Add(new double[] { }); 101 109 description.Add(" ~ N(0, 1)"); 102 lvl0.Add( datai);110 lvl0.Add(Enumerable.Range(0, TestPartitionEnd).Select(_ => nrand.NextDouble()).ToList()); 103 111 } 104 112 105 113 // lvl1 contains variables which are functions of vars in lvl0 (+ noise) 106 List<List<double>> lvl1 = new List<List<double>>();107 114 int numLvl1 = (int)Math.Ceiling(numberOfFeatures * 0.33); 108 for (int c = 0; c < numLvl1; c++) { 109 string[] selectedVarNames; 110 var x = GenerateRandomFunction(random, lvl0, out selectedVarNames); 111 var sigma = x.StandardDeviation(); 112 var noisePrng = new 
NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio))); 113 lvl1.Add(x.Select(t => t + noisePrng.NextDouble()).ToList()); 114 Array.Sort(selectedVarNames); 115 inputVarNames.Add(selectedVarNames); 116 var desc = string.Format("f({0})", string.Join(",", selectedVarNames)); 117 description.Add(string.Format(" ~ N({0}, {1:N3})", desc, noisePrng.Sigma)); 118 } 115 List<List<double>> lvl1 = CreateVariables(lvl0, numLvl1, inputVarNames, description, relevances); 119 116 120 117 // lvl2 contains variables which are functions of vars in lvl0 and lvl1 (+ noise) 121 List<List<double>> lvl2 = new List<List<double>>();122 118 int numLvl2 = (int)Math.Ceiling(numberOfFeatures * 0.2); 123 for (int c = 0; c < numLvl2; c++) { 124 string[] selectedVarNames; 125 var x = GenerateRandomFunction(random, lvl0.Concat(lvl1).ToList(), out selectedVarNames); 126 var sigma = x.StandardDeviation(); 127 var noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio))); 128 lvl2.Add(x.Select(t => t + noisePrng.NextDouble()).ToList()); 129 Array.Sort(selectedVarNames); 130 inputVarNames.Add(selectedVarNames); 131 var desc = string.Format("f({0})", string.Join(",", selectedVarNames)); 132 description.Add(string.Format(" ~ N({0}, {1:N3})", desc, noisePrng.Sigma)); 133 } 119 List<List<double>> lvl2 = CreateVariables(lvl0.Concat(lvl1).ToList(), numLvl2, inputVarNames, description, relevances); 134 120 135 121 // lvl3 contains variables which are functions of vars in lvl0, lvl1 and lvl2 (+ noise) 136 List<List<double>> lvl3 = new List<List<double>>();137 122 int numLvl3 = numberOfFeatures - numLvl0 - numLvl1 - numLvl2; 138 for (int c = 0; c < numLvl3; c++) {139 string[] selectedVarNames; 140 var x = GenerateRandomFunction(random, lvl0.Concat(lvl1).Concat(lvl2).ToList(), out selectedVarNames);141 var sigma = x.StandardDeviation();142 var noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - 
noiseRatio)));143 lvl3.Add(x.Select(t => t + noisePrng.NextDouble()).ToList());144 Array.Sort(selectedVarNames);145 inputVarNames.Add(selectedVarNames);146 var desc = string.Format("f({0})", string.Join(",", selectedVarNames));147 description.Add(string.Format(" ~ N({0}, {1:N3})", desc, noisePrng.Sigma));148 } 123 List<List<double>> lvl3 = CreateVariables(lvl0.Concat(lvl1).Concat(lvl2).ToList(), numLvl3, inputVarNames, description, relevances); 124 125 this.variableRelevances.Clear(); 126 for (int i = 0; i < variableNames.Length; i++) { 127 var targetVarName = variableNames[i]; 128 var targetRelevantInputs = 129 inputVarNames[i].Zip(relevances[i], (inputVar, rel) => new KeyValuePair<string, double>(inputVar, rel)) 130 .ToArray(); 131 variableRelevances.Add(targetVarName, targetRelevantInputs); 132 } 133 149 134 networkDefinition = string.Join(Environment.NewLine, variableNames.Zip(description, (n, d) => n + d).OrderBy(x => x)); 150 135 // for graphviz 151 136 networkDefinition += Environment.NewLine + "digraph G {"; 152 foreach (var t in variableNames.Zip(inputVarNames, Tuple.Create).OrderBy(t => t.Item1)) { 153 var name = t.Item1; 154 var selectedVarNames = t.Item2; 155 foreach (var selectedVarName in selectedVarNames) { 156 networkDefinition += Environment.NewLine + selectedVarName + " -> " + name; 137 for (int i = 0; i < variableNames.Length; i++) { 138 var name = variableNames[i]; 139 var selectedVarNames = inputVarNames[i]; 140 var selectedRelevances = relevances[i]; 141 for (int j = 0; j < selectedVarNames.Length; j++) { 142 var selectedVarName = selectedVarNames[j]; 143 var selectedRelevance = selectedRelevances[j]; 144 networkDefinition += Environment.NewLine + selectedVarName + " -> " + name + 145 string.Format(CultureInfo.InvariantCulture, " [label={0:N3}]", selectedRelevance); 157 146 } 158 147 } 159 148 networkDefinition += Environment.NewLine + "}"; 160 149 161 // return a random permutation of all variables 150 // return a random permutation of all 
variables (to mix lvl0, lvl1, ... variables) 162 151 var allVars = lvl0.Concat(lvl1).Concat(lvl2).Concat(lvl3).ToList(); 163 152 var orderedVars = allVars.Zip(variableNames, Tuple.Create).OrderBy(t => t.Item2).Select(t => t.Item1).ToList(); … … 166 155 } 167 156 157 private List<List<double>> CreateVariables(List<List<double>> allowedInputs, int numVars, List<string[]> inputVarNames, List<string> description, List<double[]> relevances) { 158 var res = new List<List<double>>(); 159 for (int c = 0; c < numVars; c++) { 160 string[] selectedVarNames; 161 double[] relevance; 162 var x = GenerateRandomFunction(random, allowedInputs, out selectedVarNames, out relevance); 163 var sigma = x.StandardDeviation(); 164 var noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio))); 165 res.Add(x.Select(t => t + noisePrng.NextDouble()).ToList()); 166 Array.Sort(selectedVarNames, relevance); 167 inputVarNames.Add(selectedVarNames); 168 relevances.Add(relevance); 169 var desc = string.Format("f({0})", string.Join(",", selectedVarNames)); 170 // for the relevance information order variables by decreasing relevance 171 var relevanceStr = string.Join(", ", 172 selectedVarNames.Zip(relevance, Tuple.Create) 173 .OrderByDescending(t => t.Item2) 174 .Select(t => string.Format(CultureInfo.InvariantCulture, "{0}: {1:N3}", t.Item1, t.Item2))); 175 description.Add(string.Format(" ~ N({0}, {1:N3}) [Relevances: {2}]", desc, noisePrng.Sigma, relevanceStr)); 176 } 177 return res; 178 } 179 168 180 // sample the input variables that are actually used and sample from a Gaussian process 169 private IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, out string[] selectedVarNames ) {181 private IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, out string[] selectedVarNames, out double[] relevance) { 170 182 double r = -Math.Log(1.0 - rand.NextDouble()) * 2.0; // r is exponentially distributed with 
lambda = 2 171 183 int nl = (int)Math.Floor(1.5 + r); // number of selected vars is likely to be between three and four … … 177 189 var selectedVars = selectedIdx.Select(i => xs[i]).ToArray(); 178 190 selectedVarNames = selectedIdx.Select(i => VariableNames[i]).ToArray(); 179 return SampleGaussianProcess(random, selectedVars );180 } 181 182 private IEnumerable<double> SampleGaussianProcess(IRandom random, List<double>[] xs ) {191 return SampleGaussianProcess(random, selectedVars, out relevance); 192 } 193 194 private IEnumerable<double> SampleGaussianProcess(IRandom random, List<double>[] xs, out double[] relevance) { 183 195 int nl = xs.Length; 184 196 int nRows = xs.First().Count; … … 217 229 alglib.ablas.rmatrixmv(nRows, nRows, K, 0, 0, 0, u, 0, ref y, 0); 218 230 231 // calculate variable relevance 232 // as per Rasmussen and Williams "Gaussian Processes for Machine Learning" page 106: 233 // ,,For the squared exponential covariance function [...] the l1, ..., lD hyperparameters 234 // play the role of characteristic length scales [...]. Such a covariance function implements 235 // automatic relevance determination (ARD) [Neal, 1996], since the inverse of the length-scale 236 // determines how relevant an input is: if the length-scale has a very large value, the covariance 237 // will become almost independent of that input, effectively removing it from inference.'' 238 relevance = l.Select(li => 1.0 / li).ToArray(); 239 219 240 return y; 220 241 }
Note: See TracChangeset for help on using the changeset viewer.