Changeset 14277 for branches/symbreg-factors-2650/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VariableNetworks
- Timestamp:
- 09/08/16 11:41:45 (8 years ago)
- Location:
- branches/symbreg-factors-2650/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VariableNetworks
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/symbreg-factors-2650/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VariableNetworks/VariableNetwork.cs
r14185 r14277 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Globalization; 24 25 using System.Linq; 25 26 using HeuristicLab.Common; … … 60 61 .Select(i => string.Format("X{0:000}", i)) 61 62 .ToArray(); 63 64 variableRelevances = new Dictionary<string, IEnumerable<KeyValuePair<string, double>>>(); 62 65 } 63 66 … … 83 86 protected override int TestPartitionEnd { get { return nTrainingSamples + nTestSamples; } } 84 87 88 private Dictionary<string, IEnumerable<KeyValuePair<string, double>>> variableRelevances; 89 public IEnumerable<KeyValuePair<string, double>> GetVariableRelevance(string targetVar) { 90 return variableRelevances[targetVar]; 91 } 85 92 86 93 protected override List<List<double>> GenerateValues() { … … 94 101 List<string> description = new List<string>(); // store information how the variable is actually produced 95 102 List<string[]> inputVarNames = new List<string[]>(); // store information to produce graphviz file 103 List<double[]> relevances = new List<double[]>(); // stores variable relevance information (same order as given in inputVarNames) 96 104 97 105 var nrand = new NormalDistributedRandom(random, 0, 1); 98 106 for (int c = 0; c < numLvl0; c++) { 99 var datai = Enumerable.Range(0, TestPartitionEnd).Select(_ => nrand.NextDouble()).ToList();100 107 inputVarNames.Add(new string[] { }); 101 description.Add("~ N(0, 1)"); 102 lvl0.Add(datai); 108 relevances.Add(new double[] { }); 109 description.Add(" ~ N(0, 1)"); 110 lvl0.Add(Enumerable.Range(0, TestPartitionEnd).Select(_ => nrand.NextDouble()).ToList()); 103 111 } 104 112 105 113 // lvl1 contains variables which are functions of vars in lvl0 (+ noise) 106 List<List<double>> lvl1 = new List<List<double>>();107 114 int numLvl1 = (int)Math.Ceiling(numberOfFeatures * 0.33); 108 for (int c = 0; c < numLvl1; c++) { 109 string[] selectedVarNames; 110 var x = GenerateRandomFunction(random, lvl0, out selectedVarNames); 111 var sigma = x.StandardDeviation(); 112 var 
noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio))); 113 lvl1.Add(x.Select(t => t + noisePrng.NextDouble()).ToList()); 114 115 inputVarNames.Add(selectedVarNames); 116 var desc = string.Format("f({0})", string.Join(",", selectedVarNames)); 117 description.Add(string.Format(" ~ N({0}, {1:N3})", desc, noisePrng.Sigma)); 118 } 115 List<List<double>> lvl1 = CreateVariables(lvl0, numLvl1, inputVarNames, description, relevances); 119 116 120 117 // lvl2 contains variables which are functions of vars in lvl0 and lvl1 (+ noise) 121 List<List<double>> lvl2 = new List<List<double>>();122 118 int numLvl2 = (int)Math.Ceiling(numberOfFeatures * 0.2); 123 for (int c = 0; c < numLvl2; c++) { 124 string[] selectedVarNames; 125 var x = GenerateRandomFunction(random, lvl0.Concat(lvl1).ToList(), out selectedVarNames); 126 var sigma = x.StandardDeviation(); 127 var noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio))); 128 lvl2.Add(x.Select(t => t + noisePrng.NextDouble()).ToList()); 129 130 inputVarNames.Add(selectedVarNames); 131 var desc = string.Format("f({0})", string.Join(",", selectedVarNames)); 132 description.Add(string.Format(" ~ N({0}, {1:N3})", desc, noisePrng.Sigma)); 133 } 119 List<List<double>> lvl2 = CreateVariables(lvl0.Concat(lvl1).ToList(), numLvl2, inputVarNames, description, relevances); 134 120 135 121 // lvl3 contains variables which are functions of vars in lvl0, lvl1 and lvl2 (+ noise) 136 List<List<double>> lvl3 = new List<List<double>>();137 122 int numLvl3 = numberOfFeatures - numLvl0 - numLvl1 - numLvl2; 138 for (int c = 0; c < numLvl3; c++) { 139 string[] selectedVarNames; 140 var x = GenerateRandomFunction(random, lvl0.Concat(lvl1).Concat(lvl2).ToList(), out selectedVarNames); 141 var sigma = x.StandardDeviation(); 142 var noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio))); 143 lvl3.Add(x.Select(t => t + 
noisePrng.NextDouble()).ToList()); 144 145 inputVarNames.Add(selectedVarNames); 146 var desc = string.Format("f({0})", string.Join(",", selectedVarNames)); 147 description.Add(string.Format(" ~ N({0}, {1:N3})", desc, noisePrng.Sigma)); 148 } 149 150 networkDefinition = string.Join(Environment.NewLine, variableNames.Zip(description, (n, d) => n + d)); 123 List<List<double>> lvl3 = CreateVariables(lvl0.Concat(lvl1).Concat(lvl2).ToList(), numLvl3, inputVarNames, description, relevances); 124 125 this.variableRelevances.Clear(); 126 for (int i = 0; i < variableNames.Length; i++) { 127 var targetVarName = variableNames[i]; 128 var targetRelevantInputs = 129 inputVarNames[i].Zip(relevances[i], (inputVar, rel) => new KeyValuePair<string, double>(inputVar, rel)) 130 .ToArray(); 131 variableRelevances.Add(targetVarName, targetRelevantInputs); 132 } 133 134 networkDefinition = string.Join(Environment.NewLine, variableNames.Zip(description, (n, d) => n + d).OrderBy(x => x)); 151 135 // for graphviz 152 136 networkDefinition += Environment.NewLine + "digraph G {"; 153 foreach (var t in variableNames.Zip(inputVarNames, Tuple.Create).OrderBy(t => t.Item1)) { 154 var name = t.Item1; 155 var selectedVarNames = t.Item2; 156 foreach (var selectedVarName in selectedVarNames) { 157 networkDefinition += Environment.NewLine + selectedVarName + " -> " + name; 137 for (int i = 0; i < variableNames.Length; i++) { 138 var name = variableNames[i]; 139 var selectedVarNames = inputVarNames[i]; 140 var selectedRelevances = relevances[i]; 141 for (int j = 0; j < selectedVarNames.Length; j++) { 142 var selectedVarName = selectedVarNames[j]; 143 var selectedRelevance = selectedRelevances[j]; 144 networkDefinition += Environment.NewLine + selectedVarName + " -> " + name + 145 string.Format(CultureInfo.InvariantCulture, " [label={0:N3}]", selectedRelevance); 158 146 } 159 147 } 160 148 networkDefinition += Environment.NewLine + "}"; 161 149 162 // return a random permutation of all variables 150 // 
return a random permutation of all variables (to mix lvl0, lvl1, ... variables) 163 151 var allVars = lvl0.Concat(lvl1).Concat(lvl2).Concat(lvl3).ToList(); 164 152 var orderedVars = allVars.Zip(variableNames, Tuple.Create).OrderBy(t => t.Item2).Select(t => t.Item1).ToList(); … … 167 155 } 168 156 157 private List<List<double>> CreateVariables(List<List<double>> allowedInputs, int numVars, List<string[]> inputVarNames, List<string> description, List<double[]> relevances) { 158 var res = new List<List<double>>(); 159 for (int c = 0; c < numVars; c++) { 160 string[] selectedVarNames; 161 double[] relevance; 162 var x = GenerateRandomFunction(random, allowedInputs, out selectedVarNames, out relevance); 163 var sigma = x.StandardDeviation(); 164 var noisePrng = new NormalDistributedRandom(random, 0, sigma * Math.Sqrt(noiseRatio / (1.0 - noiseRatio))); 165 res.Add(x.Select(t => t + noisePrng.NextDouble()).ToList()); 166 Array.Sort(selectedVarNames, relevance); 167 inputVarNames.Add(selectedVarNames); 168 relevances.Add(relevance); 169 var desc = string.Format("f({0})", string.Join(",", selectedVarNames)); 170 // for the relevance information order variables by decreasing relevance 171 var relevanceStr = string.Join(", ", 172 selectedVarNames.Zip(relevance, Tuple.Create) 173 .OrderByDescending(t => t.Item2) 174 .Select(t => string.Format(CultureInfo.InvariantCulture, "{0}: {1:N3}", t.Item1, t.Item2))); 175 description.Add(string.Format(" ~ N({0}, {1:N3}) [Relevances: {2}]", desc, noisePrng.Sigma, relevanceStr)); 176 } 177 return res; 178 } 179 169 180 // sample the input variables that are actually used and sample from a Gaussian process 170 private IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, out string[] selectedVarNames ) {181 private IEnumerable<double> GenerateRandomFunction(IRandom rand, List<List<double>> xs, out string[] selectedVarNames, out double[] relevance) { 171 182 double r = -Math.Log(1.0 - rand.NextDouble()) * 2.0; // r 
is exponentially distributed with lambda = 2 172 183 int nl = (int)Math.Floor(1.5 + r); // number of selected vars is likely to be between three and four … … 178 189 var selectedVars = selectedIdx.Select(i => xs[i]).ToArray(); 179 190 selectedVarNames = selectedIdx.Select(i => VariableNames[i]).ToArray(); 180 return SampleGaussianProcess(random, selectedVars );181 } 182 183 private IEnumerable<double> SampleGaussianProcess(IRandom random, List<double>[] xs ) {191 return SampleGaussianProcess(random, selectedVars, out relevance); 192 } 193 194 private IEnumerable<double> SampleGaussianProcess(IRandom random, List<double>[] xs, out double[] relevance) { 184 195 int nl = xs.Length; 185 196 int nRows = xs.First().Count; … … 218 229 alglib.ablas.rmatrixmv(nRows, nRows, K, 0, 0, 0, u, 0, ref y, 0); 219 230 231 // calculate variable relevance 232 // as per Rasmussen and Williams "Gaussian Processes for Machine Learning" page 106: 233 // ,,For the squared exponential covariance function [...] the l1, ..., lD hyperparameters 234 // play the role of characteristic length scales [...]. Such a covariance function implements 235 // automatic relevance determination (ARD) [Neal, 1996], since the inverse of the length-scale 236 // determines how relevant an input is: if the length-scale has a very large value, the covariance 237 // will become almost independent of that input, effectively removing it from inference.'' 238 relevance = l.Select(li => 1.0 / li).ToArray(); 239 220 240 return y; 221 241 } -
branches/symbreg-factors-2650/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/VariableNetworks/VariableNetworkInstanceProvider.cs
r14229 r14277 49 49 public override IEnumerable<IDataDescriptor> GetDataDescriptors() { 50 50 var numVariables = new int[] { 10, 20, 50, 100 }; 51 var noiseRatios = new double[] { 0.01, 0.05, 0.1 }; 51 var noiseRatios = new double[] { 0, 0.01, 0.05, 0.1 }; 52 52 var rand = new MersenneTwister((uint)Seed); // use fixed seed for deterministic problem generation 53 53 return (from size in numVariables
Note: See TracChangeset
for help on using the changeset viewer.