- Timestamp:
- 02/17/13 17:18:01 (12 years ago)
- Location:
- trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3
- Files:
-
- 1 added
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/DataAnalysisInstanceProvider.cs
r8878 r9217 49 49 int count = values.First().Count; 50 50 int[] indices = Enumerable.Range(0, count).Shuffle(new FastRandom()).ToArray(); 51 List<IList> shuffeledValues = new List<IList>(values.Count); 51 List<IList> shuffled = new List<IList>(values.Count); 52 52 for (int col = 0; col < values.Count; col++) { 53 53 54 54 if (values[col] is List<double>) 55 shuffeledValues.Add(new List<double>()); 55 shuffled.Add(new List<double>()); 56 56 else if (values[col] is List<DateTime>) 57 shuffeledValues.Add(new List<DateTime>()); 57 shuffled.Add(new List<DateTime>()); 58 58 else if (values[col] is List<string>) 59 shuffeledValues.Add(new List<string>()); 59 shuffled.Add(new List<string>()); 60 60 else 61 61 throw new InvalidOperationException(); 62 62 63 63 for (int i = 0; i < count; i++) { 64 shuffeledValues[col].Add(values[col][indices[i]]); 64 shuffled[col].Add(values[col][indices[i]]); 65 65 } 66 66 } 67 return shuffeledValues; 67 return shuffled; 68 68 } 69 69 -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/HeuristicLab.Problems.Instances.DataAnalysis-3.3.csproj
r9208 r9217 140 140 <Compile Include="Regression\ArtificialRegressionInstanceProvider.cs" /> 141 141 <Compile Include="Regression\CSV\RegressionCSVInstanceProvider.cs" /> 142 <Compile Include="Regression\FeatureSelection\FeatureSelectionRegressionProblemData.cs" /> 142 143 <Compile Include="Regression\FeatureSelection\FeatureSelection.cs" /> 143 144 <Compile Include="Regression\FeatureSelection\FeatureSelectionInstanceProvider.cs" /> … … 245 246 <Private>False</Private> 246 247 </ProjectReference> 248 <ProjectReference Include="..\..\HeuristicLab.Parameters\3.3\HeuristicLab.Parameters-3.3.csproj"> 249 <Project>{56F9106A-079F-4C61-92F6-86A84C2D84B7}</Project> 250 <Name>HeuristicLab.Parameters-3.3</Name> 251 </ProjectReference> 252 <ProjectReference Include="..\..\HeuristicLab.Persistence\3.3\HeuristicLab.Persistence-3.3.csproj"> 253 <Project>{102BC7D3-0EF9-439C-8F6D-96FF0FDB8E1B}</Project> 254 <Name>HeuristicLab.Persistence-3.3</Name> 255 </ProjectReference> 247 256 <ProjectReference Include="..\..\HeuristicLab.PluginInfrastructure\3.3\HeuristicLab.PluginInfrastructure-3.3.csproj"> 248 257 <Project>{94186A6A-5176-4402-AE83-886557B53CCA}</Project> -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Plugin.cs.frame
r8568 r9217 28 28 [PluginDependency("HeuristicLab.Core", "3.3")] 29 29 [PluginDependency("HeuristicLab.Data", "3.3")] 30 [PluginDependency("HeuristicLab.Parameters", "3.3")] 31 [PluginDependency("HeuristicLab.Persistence", "3.3")] 30 32 [PluginDependency("HeuristicLab.Problems.DataAnalysis", "3.4")] 31 33 [PluginDependency("HeuristicLab.Problems.Instances", "3.3")] -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/FeatureSelection/FeatureSelection.cs
r9094 r9217 24 24 using System.Linq; 25 25 using HeuristicLab.Common; 26 using HeuristicLab.Core; 26 27 using HeuristicLab.Random; 27 28 28 29 namespace HeuristicLab.Problems.Instances.DataAnalysis { 29 30 public class FeatureSelection : ArtificialRegressionDataDescriptor { 30 private int trainingSamples;31 private const int TestSamples = 5000;31 private int nTrainingSamples; 32 private int nTestSamples; 32 33 33 34 private int numberOfFeatures; 34 35 private double selectionProbability; 35 36 private double noiseRatio; 37 private IRandom xRandom; 38 private IRandom weightRandom; 36 39 37 40 public override string Name { get { return string.Format("FeatSel-{0}-{1:0%}-{2:0%}", numberOfFeatures, selectionProbability, noiseRatio); } } … … 39 42 get { 40 43 return "This problem is specifically designed to test feature selection." + Environment.NewLine 41 + "In this instance the number of rows for training (" + trainingSamples +44 + "In this instance the number of rows for training (" + nTrainingSamples + 42 45 ") is only slightly larger than the number of columns (" + numberOfFeatures + 43 46 ") and only a subset of the columns must be selected for the predictive model." 
+ Environment.NewLine … … 47 50 + "X(i,j) ~ N(0, 1) iid, w(i) ~ U(0, 10) iid, n ~ N(0, sigma(w*S) * SQRT(" + noiseRatio + "))" + Environment.NewLine 48 51 + "The noise level is " + noiseRatio + " * sigma, thus an optimal model has R² = " 49 + Math.Round( 1 - noiseRatio, 2) + " (or equivalently: NMSE = " + noiseRatio + ")" + Environment.NewLine50 + "N = " + ( trainingSamples + TestSamples) + " (" + trainingSamples + " training, " +TestSamples + " test)" + Environment.NewLine52 + Math.Round(optimalRSquared) + " (or equivalently: NMSE = " + noiseRatio + ")" + Environment.NewLine 53 + "N = " + (nTrainingSamples + nTestSamples) + " (" + nTrainingSamples + " training, " + nTestSamples + " test)" + Environment.NewLine 51 54 + "k = " + numberOfFeatures; 52 55 ; … … 54 57 } 55 58 56 public FeatureSelection(int numberOfFeatures, double selectionProbability, double noiseRatio) { 59 private double[] w; 60 public double[] Weights { 61 get { return w; } 62 } 63 64 private string[] selectedFeatures; 65 public string[] SelectedFeatures { 66 get { return selectedFeatures; } 67 } 68 69 private double optimalRSquared; 70 public double OptimalRSquared { 71 get { return optimalRSquared; } 72 } 73 74 75 public FeatureSelection(int numberOfFeatures, double selectionProbability, double noiseRatio, IRandom xGenerator, IRandom weightGenerator) 76 : this((int)Math.Round(numberOfFeatures * 1.2), 5000, numberOfFeatures, 77 selectionProbability, noiseRatio, xGenerator, weightGenerator) { } 78 79 public FeatureSelection(int nTrainingSamples, int nTestSamples, int numberOfFeatures, 80 double selectionProbability, double noiseRatio, IRandom xGenerator, IRandom weightGenerator) { 57 81 this.numberOfFeatures = numberOfFeatures; 58 this.trainingSamples = (int)Math.Round(numberOfFeatures * 1.2); // 20% more rows than columns 82 this.nTrainingSamples = nTrainingSamples; 83 this.nTestSamples = nTestSamples; 59 84 this.selectionProbability = selectionProbability; 60 85 this.noiseRatio = noiseRatio; 86 
this.xRandom = xGenerator; 87 this.weightRandom = weightGenerator; 61 88 } 62 89 … … 74 101 } 75 102 } 103 76 104 protected override int TrainingPartitionStart { get { return 0; } } 77 protected override int TrainingPartitionEnd { get { return trainingSamples; } } 78 protected override int TestPartitionStart { get { return trainingSamples; } } 79 protected override int TestPartitionEnd { get { return trainingSamples + TestSamples; } } 105 protected override int TrainingPartitionEnd { get { return nTrainingSamples; } } 106 protected override int TestPartitionStart { get { return nTrainingSamples; } } 107 protected override int TestPartitionEnd { get { return nTrainingSamples + nTestSamples; } } 108 80 109 81 110 protected override List<List<double>> GenerateValues() { 82 111 List<List<double>> data = new List<List<double>>(); 83 112 for (int i = 0; i < AllowedInputVariables.Count(); i++) { 84 data.Add(ValueGenerator.GenerateNormalDistributedValues(TestPartitionEnd, 0, 1).ToList()); 113 data.Add(Enumerable.Range(0, TestPartitionEnd) 114 .Select(_ => xRandom.NextDouble()) 115 .ToList()); 85 116 } 86 117 … … 90 121 .Where(_ => random.NextDouble() < selectionProbability) 91 122 .ToArray(); 92 var w = ValueGenerator.GenerateUniformDistributedValues(selectedFeatures.Length, 0, 10) 93 123 124 w = selectedFeatures.Select(_ => weightRandom.NextDouble()).ToArray(); 94 125 var target = new List<double>(); 95 126 for (int i = 0; i < data[0].Count; i++) { … … 104 135 data.Add(target.Select(t => t + noisePrng.NextDouble()).ToList()); 105 136 137 // set property listing the selected features as string[] 138 this.selectedFeatures = selectedFeatures.Select(i => AllowedInputVariables[i]).ToArray(); 139 optimalRSquared = 1 - noiseRatio; 106 140 return data; 107 141 } -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/FeatureSelection/FeatureSelectionInstanceProvider.cs
r9093 r9217 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Linq; 25 using HeuristicLab.Data; 26 using HeuristicLab.Problems.DataAnalysis; 27 using HeuristicLab.Random; 24 28 25 29 namespace HeuristicLab.Problems.Instances.DataAnalysis { … … 39 43 40 44 public override IEnumerable<IDataDescriptor> GetDataDescriptors() { 41 List<IDataDescriptor> descriptorList = new List<IDataDescriptor>();42 45 var sizes = new int[] { 50, 100, 200 }; 43 46 var pp = new double[] { 0.1, 0.25, 0.5 }; 44 47 var noiseRatios = new double[] { 0.01, 0.05, 0.1, 0.2 }; 45 foreach (var size in sizes) { 46 foreach (var p in pp) { 47 foreach (var noiseRatio in noiseRatios) { 48 descriptorList.Add(new FeatureSelection(size, p, noiseRatio)); 49 } 50 } 51 } 52 return descriptorList; 48 var mt = new MersenneTwister(); 49 var xGenerator = new NormalDistributedRandom(mt, 0, 1); 50 var weightGenerator = new UniformDistributedRandom(mt, 0, 10); 51 return (from size in sizes 52 from p in pp 53 from noiseRatio in noiseRatios 54 select new FeatureSelection(size, p, noiseRatio, xGenerator, weightGenerator)) 55 .Cast<IDataDescriptor>() 56 .ToList(); 57 } 58 59 public override IRegressionProblemData LoadData(IDataDescriptor descriptor) { 60 var featureSelectionDescriptor = descriptor as FeatureSelection; 61 if (featureSelectionDescriptor == null) throw new ArgumentException("FeatureSelectionInstanceProvider expects an FeatureSelection data descriptor."); 62 // base call generates a regression problem data 63 var regProblemData = base.LoadData(featureSelectionDescriptor); 64 var problemData = 65 new FeatureSelectionRegressionProblemData( 66 regProblemData.Dataset, regProblemData.AllowedInputVariables, regProblemData.TargetVariable, 67 featureSelectionDescriptor.SelectedFeatures, featureSelectionDescriptor.Weights, 68 featureSelectionDescriptor.OptimalRSquared); 69 70 // copy values from regProblemData to feature selection problem data 71 problemData.Name = regProblemData.Name; 72 
problemData.Description = regProblemData.Description; 73 problemData.TrainingPartition.Start = regProblemData.TrainingPartition.Start; 74 problemData.TrainingPartition.End = regProblemData.TrainingPartition.End; 75 problemData.TestPartition.Start = regProblemData.TestPartition.Start; 76 problemData.TestPartition.End = regProblemData.TestPartition.End; 77 78 return problemData; 53 79 } 54 80 }
Note: See TracChangeset for help on using the changeset viewer.