Changeset 9208
- Timestamp: 02/06/13 12:30:13 (12 years ago)
- Location: trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3
- Files:
  - 3 added
  - 1 deleted
  - 11 edited
Legend:
- Unmodified
- Added
- Removed
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ResourceClassificationDataDescriptor.cs
r7849 r9208 20 20 #endregion 21 21 22 using System.Collections.Generic; 23 using System.Linq; 22 24 23 25 namespace HeuristicLab.Problems.Instances.DataAnalysis { 24 internal class ResourceClassificationDataDescriptor : IDataDescriptor { 25 public string Name { get; internal set; } 26 public string Description { get; internal set; } 26 public abstract class ResourceClassificationDataDescriptor : ClassificationDataDescriptor { 27 internal string ResourceName { get; set; } 27 28 28 internal string ResourceName { get; set; } 29 internal ResourceClassificationDataDescriptor(string name, string description, string resourceName) { 30 Name = name; 31 Description = description; 32 ResourceName = resourceName; 29 public bool CheckVariableNames(IEnumerable<string> VariableNames) { 30 return this.VariableNames.All(x => VariableNames.Contains(x)); 33 31 } 34 32 } -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ResourceClassificationInstanceProvider.cs
r7965 r9208 21 21 22 22 using System; 23 using System.Collections.Generic;24 23 using System.Globalization; 25 24 using System.IO; … … 34 33 35 34 protected abstract string FileName { get; } 36 37 public override IEnumerable<IDataDescriptor> GetDataDescriptors() {38 var solutionsArchiveName = GetResourceName(FileName + @"\.zip");39 if (!String.IsNullOrEmpty(solutionsArchiveName)) {40 using (var solutionsZipFile = new ZipInputStream(GetType().Assembly.GetManifestResourceStream(solutionsArchiveName))) {41 IList<string> entries = new List<string>();42 ZipEntry curEntry;43 while ((curEntry = solutionsZipFile.GetNextEntry()) != null) {44 entries.Add(curEntry.Name);45 }46 foreach (var entry in entries.OrderBy(x => x)) {47 yield return new ResourceClassificationDataDescriptor(Path.GetFileNameWithoutExtension(entry), Description, entry);48 }49 }50 }51 }52 35 53 36 public override IClassificationProblemData LoadData(IDataDescriptor id) { … … 70 53 71 54 Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values); 72 string targetVar = csvFileParser.VariableNames.Where(x => dataset.DoubleVariables.Contains(x)).Last(); 73 IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar)); 55 if (!descriptor.CheckVariableNames(csvFileParser.VariableNames)) { 56 throw new ArgumentException("Parsed file contains variables which are not in the descriptor."); 57 } 74 58 75 ClassificationProblemData claData = new ClassificationProblemData(dataset, allowedInputVars, targetVar); 76 77 int trainingPartEnd = csvFileParser.Rows * 2 / 3; 78 claData.TrainingPartition.Start = 0; 79 claData.TrainingPartition.End = trainingPartEnd; 80 claData.TestPartition.Start = trainingPartEnd; 81 claData.TestPartition.End = csvFileParser.Rows; 82 83 claData.Name = descriptor.Name; 84 claData.Description = descriptor.Description; 85 return claData; 59 return descriptor.GenerateClassificationData(dataset); 86 60 } 87 61 } -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/UCI/Iris.cs
r8889 r9208 23 23 using System; 24 24 namespace HeuristicLab.Problems.Instances.DataAnalysis { 25 public class Iris : IUCIDataDescriptor {26 public string Name { get { return "Iris"; } }27 public string Description {25 public class Iris : UCIDataDescriptor { 26 public override string Filename { get { return "Iris"; } } 27 public override string Description { 28 28 get { 29 29 return "Data Set Information:" + Environment.NewLine … … 35 35 + "other 2; the latter are NOT linearly separable from each other." + Environment.NewLine 36 36 + "Predicted attribute: class of iris plant." + Environment.NewLine 37 + "This is an exceedingly simple domain."; 37 + "This is an exceedingly simple domain." + Environment.NewLine + Environment.NewLine 38 + "The classes have been converted in the following way" + Environment.NewLine 39 + "Iris-setosa = 0" + Environment.NewLine 40 + "Iris-versicolor = 1" + Environment.NewLine 41 + "Iris-virginica = 2"; 38 42 } 39 43 } 40 public string Donor { get { return "M. Marshall"; } } 41 public int Year { get { return 1988; } } 44 public override string Donor { get { return "M. Marshall"; } } 45 public override int Year { get { return 1988; } } 46 47 protected override string TargetVariable { get { return "class"; } } 48 protected override string[] VariableNames { 49 get { return new string[] { "sepal_length", "sepal_width", "petal_length", "petal_width", "class" }; } 50 } 51 protected override string[] AllowedInputVariables { 52 get { return new string[] { "sepal_length", "sepal_width", "petal_length", "petal_width" }; } 53 } 54 protected override int TrainingPartitionStart { get { return 0; } } 55 protected override int TrainingPartitionEnd { get { return 100; } } 56 protected override int TestPartitionStart { get { return 100; } } 57 protected override int TestPartitionEnd { get { return 150; } } 42 58 } 43 59 } -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/UCI/Mammography.cs
r8889 r9208 23 23 using System; 24 24 namespace HeuristicLab.Problems.Instances.DataAnalysis { 25 public class Mammography : IUCIDataDescriptor {26 public string Name { get { return "Mammography"; } }27 public string Description {25 public class Mammography : UCIDataDescriptor { 26 public override string Filename { get { return "Mammography"; } } 27 public override string Description { 28 28 get { 29 29 return "Data Set Information: Features are computed from a digitized image of a fine needle aspirate (FNA) of a breast mass." … … 46 46 } 47 47 } 48 public string Donor { get { return "M. Elter"; } } 49 public int Year { get { return 2007; } } 48 public override string Donor { get { return "M. Elter"; } } 49 public override int Year { get { return 2007; } } 50 51 protected override string TargetVariable { get { return "Severity"; } } 52 protected override string[] VariableNames { 53 get { return new string[] { "BI-RADS", "Age", "Shape", "Margin", "Density", "Severity" }; } 54 } 55 protected override string[] AllowedInputVariables { 56 get { return new string[] { "BI-RADS", "Age", "Shape", "Margin", "Density" }; } 57 } 58 protected override int TrainingPartitionStart { get { return 0; } } 59 protected override int TrainingPartitionEnd { get { return 641; } } 60 protected override int TestPartitionStart { get { return 641; } } 61 protected override int TestPartitionEnd { get { return 961; } } 50 62 } 51 63 } -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/UCI/Parkinson.cs
r8908 r9208 23 23 24 24 namespace HeuristicLab.Problems.Instances.DataAnalysis { 25 public class Parkinson : IUCIDataDescriptor {26 public string Name { get { return "Parkinson"; } }27 public string Description {25 public class Parkinson : UCIDataDescriptor { 26 public override string Filename { get { return "Parkinson"; } } 27 public override string Description { 28 28 get { 29 29 return "Data Set Information:" + Environment.NewLine … … 40 40 } 41 41 } 42 public string Donor { get { return "M. Little"; } } 43 public int Year { get { return 2008; } } 42 public override string Donor { get { return "M. Little"; } } 43 public override int Year { get { return 2008; } } 44 45 protected override string TargetVariable { get { return "status"; } } 46 protected override string[] VariableNames { 47 get { return new string[] { "MDVP:Fo(Hz)", "MDVP:Fhi(Hz)", "MDVP:Flo(Hz)", "MDVP:Jitter(%)", "MDVP:Jitter(Abs)", "MDVP:RAP", "MDVP:PPQ", "Jitter:DDP", "MDVP:Shimmer", "MDVP:Shimmer(dB)", "Shimmer:APQ3", "Shimmer:APQ5", "MDVP:APQ", "Shimmer:DDA", "NHR", "HNR", "RPDE", "DFA", "spread1", "spread2", "D2", "PPE", "status" }; } 48 } 49 protected override string[] AllowedInputVariables { 50 get { return new string[] { "MDVP:Fo(Hz)", "MDVP:Fhi(Hz)", "MDVP:Flo(Hz)", "MDVP:Jitter(%)", "MDVP:Jitter(Abs)", "MDVP:RAP", "MDVP:PPQ", "Jitter:DDP", "MDVP:Shimmer", "MDVP:Shimmer(dB)", "Shimmer:APQ3", "Shimmer:APQ5", "MDVP:APQ", "Shimmer:DDA", "NHR", "HNR", "RPDE", "DFA", "spread1", "spread2", "D2", "PPE" }; } 51 } 52 protected override int TrainingPartitionStart { get { return 0; } } 53 protected override int TrainingPartitionEnd { get { return 130; } } 54 protected override int TestPartitionStart { get { return 130; } } 55 protected override int TestPartitionEnd { get { return 195; } } 44 56 } 45 57 } -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/UCI/Thyroid.cs
r8889 r9208 23 23 using System; 24 24 namespace HeuristicLab.Problems.Instances.DataAnalysis { 25 public class Thyroid : IUCIDataDescriptor {26 public string Name { get { return "Thyroid"; } }27 public string Description {25 public class Thyroid : UCIDataDescriptor { 26 public override string Filename { get { return "Thyroid"; } } 27 public override string Description { 28 28 get { 29 29 return "Thyroid gland data. ('normal', hypo and hyper functioning)" + Environment.NewLine + Environment.NewLine + … … 39 39 } 40 40 } 41 public string Donor { get { return "S. Aeberhard"; } } 42 public int Year { get { return 1992; } } 41 public override string Donor { get { return "S. Aeberhard"; } } 42 public override int Year { get { return 1992; } } 43 44 protected override string TargetVariable { get { return "X000"; } } 45 protected override string[] VariableNames { 46 get { return new string[] { "X001", "X002", "X003", "X004", "X005", "X000" }; } 47 } 48 protected override string[] AllowedInputVariables { 49 get { return new string[] { "X001", "X002", "X003", "X004", "X005" }; } 50 } 51 protected override int TrainingPartitionStart { get { return 0; } } 52 protected override int TrainingPartitionEnd { get { return 143; } } 53 protected override int TestPartitionStart { get { return 143; } } 54 protected override int TestPartitionEnd { get { return 215; } } 43 55 } 44 56 } -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/UCI/UCIInstanceProvider.cs
r8903 r9208 46 46 47 47 public override IEnumerable<IDataDescriptor> GetDataDescriptors() { 48 List< IUCIDataDescriptor> descriptorList = new List<IUCIDataDescriptor>();48 List<UCIDataDescriptor> descriptorList = new List<UCIDataDescriptor>(); 49 49 descriptorList.Add(new Iris()); 50 50 descriptorList.Add(new Mammography()); … … 53 53 descriptorList.Add(new Vertebral_3C()); 54 54 descriptorList.Add(new Wine()); 55 descriptorList.Add(new WisconsinDiagnosticBreastCancer()); 55 56 var solutionsArchiveName = GetResourceName(FileName + @"\.zip"); 56 57 if (!String.IsNullOrEmpty(solutionsArchiveName)) { … … 62 63 } 63 64 foreach (var entry in entries.OrderBy(x => x)) { 64 string prettyName = Path.GetFileNameWithoutExtension(entry);65 IUCIDataDescriptor desc = descriptorList.Where(x => x.Name.Equals(prettyName)).FirstOrDefault();65 string filename = Path.GetFileNameWithoutExtension(entry); 66 UCIDataDescriptor desc = descriptorList.Where(x => x.Filename.Equals(filename)).FirstOrDefault(); 66 67 if (desc != null) { 67 prettyName = String.Format("{0}, {1}, {2}", prettyName, desc.Donor, desc.Year);68 yield return new ResourceClassificationDataDescriptor(prettyName, desc.Description, entry);68 desc.ResourceName = entry; 69 yield return desc; 69 70 } else 70 yield return new ResourceClassificationDataDescriptor(prettyName, Description, entry);71 throw new ArgumentNullException("No Descriptor could be found for this entry."); 71 72 } 72 73 } -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/UCI/Vertebral_3C.cs
r8908 r9208 23 23 24 24 namespace HeuristicLab.Problems.Instances.DataAnalysis { 25 public class Vertebral_3C : IUCIDataDescriptor {26 public string Name { get { return "Vertebral_3C"; } }27 public string Description {25 public class Vertebral_3C : UCIDataDescriptor { 26 public override string Filename { get { return "Vertebral_3C"; } } 27 public override string Description { 28 28 get { 29 29 return "Data Set Information:" + Environment.NewLine … … 40 40 } 41 41 } 42 public string Donor { get { return "H. da Mota"; } } 43 public int Year { get { return 2011; } } 42 public override string Donor { get { return "H. da Mota"; } } 43 public override int Year { get { return 2011; } } 44 45 protected override string TargetVariable { get { return "class"; } } 46 protected override string[] VariableNames { 47 get { return new string[] { "pelvic_incidence", "pelvic_tilt", "lumbar_lordosis_angle", "sacral_slope", "pelvic_radius", "degree_1", "class" }; } 48 } 49 protected override string[] AllowedInputVariables { 50 get { return new string[] { "pelvic_incidence", "pelvic_tilt", "lumbar_lordosis_angle", "sacral_slope", "pelvic_radius", "degree_1" }; } 51 } 52 protected override int TrainingPartitionStart { get { return 0; } } 53 protected override int TrainingPartitionEnd { get { return 207; } } 54 protected override int TestPartitionStart { get { return 207; } } 55 protected override int TestPartitionEnd { get { return 310; } } 44 56 } 45 57 } -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/UCI/Wine.cs
r8889 r9208 23 23 using System; 24 24 namespace HeuristicLab.Problems.Instances.DataAnalysis { 25 public class Wine : IUCIDataDescriptor {26 public string Name { get { return "Wine"; } }27 public string Description {25 public class Wine : UCIDataDescriptor { 26 public override string Filename { get { return "Wine"; } } 27 public override string Description { 28 28 get { 29 29 return "These data are the results of a chemical analysis of wines grown in the same region " + … … 51 51 } 52 52 } 53 public string Donor { get { return "S. Aeberhard"; } } 54 public int Year { get { return 1991; } } 53 public override string Donor { get { return "S. Aeberhard"; } } 54 public override int Year { get { return 1991; } } 55 56 protected override string TargetVariable { get { return "Class"; } } 57 protected override string[] VariableNames { 58 get { return new string[] { "Alcohol", "Malic acid", "Ash", "Alcalinity of ash", "Magnesium", "Total phenols", "Flavanoids", "Nonflavanoid phenols", "Proanthocyanins", "Color intensity", "Hue", "OD280/OD315 of diluted wines", "Proline", "Class" }; } 59 } 60 protected override string[] AllowedInputVariables { 61 get { return new string[] { "Alcohol", "Malic acid", "Ash", "Alcalinity of ash", "Magnesium", "Total phenols", "Flavanoids", "Nonflavanoid phenols", "Proanthocyanins", "Color intensity", "Hue", "OD280/OD315 of diluted wines", "Proline" }; } 62 } 63 protected override int TrainingPartitionStart { get { return 0; } } 64 protected override int TrainingPartitionEnd { get { return 119; } } 65 protected override int TestPartitionStart { get { return 119; } } 66 protected override int TestPartitionEnd { get { return 178; } } 55 67 } 56 68 } -
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/HeuristicLab.Problems.Instances.DataAnalysis-3.3.csproj
r9133 r9208 120 120 <Compile Include="Classification\ResourceClassificationDataDescriptor.cs" /> 121 121 <Compile Include="Classification\ResourceClassificationInstanceProvider.cs" /> 122 <Compile Include="Classification\UCI\WisconsinDiagnosticBreastCancer.cs" /> 122 123 <Compile Include="Classification\UCI\Vertebral_3C.cs" /> 123 124 <Compile Include="Classification\UCI\Parkinson.cs" /> 124 125 <Compile Include="Classification\UCI\Iris.cs" /> 125 <Compile Include="Classification\UCI\ IUCIDataDescriptor.cs" />126 <Compile Include="Classification\UCI\UCIDataDescriptor.cs" /> 126 127 <Compile Include="Classification\UCI\Mammography.cs" /> 127 128 <Compile Include="Classification\UCI\Thyroid.cs" /> … … 135 136 <Compile Include="Plugin.cs" /> 136 137 <Compile Include="Properties\AssemblyInfo.cs" /> 138 <Compile Include="Classification\ClassificationDataDescriptor.cs" /> 137 139 <Compile Include="Regression\ArtificialRegressionDataDescriptor.cs" /> 138 140 <Compile Include="Regression\ArtificialRegressionInstanceProvider.cs" />
Note: See TracChangeset for help on using the changeset viewer.