Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
02/06/13 12:30:13 (12 years ago)
Author:
sforsten
Message:

#1941:

  • added Wisconsin breast cancer problem instance
  • corrected iris dataset
  • changed classification data descriptors so that the training and test partitions as well as the input and target variables can be set (in the same way as is done for regression)
Location:
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification
Files:
3 added
1 deleted
10 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ResourceClassificationDataDescriptor.cs

    r7849 r9208  
    2020#endregion
    2121
     22using System.Collections.Generic;
     23using System.Linq;
    2224
    2325namespace HeuristicLab.Problems.Instances.DataAnalysis {
    24   internal class ResourceClassificationDataDescriptor : IDataDescriptor {
    25     public string Name { get; internal set; }
    26     public string Description { get; internal set; }
     26  public abstract class ResourceClassificationDataDescriptor : ClassificationDataDescriptor {
     27    internal string ResourceName { get; set; }
    2728
    28     internal string ResourceName { get; set; }
    29     internal ResourceClassificationDataDescriptor(string name, string description, string resourceName) {
    30       Name = name;
    31       Description = description;
    32       ResourceName = resourceName;
     29    public bool CheckVariableNames(IEnumerable<string> VariableNames) {
     30      return this.VariableNames.All(x => VariableNames.Contains(x));
    3331    }
    3432  }
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ResourceClassificationInstanceProvider.cs

    r7965 r9208  
    2121
    2222using System;
    23 using System.Collections.Generic;
    2423using System.Globalization;
    2524using System.IO;
     
    3433
    3534    protected abstract string FileName { get; }
    36 
    37     public override IEnumerable<IDataDescriptor> GetDataDescriptors() {
    38       var solutionsArchiveName = GetResourceName(FileName + @"\.zip");
    39       if (!String.IsNullOrEmpty(solutionsArchiveName)) {
    40         using (var solutionsZipFile = new ZipInputStream(GetType().Assembly.GetManifestResourceStream(solutionsArchiveName))) {
    41           IList<string> entries = new List<string>();
    42           ZipEntry curEntry;
    43           while ((curEntry = solutionsZipFile.GetNextEntry()) != null) {
    44             entries.Add(curEntry.Name);
    45           }
    46           foreach (var entry in entries.OrderBy(x => x)) {
    47             yield return new ResourceClassificationDataDescriptor(Path.GetFileNameWithoutExtension(entry), Description, entry);
    48           }
    49         }
    50       }
    51     }
    5235
    5336    public override IClassificationProblemData LoadData(IDataDescriptor id) {
     
    7053
    7154        Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    72         string targetVar = csvFileParser.VariableNames.Where(x => dataset.DoubleVariables.Contains(x)).Last();
    73         IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));
     55        if (!descriptor.CheckVariableNames(csvFileParser.VariableNames)) {
     56          throw new ArgumentException("Parsed file contains variables which are not in the descriptor.");
     57        }
    7458
    75         ClassificationProblemData claData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
    76 
    77         int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    78         claData.TrainingPartition.Start = 0;
    79         claData.TrainingPartition.End = trainingPartEnd;
    80         claData.TestPartition.Start = trainingPartEnd;
    81         claData.TestPartition.End = csvFileParser.Rows;
    82 
    83         claData.Name = descriptor.Name;
    84         claData.Description = descriptor.Description;
    85         return claData;
     59        return descriptor.GenerateClassificationData(dataset);
    8660      }
    8761    }
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/UCI/Iris.cs

    r8889 r9208  
    2323using System;
    2424namespace HeuristicLab.Problems.Instances.DataAnalysis {
    25   public class Iris : IUCIDataDescriptor {
    26     public string Name { get { return "Iris"; } }
    27     public string Description {
     25  public class Iris : UCIDataDescriptor {
     26    public override string Filename { get { return "Iris"; } }
     27    public override string Description {
    2828      get {
    2929        return "Data Set Information:" + Environment.NewLine
     
    3535        + "other 2; the latter are NOT linearly separable from each other." + Environment.NewLine
    3636        + "Predicted attribute: class of iris plant." + Environment.NewLine
    37         + "This is an exceedingly simple domain.";
     37        + "This is an exceedingly simple domain." + Environment.NewLine + Environment.NewLine
     38        + "The classes have been converted in the following way" + Environment.NewLine
     39        + "Iris-setosa     = 0" + Environment.NewLine
     40        + "Iris-versicolor = 1" + Environment.NewLine
     41        + "Iris-virginica  = 2";
    3842      }
    3943    }
    40     public string Donor { get { return "M. Marshall"; } }
    41     public int Year { get { return 1988; } }
     44    public override string Donor { get { return "M. Marshall"; } }
     45    public override int Year { get { return 1988; } }
     46
     47    protected override string TargetVariable { get { return "class"; } }
     48    protected override string[] VariableNames {
     49      get { return new string[] { "sepal_length", "sepal_width", "petal_length", "petal_width", "class" }; }
     50    }
     51    protected override string[] AllowedInputVariables {
     52      get { return new string[] { "sepal_length", "sepal_width", "petal_length", "petal_width" }; }
     53    }
     54    protected override int TrainingPartitionStart { get { return 0; } }
     55    protected override int TrainingPartitionEnd { get { return 100; } }
     56    protected override int TestPartitionStart { get { return 100; } }
     57    protected override int TestPartitionEnd { get { return 150; } }
    4258  }
    4359}
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/UCI/Mammography.cs

    r8889 r9208  
    2323using System;
    2424namespace HeuristicLab.Problems.Instances.DataAnalysis {
    25   public class Mammography : IUCIDataDescriptor {
    26     public string Name { get { return "Mammography"; } }
    27     public string Description {
     25  public class Mammography : UCIDataDescriptor {
     26    public override string Filename { get { return "Mammography"; } }
     27    public override string Description {
    2828      get {
    2929        return "Data Set Information: Features are computed from a digitized image of a fine needle aspirate (FNA) of a breast mass."
     
    4646      }
    4747    }
    48     public string Donor { get { return "M. Elter"; } }
    49     public int Year { get { return 2007; } }
     48    public override string Donor { get { return "M. Elter"; } }
     49    public override int Year { get { return 2007; } }
     50
     51    protected override string TargetVariable { get { return "Severity"; } }
     52    protected override string[] VariableNames {
     53      get { return new string[] { "BI-RADS", "Age", "Shape", "Margin", "Density", "Severity" }; }
     54    }
     55    protected override string[] AllowedInputVariables {
     56      get { return new string[] { "BI-RADS", "Age", "Shape", "Margin", "Density" }; }
     57    }
     58    protected override int TrainingPartitionStart { get { return 0; } }
     59    protected override int TrainingPartitionEnd { get { return 641; } }
     60    protected override int TestPartitionStart { get { return 641; } }
     61    protected override int TestPartitionEnd { get { return 961; } }
    5062  }
    5163}
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/UCI/Parkinson.cs

    r8908 r9208  
    2323
    2424namespace HeuristicLab.Problems.Instances.DataAnalysis {
    25   public class Parkinson : IUCIDataDescriptor {
    26     public string Name { get { return "Parkinson"; } }
    27     public string Description {
     25  public class Parkinson : UCIDataDescriptor {
     26    public override string Filename { get { return "Parkinson"; } }
     27    public override string Description {
    2828      get {
    2929        return "Data Set Information:" + Environment.NewLine
     
    4040      }
    4141    }
    42     public string Donor { get { return "M. Little"; } }
    43     public int Year { get { return 2008; } }
     42    public override string Donor { get { return "M. Little"; } }
     43    public override int Year { get { return 2008; } }
     44
     45    protected override string TargetVariable { get { return "status"; } }
     46    protected override string[] VariableNames {
     47      get { return new string[] { "MDVP:Fo(Hz)", "MDVP:Fhi(Hz)", "MDVP:Flo(Hz)", "MDVP:Jitter(%)", "MDVP:Jitter(Abs)", "MDVP:RAP", "MDVP:PPQ", "Jitter:DDP", "MDVP:Shimmer", "MDVP:Shimmer(dB)", "Shimmer:APQ3", "Shimmer:APQ5", "MDVP:APQ", "Shimmer:DDA", "NHR", "HNR", "RPDE", "DFA", "spread1", "spread2", "D2", "PPE", "status" }; }
     48    }
     49    protected override string[] AllowedInputVariables {
     50      get { return new string[] { "MDVP:Fo(Hz)", "MDVP:Fhi(Hz)", "MDVP:Flo(Hz)", "MDVP:Jitter(%)", "MDVP:Jitter(Abs)", "MDVP:RAP", "MDVP:PPQ", "Jitter:DDP", "MDVP:Shimmer", "MDVP:Shimmer(dB)", "Shimmer:APQ3", "Shimmer:APQ5", "MDVP:APQ", "Shimmer:DDA", "NHR", "HNR", "RPDE", "DFA", "spread1", "spread2", "D2", "PPE" }; }
     51    }
     52    protected override int TrainingPartitionStart { get { return 0; } }
     53    protected override int TrainingPartitionEnd { get { return 130; } }
     54    protected override int TestPartitionStart { get { return 130; } }
     55    protected override int TestPartitionEnd { get { return 195; } }
    4456  }
    4557}
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/UCI/Thyroid.cs

    r8889 r9208  
    2323using System;
    2424namespace HeuristicLab.Problems.Instances.DataAnalysis {
    25   public class Thyroid : IUCIDataDescriptor {
    26     public string Name { get { return "Thyroid"; } }
    27     public string Description {
     25  public class Thyroid : UCIDataDescriptor {
     26    public override string Filename { get { return "Thyroid"; } }
     27    public override string Description {
    2828      get {
    2929        return "Thyroid gland data. ('normal', hypo and hyper functioning)" + Environment.NewLine + Environment.NewLine +
     
    3939      }
    4040    }
    41     public string Donor { get { return "S. Aeberhard"; } }
    42     public int Year { get { return 1992; } }
     41    public override string Donor { get { return "S. Aeberhard"; } }
     42    public override int Year { get { return 1992; } }
     43
     44    protected override string TargetVariable { get { return "X000"; } }
     45    protected override string[] VariableNames {
     46      get { return new string[] { "X001", "X002", "X003", "X004", "X005", "X000" }; }
     47    }
     48    protected override string[] AllowedInputVariables {
     49      get { return new string[] { "X001", "X002", "X003", "X004", "X005" }; }
     50    }
     51    protected override int TrainingPartitionStart { get { return 0; } }
     52    protected override int TrainingPartitionEnd { get { return 143; } }
     53    protected override int TestPartitionStart { get { return 143; } }
     54    protected override int TestPartitionEnd { get { return 215; } }
    4355  }
    4456}
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/UCI/UCIInstanceProvider.cs

    r8903 r9208  
    4646
    4747    public override IEnumerable<IDataDescriptor> GetDataDescriptors() {
    48       List<IUCIDataDescriptor> descriptorList = new List<IUCIDataDescriptor>();
     48      List<UCIDataDescriptor> descriptorList = new List<UCIDataDescriptor>();
    4949      descriptorList.Add(new Iris());
    5050      descriptorList.Add(new Mammography());
     
    5353      descriptorList.Add(new Vertebral_3C());
    5454      descriptorList.Add(new Wine());
     55      descriptorList.Add(new WisconsinDiagnosticBreastCancer());
    5556      var solutionsArchiveName = GetResourceName(FileName + @"\.zip");
    5657      if (!String.IsNullOrEmpty(solutionsArchiveName)) {
     
    6263          }
    6364          foreach (var entry in entries.OrderBy(x => x)) {
    64             string prettyName = Path.GetFileNameWithoutExtension(entry);
    65             IUCIDataDescriptor desc = descriptorList.Where(x => x.Name.Equals(prettyName)).FirstOrDefault();
     65            string filename = Path.GetFileNameWithoutExtension(entry);
     66            UCIDataDescriptor desc = descriptorList.Where(x => x.Filename.Equals(filename)).FirstOrDefault();
    6667            if (desc != null) {
    67               prettyName = String.Format("{0}, {1}, {2}", prettyName, desc.Donor, desc.Year);
    68               yield return new ResourceClassificationDataDescriptor(prettyName, desc.Description, entry);
     68              desc.ResourceName = entry;
     69              yield return desc;
    6970            } else
    70               yield return new ResourceClassificationDataDescriptor(prettyName, Description, entry);
     71              throw new ArgumentNullException("No Descriptor could be found for this entry.");
    7172          }
    7273        }
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/UCI/Vertebral_3C.cs

    r8908 r9208  
    2323
    2424namespace HeuristicLab.Problems.Instances.DataAnalysis {
    25   public class Vertebral_3C : IUCIDataDescriptor {
    26     public string Name { get { return "Vertebral_3C"; } }
    27     public string Description {
     25  public class Vertebral_3C : UCIDataDescriptor {
     26    public override string Filename { get { return "Vertebral_3C"; } }
     27    public override string Description {
    2828      get {
    2929        return "Data Set Information:" + Environment.NewLine
     
    4040      }
    4141    }
    42     public string Donor { get { return "H. da Mota"; } }
    43     public int Year { get { return 2011; } }
     42    public override string Donor { get { return "H. da Mota"; } }
     43    public override int Year { get { return 2011; } }
     44
     45    protected override string TargetVariable { get { return "class"; } }
     46    protected override string[] VariableNames {
     47      get { return new string[] { "pelvic_incidence", "pelvic_tilt", "lumbar_lordosis_angle", "sacral_slope", "pelvic_radius", "degree_1", "class" }; }
     48    }
     49    protected override string[] AllowedInputVariables {
     50      get { return new string[] { "pelvic_incidence", "pelvic_tilt", "lumbar_lordosis_angle", "sacral_slope", "pelvic_radius", "degree_1" }; }
     51    }
     52    protected override int TrainingPartitionStart { get { return 0; } }
     53    protected override int TrainingPartitionEnd { get { return 207; } }
     54    protected override int TestPartitionStart { get { return 207; } }
     55    protected override int TestPartitionEnd { get { return 310; } }
    4456  }
    4557}
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/UCI/Wine.cs

    r8889 r9208  
    2323using System;
    2424namespace HeuristicLab.Problems.Instances.DataAnalysis {
    25   public class Wine : IUCIDataDescriptor {
    26     public string Name { get { return "Wine"; } }
    27     public string Description {
     25  public class Wine : UCIDataDescriptor {
     26    public override string Filename { get { return "Wine"; } }
     27    public override string Description {
    2828      get {
    2929        return "These data are the results of a chemical analysis of wines grown in the same region " +
     
    5151      }
    5252    }
    53     public string Donor { get { return "S. Aeberhard"; } }
    54     public int Year { get { return 1991; } }
     53    public override string Donor { get { return "S. Aeberhard"; } }
     54    public override int Year { get { return 1991; } }
     55
     56    protected override string TargetVariable { get { return "Class"; } }
     57    protected override string[] VariableNames {
     58      get { return new string[] { "Alcohol", "Malic acid", "Ash", "Alcalinity of ash", "Magnesium", "Total phenols", "Flavanoids", "Nonflavanoid phenols", "Proanthocyanins", "Color intensity", "Hue", "OD280/OD315 of diluted wines", "Proline", "Class" }; }
     59    }
     60    protected override string[] AllowedInputVariables {
     61      get { return new string[] { "Alcohol", "Malic acid", "Ash", "Alcalinity of ash", "Magnesium", "Total phenols", "Flavanoids", "Nonflavanoid phenols", "Proanthocyanins", "Color intensity", "Hue", "OD280/OD315 of diluted wines", "Proline" }; }
     62    }
     63    protected override int TrainingPartitionStart { get { return 0; } }
     64    protected override int TrainingPartitionEnd { get { return 119; } }
     65    protected override int TestPartitionStart { get { return 119; } }
     66    protected override int TestPartitionEnd { get { return 178; } }
    5567  }
    5668}
Note: See TracChangeset for help on using the changeset viewer.