Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HivePerformance/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ResourceClassificationInstanceProvider.cs @ 9368

Last change on this file since 9368 was 9208, checked in by sforsten, 12 years ago

#1941:

  • added Wisconsin breast cancer problem instance
  • corrected Iris dataset
  • changed classification data descriptors to be able to set training and test partition as well as input and target variables (in the same way as it is done in regression)
File size: 2.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Globalization;
24using System.IO;
25using System.Linq;
26using System.Reflection;
27using System.Text.RegularExpressions;
28using HeuristicLab.Problems.DataAnalysis;
29using ICSharpCode.SharpZipLib.Zip;
30
namespace HeuristicLab.Problems.Instances.DataAnalysis {
  /// <summary>
  /// Base class for classification instance providers that read their data sets
  /// from entries of a zip archive embedded as a manifest resource.
  /// </summary>
  public abstract class ResourceClassificationInstanceProvider : ClassificationInstanceProvider {

    /// <summary>
    /// Name of the embedded zip archive without the ".zip" extension.
    /// The value is concatenated into a regular expression by <see cref="GetResourceName"/>,
    /// so regex metacharacters contained in it are interpreted as such.
    /// </summary>
    protected abstract string FileName { get; }

    /// <summary>
    /// Loads the data set referenced by <paramref name="id"/> from the embedded zip archive
    /// and converts it into classification problem data.
    /// </summary>
    /// <param name="id">Descriptor of the instance; must be a <c>ResourceClassificationDataDescriptor</c>.</param>
    /// <returns>The problem data generated by the descriptor from the parsed data set.</returns>
    /// <exception cref="InvalidOperationException">
    /// The archive resource cannot be found or opened (or more than one resource matches).
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The archive has no entry for the descriptor, or the parsed variable names
    /// are rejected by the descriptor.
    /// </exception>
    public override IClassificationProblemData LoadData(IDataDescriptor id) {
      var descriptor = (ResourceClassificationDataDescriptor)id;

      // The dot is escaped because GetResourceName treats its argument as a regex fragment.
      var instanceArchiveName = GetResourceName(FileName + @"\.zip");
      if (instanceArchiveName == null) {
        throw new InvalidOperationException(
          "No embedded resource matching the archive \"" + FileName + ".zip\" was found.");
      }

      var archiveStream = GetType().Assembly.GetManifestResourceStream(instanceArchiveName);
      if (archiveStream == null) {
        throw new InvalidOperationException(
          "The embedded resource \"" + instanceArchiveName + "\" could not be opened.");
      }

      using (var instancesZipFile = new ZipFile(archiveStream)) {
        var entry = instancesZipFile.GetEntry(descriptor.ResourceName);
        if (entry == null) {
          throw new ArgumentException(
            "The archive \"" + instanceArchiveName + "\" does not contain an entry named \"" + descriptor.ResourceName + "\".", "id");
        }

        NumberFormatInfo numberFormat;
        DateTimeFormatInfo dateFormat;
        char separator;
        // First pass over the entry: detect number format, date format and separator.
        using (Stream stream = instancesZipFile.GetInputStream(entry)) {
          TableFileParser.DetermineFileFormat(stream, out numberFormat, out dateFormat, out separator);
        }

        // Second pass on a fresh stream: parse the table with the detected format.
        TableFileParser csvFileParser = new TableFileParser();
        using (Stream stream = instancesZipFile.GetInputStream(entry)) {
          csvFileParser.Parse(stream, numberFormat, dateFormat, separator);
        }

        Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
        if (!descriptor.CheckVariableNames(csvFileParser.VariableNames)) {
          throw new ArgumentException("Parsed file contains variables which are not in the descriptor.");
        }

        return descriptor.GenerateClassificationData(dataset);
      }
    }

    /// <summary>
    /// Returns the manifest resource name that matches <c>.*\.Data\.</c> followed by
    /// <paramref name="fileName"/>, or <c>null</c> if no resource matches.
    /// </summary>
    /// <param name="fileName">Regular-expression fragment describing the resource's file name.</param>
    /// <exception cref="InvalidOperationException">More than one resource name matches.</exception>
    protected virtual string GetResourceName(string fileName) {
      var pattern = @".*\.Data\." + fileName;
      // Search the same assembly LoadData opens the stream from, so that the name
      // found here is always valid for that assembly.
      return GetType().Assembly.GetManifestResourceNames()
              .SingleOrDefault(x => Regex.IsMatch(x, pattern));
    }
  }
}
Note: See TracBrowser for help on using the repository browser.