Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
05/21/12 11:13:32 (13 years ago)
Author:
sforsten
Message:

#1784: Changed the TableFileParser so that callers no longer have to determine the file format themselves. Added comments to the different Parse methods.

Location:
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3

    • Property svn:ignore set to
      Plugin.cs
      obj
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ClassificationInstanceProvider.cs

    r7849 r7851  
    2323using System.Collections;
    2424using System.Collections.Generic;
    25 using System.Globalization;
    2625using System.IO;
    2726using System.Linq;
     
    3231  public abstract class ClassificationInstanceProvider : IProblemInstanceProvider<IClassificationProblemData> {
    3332    public IClassificationProblemData LoadData(string path) {
    34       NumberFormatInfo numberFormat;
    35       DateTimeFormatInfo dateFormat;
    36       char separator;
    37       TableFileParser.DetermineFileFormat(new FileStream(path, FileMode.Open), out numberFormat, out dateFormat, out separator);
    38 
    39       IClassificationProblemData claData = LoadData(new FileStream(path, FileMode.Open), numberFormat, dateFormat, separator);
    40       int pos = path.LastIndexOf('\\');
    41       if (pos < 0)
    42         claData.Name = path;
    43       else {
    44         pos++;
    45         claData.Name = path.Substring(pos, path.Length - pos);
    46       }
    47 
    48       return claData;
    49     }
    50 
    51     protected IClassificationProblemData LoadData(Stream stream, NumberFormatInfo numberFormat, DateTimeFormatInfo dateFormat, char separator) {
    5233      TableFileParser csvFileParser = new TableFileParser();
    5334
    54       csvFileParser.Parse(stream, numberFormat, dateFormat, separator);
     35      csvFileParser.Parse(path);
    5536
    5637      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
     
    6546      claData.TestPartition.Start = trainingPartEnd;
    6647      claData.TestPartition.End = csvFileParser.Rows;
     48      int pos = path.LastIndexOf('\\');
     49      if (pos < 0)
     50        claData.Name = path;
     51      else {
     52        pos++;
     53        claData.Name = path.Substring(pos, path.Length - pos);
     54      }
     55
    6756      return claData;
    6857    }
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ResourceClassificationInstanceProvider.cs

    r7849 r7851  
    6464        }
    6565
    66         IClassificationProblemData claData;
     66        TableFileParser csvFileParser = new TableFileParser();
    6767        using (Stream stream = instancesZipFile.GetInputStream(entry)) {
    68           claData = LoadData(stream, numberFormat, dateFormat, separator);
     68          csvFileParser.Parse(stream, numberFormat, dateFormat, separator);
    6969        }
     70
     71        Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
     72        string targetVar = csvFileParser.VariableNames.Last();
     73        IEnumerable<string> allowedInputVars = csvFileParser.VariableNames.Where(x => !x.Equals(targetVar));
     74
     75        ClassificationProblemData claData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
     76
     77        int trainingPartEnd = csvFileParser.Rows * 2 / 3;
     78        claData.TrainingPartition.Start = 0;
     79        claData.TrainingPartition.End = trainingPartEnd;
     80        claData.TestPartition.Start = trainingPartEnd;
     81        claData.TestPartition.End = csvFileParser.Rows;
     82
    7083        claData.Name = descriptor.Name;
    7184        claData.Description = descriptor.Description;
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Properties

    • Property svn:ignore set to
      AssemblyInfo.cs
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/RegressionInstanceProvider.cs

    r7849 r7851  
    2323using System.Collections;
    2424using System.Collections.Generic;
    25 using System.Globalization;
    2625using System.IO;
    2726using System.Linq;
     
    3332
    3433    public IRegressionProblemData LoadData(string path) {
    35       NumberFormatInfo numberFormat;
    36       DateTimeFormatInfo dateFormat;
    37       char separator;
    38       TableFileParser.DetermineFileFormat(path, out numberFormat, out dateFormat, out separator);
     34      TableFileParser csvFileParser = new TableFileParser();
     35      csvFileParser.Parse(path);
    3936
    40       IRegressionProblemData regData = LoadData(new FileStream(path, FileMode.Open), numberFormat, dateFormat, separator);
     37      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
     38      string targetVar = csvFileParser.VariableNames.Last();
     39      IEnumerable<string> allowedInputVars = csvFileParser.VariableNames.Where(x => !x.Equals(targetVar));
     40
     41      IRegressionProblemData regData = new RegressionProblemData(dataset, allowedInputVars, targetVar);
     42
     43      int trainingPartEnd = csvFileParser.Rows * 2 / 3;
     44      regData.TrainingPartition.Start = 0;
     45      regData.TrainingPartition.End = trainingPartEnd;
     46      regData.TestPartition.Start = trainingPartEnd;
     47      regData.TestPartition.End = csvFileParser.Rows;
    4148
    4249      int pos = path.LastIndexOf('\\');
     
    4754        regData.Name = path.Substring(pos, path.Length - pos);
    4855      }
    49       return regData;
    50     }
    51 
    52     protected IRegressionProblemData LoadData(Stream stream, NumberFormatInfo numberFormat, DateTimeFormatInfo dateFormat, char separator) {
    53       TableFileParser csvFileParser = new TableFileParser();
    54 
    55       csvFileParser.Parse(stream, numberFormat, dateFormat, separator);
    56 
    57       Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    58       string targetVar = csvFileParser.VariableNames.Last();
    59       IEnumerable<string> allowedInputVars = csvFileParser.VariableNames.Where(x => !x.Equals(targetVar));
    60 
    61       RegressionProblemData regData = new RegressionProblemData(dataset, allowedInputVars, targetVar);
    62 
    63       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    64       regData.TrainingPartition.Start = 0;
    65       regData.TrainingPartition.End = trainingPartEnd;
    66       regData.TestPartition.Start = trainingPartEnd;
    67       regData.TestPartition.End = csvFileParser.Rows;
    68 
    6956      return regData;
    7057    }
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/ResourceRegressionInstanceProvider.cs

    r7849 r7851  
    6464        }
    6565
    66         IRegressionProblemData regData;
     66        TableFileParser csvFileParser = new TableFileParser();
    6767        using (Stream stream = instancesZipFile.GetInputStream(entry)) {
    68           regData = LoadData(stream, numberFormat, dateFormat, separator);
     68          csvFileParser.Parse(stream, numberFormat, dateFormat, separator);
    6969        }
     70
     71        Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
     72        string targetVar = csvFileParser.VariableNames.Last();
     73        IEnumerable<string> allowedInputVars = csvFileParser.VariableNames.Where(x => !x.Equals(targetVar));
     74
     75        IRegressionProblemData regData = new RegressionProblemData(dataset, allowedInputVars, targetVar);
     76
     77        int trainingPartEnd = csvFileParser.Rows * 2 / 3;
     78        regData.TrainingPartition.Start = 0;
     79        regData.TrainingPartition.End = trainingPartEnd;
     80        regData.TestPartition.Start = trainingPartEnd;
     81        regData.TestPartition.End = csvFileParser.Rows;
    7082
    7183        regData.Name = descriptor.Name;
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TableFileParser.cs

    r7849 r7851  
    7575    }
    7676
     77    /// <summary>
     78    /// Parses a file and determines the format first
     79    /// </summary>
     80    /// <param name="fileName">file which is parsed</param>
     81    public void Parse(string fileName) {
     82      NumberFormatInfo numberFormat;
     83      DateTimeFormatInfo dateTimeFormatInfo;
     84      char separator;
     85      DetermineFileFormat(new FileStream(fileName, FileMode.Open), out numberFormat, out dateTimeFormatInfo, out separator);
     86      Parse(new FileStream(fileName, FileMode.Open), numberFormat, dateTimeFormatInfo, separator);
     87    }
     88
     89    /// <summary>
     90    /// Parses a file with the given formats
     91    /// </summary>
     92    /// <param name="fileName">file which is parsed</param>
     93    /// <param name="numberFormat">Format of numbers</param>
     94    /// <param name="dateTimeFormatInfo">Format of datetime</param>
     95    /// <param name="separator">defines the separator</param>
    7796    public void Parse(string fileName, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator) {
    7897      Parse(new FileStream(fileName, FileMode.Open), numberFormat, dateTimeFormatInfo, separator);
    7998    }
    8099
     100    /// <summary>
     101    /// Parses a stream using the default format: NumberFormatInfo.InvariantInfo, DateTimeFormatInfo.InvariantInfo and ',' as the separator.
     102    /// </summary>
     103    /// <param name="stream">stream which is parsed</param>
     104    public void Parse(Stream stream) {
     105      NumberFormatInfo numberFormat = NumberFormatInfo.InvariantInfo;
     106      DateTimeFormatInfo dateTimeFormatInfo = DateTimeFormatInfo.InvariantInfo;
     107      char separator = ',';
     108      Parse(stream, numberFormat, dateTimeFormatInfo, separator);
     109    }
     110
     111    /// <summary>
     112    /// Parses a stream with the given formats.
     113    /// </summary>
     114    /// <param name="stream">Stream which is parsed</param>   
     115    /// <param name="numberFormat">Format of numbers</param>
     116    /// <param name="dateTimeFormatInfo">Format of datetime</param>
     117    /// <param name="separator">defines the separator</param>
    81118    public void Parse(Stream stream, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator) {
    82119      using (StreamReader reader = new StreamReader(stream)) {
Note: See TracChangeset for help on using the changeset viewer.