Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
06/11/13 21:41:08 (12 years ago)
Author:
sforsten
Message:

#2070:

  • changed parse methods in TableFileParser to accept a bool that defines whether the first line contains variable names
  • added methods in TableFileParser to check if the first line contains variable names
  • adapted unit tests
  • adapted DataAnalysisImportTypeDialog so that a checkbox can be used to specify whether the first line contains variable names
  • added the flag NumberStyles.AllowTrailingSign for parsing doubles
Location:
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3
Files:
9 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs

    r9456 r9608  
    5959      TableFileParser csvFileParser = new TableFileParser();
    6060
    61       csvFileParser.Parse(path);
     61      csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));
    6262
    6363      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ResourceClassificationInstanceProvider.cs

    r9456 r9608  
    4949        TableFileParser csvFileParser = new TableFileParser();
    5050        using (Stream stream = instancesZipFile.GetInputStream(entry)) {
    51           csvFileParser.Parse(stream, numberFormat, dateFormat, separator);
     51          csvFileParser.Parse(stream, numberFormat, dateFormat, separator, csvFileParser.AreColumnNamesInFirstLine(stream));
    5252        }
    5353
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs

    r9456 r9608  
    5858    public override IClusteringProblemData ImportData(string path) {
    5959      var csvFileParser = new TableFileParser();
    60       csvFileParser.Parse(path);
     60      csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));
    6161
    6262      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/DataAnalysisCSVFormat.cs

    r9456 r9608  
    2727    public NumberFormatInfo NumberFormatInfo { get; set; }
    2828    public DateTimeFormatInfo DateTimeFormatInfo { get; set; }
     29    public bool VariableNamesAvailable { get; set; }
    2930  }
    3031}
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/DataAnalysisInstanceProvider.cs

    r9456 r9608  
    3838    public TData ImportData(string path, ImportType type, DataAnalysisCSVFormat csvFormat) {
    3939      TableFileParser csvFileParser = new TableFileParser();
    40       csvFileParser.Parse(path, csvFormat.NumberFormatInfo, csvFormat.DateTimeFormatInfo, csvFormat.Separator);
     40      csvFileParser.Parse(path, csvFormat.NumberFormatInfo, csvFormat.DateTimeFormatInfo, csvFormat.Separator, csvFormat.VariableNamesAvailable);
    4141      return ImportData(path, type, csvFileParser);
    4242    }
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/CSV/RegressionCSVInstanceProvider.cs

    r9456 r9608  
    5757    public override IRegressionProblemData ImportData(string path) {
    5858      TableFileParser csvFileParser = new TableFileParser();
    59       csvFileParser.Parse(path);
     59      csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));
    6060
    6161      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/ResourceRegressionInstanceProvider.cs

    r9456 r9608  
    4949        TableFileParser csvFileParser = new TableFileParser();
    5050        using (Stream stream = instancesZipFile.GetInputStream(entry)) {
    51           csvFileParser.Parse(stream, numberFormat, dateFormat, separator);
     51          csvFileParser.Parse(stream, numberFormat, dateFormat, separator, csvFileParser.AreColumnNamesInFirstLine(stream));
    5252        }
    5353
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TableFileParser.cs

    r9457 r9608  
    7575    }
    7676
     77    public bool AreColumnNamesInFirstLine(string fileName) {
     78      NumberFormatInfo numberFormat;
     79      DateTimeFormatInfo dateTimeFormatInfo;
     80      char separator;
     81      DetermineFileFormat(fileName, out numberFormat, out dateTimeFormatInfo, out separator);
     82      using (var stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) {
     83        return AreColumnNamesInFirstLine(stream, numberFormat, dateTimeFormatInfo, separator);
     84      }
     85    }
     86
     87    public bool AreColumnNamesInFirstLine(Stream stream) {
     88      NumberFormatInfo numberFormat = NumberFormatInfo.InvariantInfo;
     89      DateTimeFormatInfo dateTimeFormatInfo = DateTimeFormatInfo.InvariantInfo;
     90      char separator = ',';
     91      return AreColumnNamesInFirstLine(stream, numberFormat, dateTimeFormatInfo, separator);
     92    }
     93
     94    public bool AreColumnNamesInFirstLine(string fileName, NumberFormatInfo numberFormat,
     95                                         DateTimeFormatInfo dateTimeFormatInfo, char separator) {
     96      using (var stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) {
     97        return AreColumnNamesInFirstLine(stream, numberFormat, dateTimeFormatInfo, separator);
     98      }
     99    }
     100
     101    public bool AreColumnNamesInFirstLine(Stream stream, NumberFormatInfo numberFormat,
     102                                          DateTimeFormatInfo dateTimeFormatInfo, char separator) {
     103      using (StreamReader reader = new StreamReader(stream)) {
     104        tokenizer = new Tokenizer(reader, numberFormat, dateTimeFormatInfo, separator);
     105        return tokenizer.Peek().type != TokenTypeEnum.Double;
     106      }
     107    }
     108
    77109    /// <summary>
    78110    /// Parses a file and determines the format first
    79111    /// </summary>
    80112    /// <param name="fileName">file which is parsed</param>
    81     public void Parse(string fileName) {
     113    /// <param name="columnNamesInFirstLine"></param>
     114    public void Parse(string fileName, bool columnNamesInFirstLine) {
    82115      NumberFormatInfo numberFormat;
    83116      DateTimeFormatInfo dateTimeFormatInfo;
    84117      char separator;
    85       DetermineFileFormat(new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), out numberFormat, out dateTimeFormatInfo, out separator);
    86       Parse(new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), numberFormat, dateTimeFormatInfo, separator);
     118      DetermineFileFormat(fileName, out numberFormat, out dateTimeFormatInfo, out separator);
     119      Parse(new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine);
    87120    }
    88121
     
    94127    /// <param name="dateTimeFormatInfo">Format of datetime</param>
    95128    /// <param name="separator">defines the separator</param>
    96     public void Parse(string fileName, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator) {
    97       Parse(new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), numberFormat, dateTimeFormatInfo, separator);
     129    /// <param name="columnNamesInFirstLine"></param>
     130    public void Parse(string fileName, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator, bool columnNamesInFirstLine) {
     131      using (var stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) {
     132        Parse(stream, numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine);
     133      }
    98134    }
    99135
     
    102138    /// </summary>
    103139    /// <param name="stream">stream which is parsed</param>
    104     public void Parse(Stream stream) {
     140    /// <param name="columnNamesInFirstLine"></param>
     141    public void Parse(Stream stream, bool columnNamesInFirstLine) {
    105142      NumberFormatInfo numberFormat = NumberFormatInfo.InvariantInfo;
    106143      DateTimeFormatInfo dateTimeFormatInfo = DateTimeFormatInfo.InvariantInfo;
    107144      char separator = ',';
    108       Parse(stream, numberFormat, dateTimeFormatInfo, separator);
     145      Parse(stream, numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine);
    109146    }
    110147
     
    116153    /// <param name="dateTimeFormatInfo">Format of datetime</param>
    117154    /// <param name="separator">defines the separator</param>
    118     public void Parse(Stream stream, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator) {
     155    /// <param name="columnNamesInFirstLine"></param>
     156    public void Parse(Stream stream, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator, bool columnNamesInFirstLine) {
    119157      using (StreamReader reader = new StreamReader(stream)) {
    120158        tokenizer = new Tokenizer(reader, numberFormat, dateTimeFormatInfo, separator);
    121159        // parse the file
    122         Parse();
     160        Parse(columnNamesInFirstLine);
    123161      }
    124162
     
    340378        if (strToken.Equals(INTERNAL_SEPARATOR)) {
    341379          return SeparatorToken;
    342         } else if (double.TryParse(strToken, NumberStyles.Float, numberFormatInfo, out token.doubleValue)) {
     380        } else if (double.TryParse(strToken, NumberStyles.Float | NumberStyles.AllowTrailingSign, numberFormatInfo, out token.doubleValue)) {
    343381          token.type = TokenTypeEnum.Double;
    344382          return token;
     
    372410
    373411    #region parsing
    374     private void Parse() {
    375       ParseVariableNames();
    376       if (!tokenizer.HasNext()) Error("Couldn't parse data values. Probably because of incorrect number format (the parser expects english number format with a '.' as decimal separator).", "", tokenizer.CurrentLineNumber);
     412    private void Parse(bool columnNamesInFirstLine) {
     413      if (columnNamesInFirstLine) {
     414        ParseVariableNames();
     415        if (!tokenizer.HasNext())
     416          Error(
     417            "Couldn't parse data values. Probably because of incorrect number format (the parser expects english number format with a '.' as decimal separator).",
     418            "", tokenizer.CurrentLineNumber);
     419      }
    377420      ParseValues();
    378421      if (rowValues.Count == 0) Error("Couldn't parse data values. Probably because of incorrect number format (the parser expects english number format with a '.' as decimal separator).", "", tokenizer.CurrentLineNumber);
     
    423466
    424467    private void ParseVariableNames() {
    425       //if first token is double no variables names are given
    426       if (tokenizer.Peek().type == TokenTypeEnum.Double) return;
    427 
    428468      // the first line must contain variable names
    429469      List<Token> tokens = new List<Token>();
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TimeSeries/CSV/TimeSeriesPrognosisCSVInstanceProvider.cs

    r9456 r9608  
    5757    public override ITimeSeriesPrognosisProblemData ImportData(string path) {
    5858      TableFileParser csvFileParser = new TableFileParser();
    59       csvFileParser.Parse(path);
     59      csvFileParser.Parse(path, csvFileParser.AreColumnNamesInFirstLine(path));
    6060
    6161      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
Note: See TracChangeset for help on using the changeset viewer.