Changeset 9652


Ignore:
Timestamp:
06/24/13 13:32:12 (9 years ago)
Author:
sforsten
Message:

#2047: TableFileParser can now use whitespace as a column separator (currently the character '\0' serves as the internal placeholder for the whitespace separator in the TableFileParser)

Location:
trunk/sources
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis.Views/3.3/DataAnalysisImportTypeDialog.cs

    r9608 r9652  
    4242      new KeyValuePair<char, string>(';', "; (Semicolon)" ),
    4343      new KeyValuePair<char, string>(',', ", (Comma)" ),   
    44       new KeyValuePair<char, string>('\t', "\\t (Tab)")
     44      new KeyValuePair<char, string>('\t', "\\t (Tab)"),
     45      new KeyValuePair<char, string>((char)0, "all whitespaces (including tabs and spaces)")
    4546    };
    4647
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TableFileParser.cs

    r9611 r9652  
    2828using System.Linq;
    2929using System.Runtime.Serialization;
    30 using System.Text;
    3130
    3231namespace HeuristicLab.Problems.Instances.DataAnalysis {
    3332  public class TableFileParser {
    3433    private const int BUFFER_SIZE = 65536;
    35     private static readonly char[] POSSIBLE_SEPARATORS = new char[] { ',', ';', '\t' };
     34    // char used to symbolize whitespace as the separator (missing values cannot be handled when separating by whitespace)
     35    private const char WHITESPACECHAR = (char)0;
     36    private static readonly char[] POSSIBLE_SEPARATORS = new char[] { ',', ';', '\t', WHITESPACECHAR };
    3637    private Tokenizer tokenizer;
    3738    private List<List<object>> rowValues;
     
    360361
    361362      private IEnumerable<string> Split(string line) {
    362         StringBuilder subStr = new StringBuilder();
    363         foreach (char c in line) {
    364           if (c == separator) {
    365             yield return subStr.ToString();
    366             subStr = new StringBuilder();
    367             // all separator characters are transformed to the internally used separator character
     363        IEnumerable<string> splitString;
     364        if (separator == WHITESPACECHAR) {
     365          // split on whitespace characters, dropping empty entries
     366          splitString = line.Split(new char[0], StringSplitOptions.RemoveEmptyEntries);
     367        } else {
     368          splitString = line.Split(separator);
     369        }
     370        int cur = splitString.Count();
     371        foreach (var str in splitString) {
     372          yield return str;
     373          cur--;
     374          // do not return the INTERNAL_SEPARATOR after the last string
     375          if (cur != 0) {
    368376            yield return INTERNAL_SEPARATOR;
    369           } else {
    370             subStr.Append(c);
    371377          }
    372378        }
    373         yield return subStr.ToString();
    374379      }
    375380
Note: See TracChangeset for help on using the changeset viewer.