Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
11/29/15 15:33:15 (9 years ago)
Author:
gkronber
Message:

#2071: Only preview the first 500 lines of data in the CSV import dialog.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TableFileParser.cs

    r13411 r13413  
    2828using System.Linq;
    2929using System.Runtime.Serialization;
    30 using System.Security.Policy;
    3130
    3231namespace HeuristicLab.Problems.Instances.DataAnalysis {
     
    114113    /// <param name="fileName">file which is parsed</param>
    115114    /// <param name="columnNamesInFirstLine"></param>
    116     public void Parse(string fileName, bool columnNamesInFirstLine) {
     115    public void Parse(string fileName, bool columnNamesInFirstLine, int lineLimit = -1) {
    117116      NumberFormatInfo numberFormat;
    118117      DateTimeFormatInfo dateTimeFormatInfo;
    119118      char separator;
    120119      DetermineFileFormat(fileName, out numberFormat, out dateTimeFormatInfo, out separator);
    121       Parse(new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine);
     120      Parse(new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine, lineLimit);
    122121    }
    123122
     
    130129    /// <param name="separator">defines the separator</param>
    131130    /// <param name="columnNamesInFirstLine"></param>
    132     public void Parse(string fileName, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator, bool columnNamesInFirstLine) {
     131    public void Parse(string fileName, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator, bool columnNamesInFirstLine, int lineLimit = -1) {
    133132      using (var stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) {
    134         Parse(stream, numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine);
     133        Parse(stream, numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine, lineLimit);
    135134      }
    136135    }
     
    141140    /// <param name="stream">stream which is parsed</param>
    142141    /// <param name="columnNamesInFirstLine"></param>
    143     public void Parse(Stream stream, bool columnNamesInFirstLine) {
     142    public void Parse(Stream stream, bool columnNamesInFirstLine, int lineLimit = -1) {
    144143      NumberFormatInfo numberFormat = NumberFormatInfo.InvariantInfo;
    145144      DateTimeFormatInfo dateTimeFormatInfo = DateTimeFormatInfo.InvariantInfo;
    146145      char separator = ',';
    147       Parse(stream, numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine);
     146      Parse(stream, numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine, lineLimit);
    148147    }
    149148
     
    156155    /// <param name="separator">defines the separator</param>
    157156    /// <param name="columnNamesInFirstLine"></param>
    158     public void Parse(Stream stream, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator, bool columnNamesInFirstLine) {
     157    public void Parse(Stream stream, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator, bool columnNamesInFirstLine, int lineLimit = -1) {
    159158      using (StreamReader reader = new StreamReader(stream)) {
    160159        tokenizer = new Tokenizer(reader, numberFormat, dateTimeFormatInfo, separator);
    161160        // parse the file
    162         Parse(columnNamesInFirstLine);
     161        Parse(columnNamesInFirstLine, lineLimit);
    163162      }
    164163
     
    287286    }
    288287
    289 
    290     //internal class Token {
    291     //  public TokenTypeEnum type;
    292     //  public string stringValue;
    293     //  public double doubleValue;
    294     //  public DateTime dateTimeValue;
    295     //
    296     //  public Token(TokenTypeEnum type, string value) {
    297     //    this.type = type;
    298     //    stringValue = value;
    299     //    dateTimeValue = DateTime.MinValue;
    300     //    doubleValue = 0.0;
    301     //  }
    302     //
    303     //  public bool Equals(Token other) {
    304     //    throw new NotImplementedException();
    305     //  }
    306     //
    307     //  public override string ToString() {
    308     //    return stringValue;
    309     //  }
    310     //
    311     //  public override bool Equals(object obj) {
    312     //    return Equals(obj as Token);
    313     //  }
    314     //
    315     //  public override int GetHashCode() {
    316     //    throw new NotSupportedException();
    317     //  }
    318     //}
    319 
    320 
    321288    internal class Tokenizer {
    322289      private StreamReader reader;
     
    344311      }
    345312
    346       // private Token newlineToken;
    347       // public Token NewlineToken {
    348       //   get { return newlineToken; }
    349       //   private set { newlineToken = value; }
    350       // }
    351       // private Token separatorToken;
    352       // public Token SeparatorToken {
    353       //   get { return separatorToken; }
    354       //   private set { separatorToken = value; }
    355       // }
    356 
    357313      public Tokenizer(StreamReader reader, NumberFormatInfo numberFormatInfo, DateTimeFormatInfo dateTimeFormatInfo, char separator) {
    358314        this.reader = reader;
     
    360316        this.dateTimeFormatInfo = dateTimeFormatInfo;
    361317        this.separator = separator;
    362         //separatorToken = new Token(TokenTypeEnum.Separator, INTERNAL_SEPARATOR);
    363         //newlineToken = new Token(TokenTypeEnum.NewLine, Environment.NewLine);
    364318        ReadNextTokens();
    365319      }
     
    458412
    459413    #region parsing
    460     private void Parse(bool columnNamesInFirstLine) {
     414    private void Parse(bool columnNamesInFirstLine, int lineLimit = -1) { // lineLimit = -1 means no limit
    461415      if (columnNamesInFirstLine) {
    462416        ParseVariableNames();
     
    466420            "", tokenizer.CurrentLineNumber);
    467421      }
    468       ParseValues();
     422      ParseValues(lineLimit);
    469423      if (rowValues.Count == 0) Error("Couldn't parse data values. Probably because of incorrect number format (the parser expects english number format with a '.' as decimal separator).", "", tokenizer.CurrentLineNumber);
    470424    }
    471425
    472     private void ParseValues() {
    473       while (tokenizer.HasNext()) {
     426    private void ParseValues(int lineLimit = -1) {
     427      int nLinesParsed = 0;
     428      while (tokenizer.HasNext() && (lineLimit < 0 || nLinesParsed < lineLimit)) {
    474429        if (tokenizer.PeekType() == TokenTypeEnum.NewLine) {
    475430          tokenizer.Skip();
     431          nLinesParsed++;
    476432        } else {
    477433          List<object> row = new List<object>();
     
    483439          }
    484440          ExpectType(TokenTypeEnum.NewLine);
     441          nLinesParsed++;
    485442          // all rows have to have the same number of values           
    486443          // the first row defines how many samples are needed
Note: See TracChangeset for help on using the changeset viewer.