- Timestamp: 11/29/15 15:33:15 (9 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TableFileParser.cs
r13411 r13413 28 28 using System.Linq; 29 29 using System.Runtime.Serialization; 30 using System.Security.Policy;31 30 32 31 namespace HeuristicLab.Problems.Instances.DataAnalysis { … … 114 113 /// <param name="fileName">file which is parsed</param> 115 114 /// <param name="columnNamesInFirstLine"></param> 116 public void Parse(string fileName, bool columnNamesInFirstLine ) {115 public void Parse(string fileName, bool columnNamesInFirstLine, int lineLimit = -1) { 117 116 NumberFormatInfo numberFormat; 118 117 DateTimeFormatInfo dateTimeFormatInfo; 119 118 char separator; 120 119 DetermineFileFormat(fileName, out numberFormat, out dateTimeFormatInfo, out separator); 121 Parse(new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine );120 Parse(new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite), numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine, lineLimit); 122 121 } 123 122 … … 130 129 /// <param name="separator">defines the separator</param> 131 130 /// <param name="columnNamesInFirstLine"></param> 132 public void Parse(string fileName, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator, bool columnNamesInFirstLine ) {131 public void Parse(string fileName, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator, bool columnNamesInFirstLine, int lineLimit = -1) { 133 132 using (var stream = new FileStream(fileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) { 134 Parse(stream, numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine );133 Parse(stream, numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine, lineLimit); 135 134 } 136 135 } … … 141 140 /// <param name="stream">stream which is parsed</param> 142 141 /// <param name="columnNamesInFirstLine"></param> 143 public void Parse(Stream stream, bool columnNamesInFirstLine ) {142 
public void Parse(Stream stream, bool columnNamesInFirstLine, int lineLimit = -1) { 144 143 NumberFormatInfo numberFormat = NumberFormatInfo.InvariantInfo; 145 144 DateTimeFormatInfo dateTimeFormatInfo = DateTimeFormatInfo.InvariantInfo; 146 145 char separator = ','; 147 Parse(stream, numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine );146 Parse(stream, numberFormat, dateTimeFormatInfo, separator, columnNamesInFirstLine, lineLimit); 148 147 } 149 148 … … 156 155 /// <param name="separator">defines the separator</param> 157 156 /// <param name="columnNamesInFirstLine"></param> 158 public void Parse(Stream stream, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator, bool columnNamesInFirstLine ) {157 public void Parse(Stream stream, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator, bool columnNamesInFirstLine, int lineLimit = -1) { 159 158 using (StreamReader reader = new StreamReader(stream)) { 160 159 tokenizer = new Tokenizer(reader, numberFormat, dateTimeFormatInfo, separator); 161 160 // parse the file 162 Parse(columnNamesInFirstLine );161 Parse(columnNamesInFirstLine, lineLimit); 163 162 } 164 163 … … 287 286 } 288 287 289 290 //internal class Token {291 // public TokenTypeEnum type;292 // public string stringValue;293 // public double doubleValue;294 // public DateTime dateTimeValue;295 //296 // public Token(TokenTypeEnum type, string value) {297 // this.type = type;298 // stringValue = value;299 // dateTimeValue = DateTime.MinValue;300 // doubleValue = 0.0;301 // }302 //303 // public bool Equals(Token other) {304 // throw new NotImplementedException();305 // }306 //307 // public override string ToString() {308 // return stringValue;309 // }310 //311 // public override bool Equals(object obj) {312 // return Equals(obj as Token);313 // }314 //315 // public override int GetHashCode() {316 // throw new NotSupportedException();317 // }318 //}319 320 321 288 internal class 
Tokenizer { 322 289 private StreamReader reader; … … 344 311 } 345 312 346 // private Token newlineToken;347 // public Token NewlineToken {348 // get { return newlineToken; }349 // private set { newlineToken = value; }350 // }351 // private Token separatorToken;352 // public Token SeparatorToken {353 // get { return separatorToken; }354 // private set { separatorToken = value; }355 // }356 357 313 public Tokenizer(StreamReader reader, NumberFormatInfo numberFormatInfo, DateTimeFormatInfo dateTimeFormatInfo, char separator) { 358 314 this.reader = reader; … … 360 316 this.dateTimeFormatInfo = dateTimeFormatInfo; 361 317 this.separator = separator; 362 //separatorToken = new Token(TokenTypeEnum.Separator, INTERNAL_SEPARATOR);363 //newlineToken = new Token(TokenTypeEnum.NewLine, Environment.NewLine);364 318 ReadNextTokens(); 365 319 } … … 458 412 459 413 #region parsing 460 private void Parse(bool columnNamesInFirstLine ) {414 private void Parse(bool columnNamesInFirstLine, int lineLimit = -1) { // lineLimit = -1 means no limit 461 415 if (columnNamesInFirstLine) { 462 416 ParseVariableNames(); … … 466 420 "", tokenizer.CurrentLineNumber); 467 421 } 468 ParseValues( );422 ParseValues(lineLimit); 469 423 if (rowValues.Count == 0) Error("Couldn't parse data values. Probably because of incorrect number format (the parser expects english number format with a '.' 
as decimal separator).", "", tokenizer.CurrentLineNumber); 470 424 } 471 425 472 private void ParseValues() { 473 while (tokenizer.HasNext()) { 426 private void ParseValues(int lineLimit = -1) { 427 int nLinesParsed = 0; 428 while (tokenizer.HasNext() && (lineLimit < 0 || nLinesParsed < lineLimit)) { 474 429 if (tokenizer.PeekType() == TokenTypeEnum.NewLine) { 475 430 tokenizer.Skip(); 431 nLinesParsed++; 476 432 } else { 477 433 List<object> row = new List<object>(); … … 483 439 } 484 440 ExpectType(TokenTypeEnum.NewLine); 441 nLinesParsed++; 485 442 // all rows have to have the same number of values 486 443 // the first row defines how many samples are needed
Note: See TracChangeset for help on using the changeset viewer.