Ticket #2859: TableFileParser.cs.patch
File TableFileParser.cs.patch, 5.1 KB (added by pfleck, 6 years ago)
TableFileParser.cs
256 256         this.rows = values.First().Count;
257 257         this.columns = values.Count;
258 258
    259 +       // replace lists with undefined type (object) with double-lists
    260 +       for (int i = 0; i < values.Count; i++) {
    261 +         if (values[i] is List<object>) {
    262 +           values[i] = Enumerable.Repeat(double.NaN, rows).ToList();
    263 +         }
    264 +       }
    265 +
259 266         // after everything has been parsed make sure the lists are as compact as possible
260 267         foreach (var l in values) {
261 268           var dblList = l as List<double>;
  …   …
307 314
308 315       #region type-dependent dispatch
309 316       private bool IsColumnTypeCompatible(IList list, TokenTypeEnum tokenType) {
310     -       return (list is List<string>) || // all tokens can be added to a string list
    317 +       return (list is List<object>) || // unknown lists are compatible to everything (potential conversion)
    318 +         (list is List<string>) || // all tokens can be added to a string list
311 319           (tokenType == TokenTypeEnum.Missing) || // empty entries are allowed in all columns
312 320           (tokenType == TokenTypeEnum.Double && list is List<double>) ||
313 321           (tokenType == TokenTypeEnum.DateTime && list is List<DateTime>);
  …   …
335 343       }
336 344
337 345       private void AddValue(TokenTypeEnum type, IList list, string strVal, double dblVal, DateTime dateTimeVal) {
    346 +       // Add value if list has a defined type
338 347         var dblList = list as List<double>;
339 348         if (dblList != null) {
340 349           AddValue(type, dblList, dblVal);
341 350           return;
342 351         }
343     -
344 352         var strList = list as List<string>;
345 353         if (strList != null) {
346 354           AddValue(type, strList, strVal);
  …   …
352 360           return;
353 361         }
354 362
355     -       list.Add(strVal); // assumes List<object>
    363 +       // Undefined list-type
    364 +       if (type == TokenTypeEnum.Missing) {
    365 +         // add null to track number of missing values
    366 +         list.Add(null);
    367 +       } else { // first non-missing value for undefined list-type
    368 +         var newList = ConvertList(type, list, estimatedNumberOfLines);
    369 +         // replace list
    370 +         var idx = values.IndexOf(list);
    371 +         values[idx] = newList;
    372 +         // recursively call AddValue
    373 +         AddValue(type, newList, strVal, dblVal, dateTimeVal);
    374 +       }
356 375       }
357 376
358     -     private void AddValue(TokenTypeEnum type, List<double> list, double dblVal) {
    377 +     private static void AddValue(TokenTypeEnum type, List<double> list, double dblVal) {
359 378         Contract.Assert(type == TokenTypeEnum.Missing || type == TokenTypeEnum.Double);
360 379         list.Add(type == TokenTypeEnum.Missing ? double.NaN : dblVal);
361 380       }
362 381
363     -     private void AddValue(TokenTypeEnum type, List<string> list, string strVal) {
    382 +     private static void AddValue(TokenTypeEnum type, List<string> list, string strVal) {
364 383         // assumes that strVal is always set to the original token read from the input file
365 384         list.Add(type == TokenTypeEnum.Missing ? string.Empty : strVal);
366 385       }
367 386
368     -     private void AddValue(TokenTypeEnum type, List<DateTime> list, DateTime dtVal) {
    387 +     private static void AddValue(TokenTypeEnum type, List<DateTime> list, DateTime dtVal) {
369 388         Contract.Assert(type == TokenTypeEnum.Missing || type == TokenTypeEnum.DateTime);
370 389         list.Add(type == TokenTypeEnum.Missing ? DateTime.MinValue : dtVal);
371 390       }
372 391
373     -     private IList CreateList(TokenTypeEnum type, int estimatedNumberOfLines) {
    392 +     private static IList CreateList(TokenTypeEnum type, int estimatedNumberOfLines) {
374 393         switch (type) {
375 394           case TokenTypeEnum.String:
376 395             return new List<string>(estimatedNumberOfLines);
377 396           case TokenTypeEnum.Double:
378     -         case TokenTypeEnum.Missing: // assume double columns
379 397             return new List<double>(estimatedNumberOfLines);
380 398           case TokenTypeEnum.DateTime:
381 399             return new List<DateTime>(estimatedNumberOfLines);
    400 +         case TokenTypeEnum.Missing: // List<object> represent list of unknown type
    401 +           return new List<object>(estimatedNumberOfLines);
382 402           default:
383 403             throw new InvalidOperationException();
384 404         }
385 405       }
    406 +
    407 +     private static IList ConvertList(TokenTypeEnum type, IList list, int estimatedNumberOfLines) {
    408 +       var newList = CreateList(type, estimatedNumberOfLines);
    409 +       object missingValue = GetMissingValue(type);
    410 +       for (int i = 0; i < list.Count; i++)
    411 +         newList.Add(missingValue);
    412 +       return newList;
    413 +     }
    414 +     private static object GetMissingValue(TokenTypeEnum type) {
    415 +       switch (type) {
    416 +         case TokenTypeEnum.String: return string.Empty;
    417 +         case TokenTypeEnum.Double: return double.NaN;
    418 +         case TokenTypeEnum.DateTime: return DateTime.MinValue;
    419 +         default: throw new ArgumentOutOfRangeException("type", type, "No missing value defined");
    420 +       }
    421 +     }
386 422       #endregion
387 423
388 424       public static void DetermineFileFormat(string path, out NumberFormatInfo numberFormat, out DateTimeFormatInfo dateTimeFormatInfo, out char separator) {