Free cookie consent management tool by TermsFeed Policy Generator

Changeset 15513


Ignore:
Timestamp:
12/12/17 09:57:55 (7 years ago)
Author:
pfleck
Message:

#2859 Fixed problem by temporarily using a List<object> to represent an unknown column-type until the type is known.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TableFileParser.cs

    r14408 r15513  
    257257      this.columns = values.Count;
    258258
     259      // replace lists of undefined type (List<object>) with double lists
     260      for (int i = 0; i < values.Count; i++) {
     261        if (values[i] is List<object>) {
     262          values[i] = Enumerable.Repeat(double.NaN, rows).ToList();
     263        }
     264      }
     265
    259266      // after everything has been parsed make sure the lists are as compact as possible
    260267      foreach (var l in values) {
     
    308315    #region type-dependent dispatch
    309316    private bool IsColumnTypeCompatible(IList list, TokenTypeEnum tokenType) {
    310       return (list is List<string>) || // all tokens can be added to a string list
     317      return (list is List<object>) || // unknown lists are compatible with everything (potential conversion)
     318             (list is List<string>) || // all tokens can be added to a string list
    311319             (tokenType == TokenTypeEnum.Missing) || // empty entries are allowed in all columns
    312320             (tokenType == TokenTypeEnum.Double && list is List<double>) ||
     
    336344
    337345    private void AddValue(TokenTypeEnum type, IList list, string strVal, double dblVal, DateTime dateTimeVal) {
     346      // Add value if list has a defined type
    338347      var dblList = list as List<double>;
    339348      if (dblList != null) {
     
    341350        return;
    342351      }
    343 
    344352      var strList = list as List<string>;
    345353      if (strList != null) {
     
    353361      }
    354362
    355       list.Add(strVal); // assumes List<object>
    356     }
    357 
    358     private void AddValue(TokenTypeEnum type, List<double> list, double dblVal) {
     363      // Undefined list-type
     364      if (type == TokenTypeEnum.Missing) {
     365        // add null to track number of missing values
     366        list.Add(null);
     367      } else { // first non-missing value for undefined list-type
     368        var newList = ConvertList(type, list, estimatedNumberOfLines);
     369        // replace list
     370        var idx = values.IndexOf(list);
     371        values[idx] = newList;
     372        // recursively call AddValue
     373        AddValue(type, newList, strVal, dblVal, dateTimeVal);
     374      }
     375    }
     376
     377    private static void AddValue(TokenTypeEnum type, List<double> list, double dblVal) {
    359378      Contract.Assert(type == TokenTypeEnum.Missing || type == TokenTypeEnum.Double);
    360379      list.Add(type == TokenTypeEnum.Missing ? double.NaN : dblVal);
    361380    }
    362381
    363     private void AddValue(TokenTypeEnum type, List<string> list, string strVal) {
     382    private static void AddValue(TokenTypeEnum type, List<string> list, string strVal) {
    364383      // assumes that strVal is always set to the original token read from the input file
    365384      list.Add(type == TokenTypeEnum.Missing ? string.Empty : strVal);
    366385    }
    367386
    368     private void AddValue(TokenTypeEnum type, List<DateTime> list, DateTime dtVal) {
     387    private static void AddValue(TokenTypeEnum type, List<DateTime> list, DateTime dtVal) {
    369388      Contract.Assert(type == TokenTypeEnum.Missing || type == TokenTypeEnum.DateTime);
    370389      list.Add(type == TokenTypeEnum.Missing ? DateTime.MinValue : dtVal);
    371390    }
    372391
    373     private IList CreateList(TokenTypeEnum type, int estimatedNumberOfLines) {
     392    private static IList CreateList(TokenTypeEnum type, int estimatedNumberOfLines) {
    374393      switch (type) {
    375394        case TokenTypeEnum.String:
    376395          return new List<string>(estimatedNumberOfLines);
    377396        case TokenTypeEnum.Double:
    378         case TokenTypeEnum.Missing: // assume double columns
    379397          return new List<double>(estimatedNumberOfLines);
    380398        case TokenTypeEnum.DateTime:
    381399          return new List<DateTime>(estimatedNumberOfLines);
     400        case TokenTypeEnum.Missing: // List<object> represents a list of unknown type
     401          return new List<object>(estimatedNumberOfLines);
    382402        default:
    383403          throw new InvalidOperationException();
     404      }
     405    }
     406
     407    private static IList ConvertList(TokenTypeEnum type, IList list, int estimatedNumberOfLines) {
     408      var newList = CreateList(type, estimatedNumberOfLines);
     409      object missingValue = GetMissingValue(type);
     410      for (int i = 0; i < list.Count; i++)
     411        newList.Add(missingValue);
     412      return newList;
     413    }
     414    private static object GetMissingValue(TokenTypeEnum type) {
     415      switch (type) {
     416        case TokenTypeEnum.String: return string.Empty;
     417        case TokenTypeEnum.Double: return double.NaN;
     418        case TokenTypeEnum.DateTime: return DateTime.MinValue;
     419        default: throw new ArgumentOutOfRangeException("type", type, "No missing value defined");
    384420      }
    385421    }
Note: See TracChangeset for help on using the changeset viewer.