Free cookie consent management tool by TermsFeed Policy Generator

Changeset 405


Ignore:
Timestamp:
07/29/08 18:49:51 (16 years ago)
Author:
gkronber
Message:

Changed the parser to try each of three number formats on the whole file, rather than on individual tokens (ticket #209).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.DataAnalysis/DatasetParser.cs

    r404 r405  
    170170
    171171    public void Import(string importFileName, bool strict) {
    172       StreamReader reader = new StreamReader(importFileName);
    173       this.tokenizer = new Tokenizer(reader);
    174       tokenizer.Separators = new string[] { " ", ";", "\t" };
    175 
    176       try {
    177         // parse the file
    178         Parse(strict);
    179       } finally {
    180         reader.Close();
    181       }
    182 
     172      TryParse(importFileName, strict);
    183173      // translate the list of samples into a DoubleMatrixData item
    184174      samples = new double[samplesList.Count * samplesList[0].Count];
     
    198188    }
    199189
     190    private void TryParse(string importFileName, bool strict) {
     191      Exception lastEx = null;
     192      NumberFormatInfo[] possibleFormats = new NumberFormatInfo[] { NumberFormatInfo.InvariantInfo, CultureInfo.GetCultureInfo("de-DE").NumberFormat, NumberFormatInfo.CurrentInfo };
     193      foreach(NumberFormatInfo numberFormat in possibleFormats) {
     194        using(StreamReader reader = new StreamReader(importFileName)) {
     195          tokenizer = new Tokenizer(reader, numberFormat);
     196          tokenizer.Separators = new string[] { " ", ";", "\t" };
     197          try {
     198            // parse the file
     199            Parse(strict);
     200            return; // parsed without errors -> return;
     201          } catch(DataFormatException ex) {
     202            lastEx = ex;
     203          }
     204        }
     205      }
     206      // all number formats threw an exception -> rethrow the last exception
     207      throw lastEx;
     208    }
     209
    200210    #region tokenizer
    201211    internal enum TokenTypeEnum {
     
    226236      private List<Token> tokens;
    227237      private string[] separators;
     238      private NumberFormatInfo numberFormatInfo;
    228239
    229240      public int CurrentLineNumber = 0;
     
    240251
    241252
    242       public Tokenizer(StreamReader reader) {
     253      public Tokenizer(StreamReader reader, NumberFormatInfo numberFormatInfo) {
    243254        this.reader = reader;
     255        this.numberFormatInfo = numberFormatInfo;
    244256        tokens = new List<Token>();
    245257        ReadNextTokens();
     
    267279          Token token = new Token(TokenTypeEnum.String, strToken);
    268280
    269           // try invariant culture
    270           NumberFormatInfo currentNumberFormatInfo = CultureInfo.InvariantCulture.NumberFormat;
    271           if(int.TryParse(strToken, NumberStyles.Integer, currentNumberFormatInfo, out token.intValue)) {
     281          if(int.TryParse(strToken, NumberStyles.Integer, numberFormatInfo, out token.intValue)) {
    272282            token.type = TokenTypeEnum.Int;
    273283            return token;
    274           } else if(double.TryParse(strToken, NumberStyles.Float, currentNumberFormatInfo, out token.doubleValue)) {
     284          } else if(double.TryParse(strToken, NumberStyles.Float, numberFormatInfo, out token.doubleValue)) {
    275285            token.type = TokenTypeEnum.Double;
    276286            return token;
    277287          }
    278           // try german culture
    279           currentNumberFormatInfo = CultureInfo.GetCultureInfo("de-DE").NumberFormat;
    280           if(int.TryParse(strToken, NumberStyles.Integer, currentNumberFormatInfo, out token.intValue)) {
    281             token.type = TokenTypeEnum.Int;
    282             return token;
    283           } else if(double.TryParse(strToken, NumberStyles.Float, currentNumberFormatInfo, out token.doubleValue)) {
    284             token.type = TokenTypeEnum.Double;
    285             return token;
    286           }
    287 
    288           // try current culture
    289           currentNumberFormatInfo = CultureInfo.CurrentCulture.NumberFormat;
    290           if(int.TryParse(strToken, NumberStyles.Integer, currentNumberFormatInfo, out token.intValue)) {
    291             token.type = TokenTypeEnum.Int;
    292             return token;
    293           } else if(double.TryParse(strToken, NumberStyles.Float, currentNumberFormatInfo, out token.doubleValue)) {
    294             token.type = TokenTypeEnum.Double;
    295             return token;
    296           }
    297 
    298           // nothing worked
     288          // couldn't parse the token as an int or float number
    299289          return token;
    300290        }
Note: See TracChangeset for help on using the changeset viewer.