Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
02/03/16 12:03:11 (8 years ago)
Author:
mkommend
Message:

#2071: Added the possibility to specify the encoding when importing files with the table file parser, and changed the export functionality to use the system's default encoding when creating CSV files.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TableFileParser.cs

    r13526 r13584  
    28 28 using System.IO;
    29 29 using System.Linq;
    30 using System.Runtime;
    31 30 using System.Runtime.Serialization;
    32 31 using System.Text;
     
    41 40    private int estimatedNumberOfLines = 200; // initial capacity for columns, will be set automatically when data is read from a file
    42 41
     42
     43    private Encoding encoding = Encoding.Default;
     44
     45    public Encoding Encoding {
     46      get { return encoding; }
     47      set {
     48        if (value == null) throw new ArgumentNullException("Encoding");
     49        encoding = value;
     50      }
     51    }
     52
     53
    43 54    private int rows;
    44 55    public int Rows {
     
    104 115    public bool AreColumnNamesInFirstLine(Stream stream, NumberFormatInfo numberFormat,
    105 116                                          DateTimeFormatInfo dateTimeFormatInfo, char separator) {
    106       using (StreamReader reader = new StreamReader(stream)) {
     117      using (StreamReader reader = new StreamReader(stream, Encoding)) {
    107 118        tokenizer = new Tokenizer(reader, numberFormat, dateTimeFormatInfo, separator);
    108 119        return (tokenizer.PeekType() != TokenTypeEnum.Double);
     
    143 154      var len = new System.IO.FileInfo(fileName).Length;
    144 155      var buf = new char[1024 * 1024];
    145       using (var reader = new StreamReader(fileName)) {
     156      using (var reader = new StreamReader(fileName, Encoding)) {
    146 157        reader.ReadBlock(buf, 0, buf.Length);
    147 158      }
     
    187 198    /// <param name="columnNamesInFirstLine"></param>
    188 199    public void Parse(Stream stream, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator, bool columnNamesInFirstLine, int lineLimit = -1) {
    189       using (StreamReader reader = new StreamReader(stream)) {
     200      using (StreamReader reader = new StreamReader(stream, Encoding)) {
    190 201        tokenizer = new Tokenizer(reader, numberFormat, dateTimeFormatInfo, separator);
    191 202        values = new List<IList>();
     
    408 419              .Where(c => OccurrencesOf(charCounts, c) > 10)
    409 420              .OrderBy(c => -OccurrencesOf(charCounts, c))
    410               .DefaultIfEmpty(' ') 
     421              .DefaultIfEmpty(' ')
    411 422              .First();
    412 423          }
     
    505 516          try {
    506 517            BytesRead = reader.BaseStream.Position;
    507           } catch (IOException) {
     518          }
     519          catch (IOException) {
    508 520            BytesRead += CurrentLine.Length + 2; // guess
    509           } catch (NotSupportedException) {
     521          }
     522          catch (NotSupportedException) {
    510 523            BytesRead += CurrentLine.Length + 2;
    511 524          }
Note: See TracChangeset for help on using the changeset viewer.