Changeset 13584


Ignore:
Timestamp:
02/03/16 12:03:11 (4 years ago)
Author:
mkommend
Message:

#2071: Added the possibility to specify the encoding when importing files with the table file parser, and changed the export functionality to use the system's default encoding when creating CSV files.

Location:
trunk/sources
Files:
7 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis.Views/3.3/ClassificationImportTypeDialog.Designer.cs

    r12012 r13584  
    8787      this.ProblemDataSettingsGroupBox.Controls.Add(this.TargetVariableComboBox);
    8888      this.ProblemDataSettingsGroupBox.Controls.Add(this.UniformDistributionOfClassesCheckbox);
    89       this.ProblemDataSettingsGroupBox.Size = new System.Drawing.Size(447, 312);
     89      this.ProblemDataSettingsGroupBox.Size = new System.Drawing.Size(447, 285);
    9090      this.ProblemDataSettingsGroupBox.Controls.SetChildIndex(this.UniformDistributionOfClassesCheckbox, 0);
    9191      this.ProblemDataSettingsGroupBox.Controls.SetChildIndex(this.TargetVariableComboBox, 0);
     
    110110      //
    111111      this.PreviewDatasetMatrix.Location = new System.Drawing.Point(6, 134);
    112       this.PreviewDatasetMatrix.Size = new System.Drawing.Size(435, 172);
     112      this.PreviewDatasetMatrix.Size = new System.Drawing.Size(435, 145);
    113113      //
    114114      // PreviewLabel
     
    118118      // TargetVariableComboBox
    119119      //
    120       this.TargetVariableComboBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) 
     120      this.TargetVariableComboBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
    121121            | System.Windows.Forms.AnchorStyles.Right)));
    122122      this.TargetVariableComboBox.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis.Views/3.3/DataAnalysisImportTypeDialog.Designer.cs

    r12012 r13584  
    6363      this.SeparatorComboBox = new System.Windows.Forms.ComboBox();
    6464      this.CSVSettingsGroupBox = new System.Windows.Forms.GroupBox();
     65      this.EncodingInfoLabel = new System.Windows.Forms.Label();
     66      this.EncodingLabel = new System.Windows.Forms.Label();
     67      this.EncodingComboBox = new System.Windows.Forms.ComboBox();
     68      this.CheckboxColumnNames = new System.Windows.Forms.CheckBox();
    6569      this.DateTimeFormatInfoLabel = new System.Windows.Forms.Label();
    6670      this.DecimalSeparatorInfoLabel = new System.Windows.Forms.Label();
     
    7276      this.PreviewDatasetMatrix = new HeuristicLab.Data.Views.StringConvertibleMatrixView();
    7377      this.ToolTip = new System.Windows.Forms.ToolTip(this.components);
    74       this.CheckboxColumnNames = new System.Windows.Forms.CheckBox();
    7578      ((System.ComponentModel.ISupportInitialize)(this.TrainingTestTrackBar)).BeginInit();
    7679      this.CSVSettingsGroupBox.SuspendLayout();
     
    249252      this.CSVSettingsGroupBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
    250253            | System.Windows.Forms.AnchorStyles.Right)));
     254      this.CSVSettingsGroupBox.Controls.Add(this.EncodingInfoLabel);
     255      this.CSVSettingsGroupBox.Controls.Add(this.EncodingLabel);
     256      this.CSVSettingsGroupBox.Controls.Add(this.EncodingComboBox);
    251257      this.CSVSettingsGroupBox.Controls.Add(this.CheckboxColumnNames);
    252258      this.CSVSettingsGroupBox.Controls.Add(this.DateTimeFormatInfoLabel);
     
    261267      this.CSVSettingsGroupBox.Location = new System.Drawing.Point(12, 32);
    262268      this.CSVSettingsGroupBox.Name = "CSVSettingsGroupBox";
    263       this.CSVSettingsGroupBox.Size = new System.Drawing.Size(447, 126);
     269      this.CSVSettingsGroupBox.Size = new System.Drawing.Size(447, 153);
    264270      this.CSVSettingsGroupBox.TabIndex = 16;
    265271      this.CSVSettingsGroupBox.TabStop = false;
    266272      this.CSVSettingsGroupBox.Text = "CSV Settings";
     273      //
     274      // EncodingInfoLabel
     275      //
     276      this.EncodingInfoLabel.Anchor = ((System.Windows.Forms.AnchorStyles)((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Right)));
     277      this.EncodingInfoLabel.Image = HeuristicLab.Common.Resources.VSImageLibrary.Information;
     278      this.EncodingInfoLabel.Location = new System.Drawing.Point(421, 102);
     279      this.EncodingInfoLabel.Name = "EncodingInfoLabel";
     280      this.EncodingInfoLabel.Size = new System.Drawing.Size(16, 16);
     281      this.EncodingInfoLabel.TabIndex = 27;
     282      this.EncodingInfoLabel.Tag = "Select the encoding the file was saved with.";
     283      this.ToolTip.SetToolTip(this.EncodingInfoLabel, "Select the encoding the file was saved with.");
     284      this.EncodingInfoLabel.DoubleClick += new System.EventHandler(this.ControlToolTip_DoubleClick);
     285      //
     286      // EncodingLabel
     287      //
     288      this.EncodingLabel.AutoSize = true;
     289      this.EncodingLabel.Location = new System.Drawing.Point(6, 103);
     290      this.EncodingLabel.Name = "EncodingLabel";
     291      this.EncodingLabel.Size = new System.Drawing.Size(52, 13);
     292      this.EncodingLabel.TabIndex = 26;
     293      this.EncodingLabel.Text = "Encoding";
     294      //
     295      // EncodingComboBox
     296      //
     297      this.EncodingComboBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
     298            | System.Windows.Forms.AnchorStyles.Right)));
     299      this.EncodingComboBox.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
     300      this.EncodingComboBox.Enabled = false;
     301      this.EncodingComboBox.FormattingEnabled = true;
     302      this.EncodingComboBox.Location = new System.Drawing.Point(111, 100);
     303      this.EncodingComboBox.Name = "EncodingComboBox";
     304      this.EncodingComboBox.Size = new System.Drawing.Size(300, 21);
     305      this.EncodingComboBox.TabIndex = 25;
     306      this.EncodingComboBox.SelectionChangeCommitted += new System.EventHandler(this.CSVFormatComboBoxSelectionChangeCommitted);
     307      //
     308      // CheckboxColumnNames
     309      //
     310      this.CheckboxColumnNames.AutoSize = true;
     311      this.CheckboxColumnNames.Location = new System.Drawing.Point(9, 127);
     312      this.CheckboxColumnNames.Name = "CheckboxColumnNames";
     313      this.CheckboxColumnNames.Size = new System.Drawing.Size(144, 17);
     314      this.CheckboxColumnNames.TabIndex = 24;
     315      this.CheckboxColumnNames.Text = "Column names in first line";
     316      this.CheckboxColumnNames.UseVisualStyleBackColor = true;
     317      this.CheckboxColumnNames.CheckedChanged += new System.EventHandler(this.CheckboxColumnNames_CheckedChanged);
    267318      //
    268319      // DateTimeFormatInfoLabel
     
    315366      this.ProblemDataSettingsGroupBox.Controls.Add(this.TrainingTestTrackBar);
    316367      this.ProblemDataSettingsGroupBox.Controls.Add(this.ShuffleDataCheckbox);
    317       this.ProblemDataSettingsGroupBox.Location = new System.Drawing.Point(12, 164);
     368      this.ProblemDataSettingsGroupBox.Location = new System.Drawing.Point(12, 191);
    318369      this.ProblemDataSettingsGroupBox.Name = "ProblemDataSettingsGroupBox";
    319       this.ProblemDataSettingsGroupBox.Size = new System.Drawing.Size(447, 252);
     370      this.ProblemDataSettingsGroupBox.Size = new System.Drawing.Size(447, 225);
    320371      this.ProblemDataSettingsGroupBox.TabIndex = 17;
    321372      this.ProblemDataSettingsGroupBox.TabStop = false;
     
    365416      this.PreviewDatasetMatrix.ShowRowsAndColumnsTextBox = false;
    366417      this.PreviewDatasetMatrix.ShowStatisticalInformation = false;
    367       this.PreviewDatasetMatrix.Size = new System.Drawing.Size(435, 138);
     418      this.PreviewDatasetMatrix.Size = new System.Drawing.Size(435, 111);
    368419      this.PreviewDatasetMatrix.TabIndex = 0;
    369       //
    370       // CheckboxColumnNames
    371       //
    372       this.CheckboxColumnNames.AutoSize = true;
    373       this.CheckboxColumnNames.Location = new System.Drawing.Point(9, 103);
    374       this.CheckboxColumnNames.Name = "CheckboxColumnNames";
    375       this.CheckboxColumnNames.Size = new System.Drawing.Size(144, 17);
    376       this.CheckboxColumnNames.TabIndex = 24;
    377       this.CheckboxColumnNames.Text = "Column names in first line";
    378       this.CheckboxColumnNames.UseVisualStyleBackColor = true;
    379       this.CheckboxColumnNames.CheckedChanged += new System.EventHandler(this.CheckboxColumnNames_CheckedChanged);
    380420      //
    381421      // DataAnalysisImportTypeDialog
     
    437477    protected System.Windows.Forms.ToolTip ToolTip;
    438478    private System.Windows.Forms.CheckBox CheckboxColumnNames;
     479    protected System.Windows.Forms.Label EncodingInfoLabel;
     480    protected System.Windows.Forms.Label EncodingLabel;
     481    protected System.Windows.Forms.ComboBox EncodingComboBox;
    439482  }
    440483}
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis.Views/3.3/DataAnalysisImportTypeDialog.cs

    r13413 r13584  
    2525using System.IO;
    2626using System.Linq;
     27using System.Text;
    2728using System.Windows.Forms;
    2829using HeuristicLab.Core.Views;
     
    3233  public partial class DataAnalysisImportTypeDialog : Form {
    3334
    34     public static readonly List<KeyValuePair<DateTimeFormatInfo, string>> dateTimeFormats = new List<KeyValuePair<DateTimeFormatInfo, string>>{
    35       new KeyValuePair<DateTimeFormatInfo, string>(DateTimeFormatInfo.GetInstance(new CultureInfo("de-DE")), "dd/mm/yyyy hh:MM:ss" ),
    36       new KeyValuePair<DateTimeFormatInfo, string>(DateTimeFormatInfo.InvariantInfo, "mm/dd/yyyy hh:MM:ss" ),
    37       new KeyValuePair<DateTimeFormatInfo, string>(DateTimeFormatInfo.InvariantInfo, "yyyy/mm/dd hh:MM:ss" ),
    38       new KeyValuePair<DateTimeFormatInfo, string>(DateTimeFormatInfo.InvariantInfo, "mm/yyyy/dd hh:MM:ss" )
     35    private static readonly List<KeyValuePair<DateTimeFormatInfo, string>> dateTimeFormats =
     36      new List<KeyValuePair<DateTimeFormatInfo, string>>{
     37        new KeyValuePair<DateTimeFormatInfo, string>(DateTimeFormatInfo.GetInstance(new CultureInfo("de-DE")), "dd/mm/yyyy hh:MM:ss" ),
     38        new KeyValuePair<DateTimeFormatInfo, string>(DateTimeFormatInfo.InvariantInfo, "mm/dd/yyyy hh:MM:ss" ),
     39        new KeyValuePair<DateTimeFormatInfo, string>(DateTimeFormatInfo.InvariantInfo, "yyyy/mm/dd hh:MM:ss" ),
     40        new KeyValuePair<DateTimeFormatInfo, string>(DateTimeFormatInfo.InvariantInfo, "mm/yyyy/dd hh:MM:ss" )
    3941    };
    4042
    41     public static readonly List<KeyValuePair<char, string>> POSSIBLE_SEPARATORS = new List<KeyValuePair<char, string>>{ 
    42       new KeyValuePair<char, string>(';', "; (Semicolon)" ),
    43       new KeyValuePair<char, string>(',', ", (Comma)" ),   
    44       new KeyValuePair<char, string>('\t', "\\t (Tab)"),
    45       new KeyValuePair<char, string>((char)0, "all whitespaces (including tabs and spaces)")
     43    private static readonly List<KeyValuePair<char, string>> POSSIBLE_SEPARATORS =
     44      new List<KeyValuePair<char, string>>{ 
     45        new KeyValuePair<char, string>(';', "; (Semicolon)" ),
     46        new KeyValuePair<char, string>(',', ", (Comma)" ),   
     47        new KeyValuePair<char, string>('\t', "\\t (Tab)"),
     48        new KeyValuePair<char, string>((char)0, "all whitespaces (including tabs and spaces)")
    4649    };
    4750
    48     public static readonly List<KeyValuePair<NumberFormatInfo, string>> POSSIBLE_DECIMAL_SEPARATORS = new List<KeyValuePair<NumberFormatInfo, string>>{
    49       new KeyValuePair<NumberFormatInfo, string>(NumberFormatInfo.GetInstance(new CultureInfo("de-DE")), ", (Comma)"),
    50       new KeyValuePair<NumberFormatInfo, string>(NumberFormatInfo.InvariantInfo, ". (Period)" )   
     51    private static readonly List<KeyValuePair<NumberFormatInfo, string>> POSSIBLE_DECIMAL_SEPARATORS =
     52      new List<KeyValuePair<NumberFormatInfo, string>>{
     53        new KeyValuePair<NumberFormatInfo, string>(NumberFormatInfo.GetInstance(new CultureInfo("de-DE")), ", (Comma)"),
     54        new KeyValuePair<NumberFormatInfo, string>(NumberFormatInfo.InvariantInfo, ". (Period)" )   
    5155    };
     56
     57    private static readonly List<KeyValuePair<Encoding, string>> POSSIBLE_ENCODINGS =
     58      new List<KeyValuePair<Encoding, string>> {
     59        new KeyValuePair<Encoding, string>(Encoding.Default, "Default"),
     60        new KeyValuePair<Encoding, string>(Encoding.ASCII, "ASCII"),
     61        new KeyValuePair<Encoding, string>(Encoding.Unicode, "Unicode"),   
     62        new KeyValuePair<Encoding, string>(Encoding.UTF8, "UTF8")       
     63      };
    5264
    5365    public string Path {
     
    8799      DateTimeFormatComboBox.ValueMember = "Key";
    88100      DateTimeFormatComboBox.DisplayMember = "Value";
     101      EncodingComboBox.DataSource = POSSIBLE_ENCODINGS;
     102      EncodingComboBox.ValueMember = "Key";
     103      EncodingComboBox.DisplayMember = "Value";
     104
    89105    }
    90106
     
    100116      DecimalSeparatorComboBox.Enabled = true;
    101117      DateTimeFormatComboBox.Enabled = true;
     118      EncodingComboBox.Enabled = true;
    102119      ProblemTextBox.Text = openFileDialog.FileName;
    103120      TableFileParser csvParser = new TableFileParser();
     
    125142      try {
    126143        TableFileParser csvParser = new TableFileParser();
     144        csvParser.Encoding = (Encoding)EncodingComboBox.SelectedValue;
    127145        csvParser.Parse(ProblemTextBox.Text,
    128146                        (NumberFormatInfo)DecimalSeparatorComboBox.SelectedValue,
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis.Views/3.3/RegressionImportTypeDialog.Designer.cs

    r12012 r13584  
    6666      this.ProblemDataSettingsGroupBox.Controls.Add(this.TargetVariableLabel);
    6767      this.ProblemDataSettingsGroupBox.Controls.Add(this.TargetVariableComboBox);
    68       this.ProblemDataSettingsGroupBox.Size = new System.Drawing.Size(447, 251);
    6968      this.ProblemDataSettingsGroupBox.Controls.SetChildIndex(this.PreviewLabel, 0);
    7069      this.ProblemDataSettingsGroupBox.Controls.SetChildIndex(this.ShuffelInfoLabel, 0);
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis.Views/3.3/TimeSeriesPrognosisImportTypeDialog.Designer.cs

    r12012 r13584  
    5858      this.ShuffleDataCheckbox.Enabled = false;
    5959      this.ShuffleDataCheckbox.Visible = false;
    60       //
    61       // OkButton
    62       //
    63       this.OkButton.Location = new System.Drawing.Point(303, 407);
    64       //
    65       // CancelationButton
    66       //
    67       this.CancelationButton.Location = new System.Drawing.Point(384, 407);
     60
    6861      //
    6962      // ProblemDataSettingsGroupBox
     
    7265      this.ProblemDataSettingsGroupBox.Controls.Add(this.TargetVariableComboBox);
    7366      this.ProblemDataSettingsGroupBox.Controls.Add(this.TargetVariableLabel);
    74       this.ProblemDataSettingsGroupBox.Size = new System.Drawing.Size(447, 237);
    7567      this.ProblemDataSettingsGroupBox.Controls.SetChildIndex(this.ShuffleDataCheckbox, 0);
    7668      this.ProblemDataSettingsGroupBox.Controls.SetChildIndex(this.TargetVariableLabel, 0);
     
    8981      this.ErrorTextBox.Location = new System.Drawing.Point(6, 19);
    9082      this.ErrorTextBox.Size = new System.Drawing.Size(435, 69);
    91       //
    92       // PreviewDatasetMatrix
    93       //
    94       this.PreviewDatasetMatrix.Size = new System.Drawing.Size(435, 123);
    9583      //
    9684      // SeparatorInfoLabel
     
    114102      // TargetVariableComboBox
    115103      //
    116       this.TargetVariableComboBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left) 
     104      this.TargetVariableComboBox.Anchor = ((System.Windows.Forms.AnchorStyles)(((System.Windows.Forms.AnchorStyles.Top | System.Windows.Forms.AnchorStyles.Left)
    117105            | System.Windows.Forms.AnchorStyles.Right)));
    118106      this.TargetVariableComboBox.DropDownStyle = System.Windows.Forms.ComboBoxStyle.DropDownList;
     
    148136      this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
    149137      this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
    150       this.ClientSize = new System.Drawing.Size(471, 442);
     138      this.ClientSize = new System.Drawing.Size(471, 457);
    151139      this.Name = "TimeSeriesPrognosisImportTypeDialog";
    152140      this.Text = "TimeSeries Prognosis CSV Import";
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/DataAnalysisInstanceProvider.cs

    r13414 r13584  
    2424using System.Collections.Generic;
    2525using System.ComponentModel;
    26 using System.Drawing;
    2726using System.Globalization;
    2827using System.IO;
     
    5251      var handler = ProgressChanged;
    5352      if (handler != null)
    54         handler(this, new ProgressChangedEventArgs((int)(100*d), null));
     53        handler(this, new ProgressChangedEventArgs((int)(100 * d), null));
    5554    }
    5655
     
    102101        strBuilder.AppendLine();
    103102      }
    104 
    105       using (var writer = new StreamWriter(path)) {
    106         writer.Write(strBuilder);
     103      using (var fileStream = new FileStream(path, FileMode.Create)) {
     104        using (var writer = new StreamWriter(fileStream, Encoding.Default)) {
     105          writer.Write(strBuilder);
     106        }
    107107      }
    108108    }
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TableFileParser.cs

    r13526 r13584  
    2828using System.IO;
    2929using System.Linq;
    30 using System.Runtime;
    3130using System.Runtime.Serialization;
    3231using System.Text;
     
    4140    private int estimatedNumberOfLines = 200; // initial capacity for columns, will be set automatically when data is read from a file
    4241
     42
     43    private Encoding encoding = Encoding.Default;
     44
     45    public Encoding Encoding {
     46      get { return encoding; }
     47      set {
     48        if (value == null) throw new ArgumentNullException("Encoding");
     49        encoding = value;
     50      }
     51    }
     52
     53
    4354    private int rows;
    4455    public int Rows {
     
    104115    public bool AreColumnNamesInFirstLine(Stream stream, NumberFormatInfo numberFormat,
    105116                                          DateTimeFormatInfo dateTimeFormatInfo, char separator) {
    106       using (StreamReader reader = new StreamReader(stream)) {
     117      using (StreamReader reader = new StreamReader(stream, Encoding)) {
    107118        tokenizer = new Tokenizer(reader, numberFormat, dateTimeFormatInfo, separator);
    108119        return (tokenizer.PeekType() != TokenTypeEnum.Double);
     
    143154      var len = new System.IO.FileInfo(fileName).Length;
    144155      var buf = new char[1024 * 1024];
    145       using (var reader = new StreamReader(fileName)) {
     156      using (var reader = new StreamReader(fileName, Encoding)) {
    146157        reader.ReadBlock(buf, 0, buf.Length);
    147158      }
     
    187198    /// <param name="columnNamesInFirstLine"></param>
    188199    public void Parse(Stream stream, NumberFormatInfo numberFormat, DateTimeFormatInfo dateTimeFormatInfo, char separator, bool columnNamesInFirstLine, int lineLimit = -1) {
    189       using (StreamReader reader = new StreamReader(stream)) {
     200      using (StreamReader reader = new StreamReader(stream, Encoding)) {
    190201        tokenizer = new Tokenizer(reader, numberFormat, dateTimeFormatInfo, separator);
    191202        values = new List<IList>();
     
    408419              .Where(c => OccurrencesOf(charCounts, c) > 10)
    409420              .OrderBy(c => -OccurrencesOf(charCounts, c))
    410               .DefaultIfEmpty(' ') 
     421              .DefaultIfEmpty(' ')
    411422              .First();
    412423          }
     
    505516          try {
    506517            BytesRead = reader.BaseStream.Position;
    507           } catch (IOException) {
     518          }
     519          catch (IOException) {
    508520            BytesRead += CurrentLine.Length + 2; // guess
    509           } catch (NotSupportedException) {
     521          }
     522          catch (NotSupportedException) {
    510523            BytesRead += CurrentLine.Length + 2;
    511524          }
Note: See TracChangeset for help on using the changeset viewer.