Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis.Views/3.3/DataAnalysisImportDialog.cs @ 17579

Last change on this file since 17579 was 17414, checked in by pfleck, 5 years ago

#3040 Started adding UCI time series regression benchmarks.
Adapted parser (extracted format options & added parsing for double vectors).

File size: 9.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Globalization;
25using System.IO;
26using System.Linq;
27using System.Text;
28using System.Windows.Forms;
29using HeuristicLab.Core.Views;
30using HeuristicLab.Problems.DataAnalysis;
31
namespace HeuristicLab.Problems.Instances.DataAnalysis.Views {
  /// <summary>
  /// Dialog that lets the user pick a CSV file, choose how it is formatted
  /// (column separator, decimal separator, date/time format, encoding, header row)
  /// and how the data is split into training and test partitions. A preview of the
  /// first rows is parsed and displayed whenever one of the format options changes.
  /// </summary>
  public partial class DataAnalysisImportDialog : Form {

    // Selectable date/time formats. The key is the format info handed to the parser;
    // the value is only the label shown in the combo box (it is not used as a .NET format string).
    private static readonly List<KeyValuePair<DateTimeFormatInfo, string>> POSSIBLE_DATETIME_FORMATS =
      new List<KeyValuePair<DateTimeFormatInfo, string>>{
        new KeyValuePair<DateTimeFormatInfo, string>(DateTimeFormatInfo.GetInstance(new CultureInfo("de-DE")), "dd/mm/yyyy hh:MM:ss" ),
        new KeyValuePair<DateTimeFormatInfo, string>(DateTimeFormatInfo.InvariantInfo, "mm/dd/yyyy hh:MM:ss" ),
        new KeyValuePair<DateTimeFormatInfo, string>(DateTimeFormatInfo.InvariantInfo, "yyyy/mm/dd hh:MM:ss" ),
        new KeyValuePair<DateTimeFormatInfo, string>(DateTimeFormatInfo.InvariantInfo, "mm/yyyy/dd hh:MM:ss" )
    };

    // Selectable column separators. The first character of each label is compared against the
    // current culture's list separator in the constructor to pick a default.
    // NOTE(review): (char)0 presumably tells the parser to split on any whitespace - confirm in TableFileParser.
    private static readonly List<KeyValuePair<char, string>> POSSIBLE_SEPARATORS =
      new List<KeyValuePair<char, string>>{
        new KeyValuePair<char, string>(';', "; (Semicolon)" ),
        new KeyValuePair<char, string>(',', ", (Comma)" ),
        new KeyValuePair<char, string>('\t', "\\t (Tab)"),
        new KeyValuePair<char, string>((char)0, "all whitespaces (including tabs and spaces)")
    };

    // Selectable decimal separators. The first character of each label is compared against the
    // current culture's decimal separator in the constructor to pick a default.
    private static readonly List<KeyValuePair<NumberFormatInfo, string>> POSSIBLE_DECIMAL_SEPARATORS =
      new List<KeyValuePair<NumberFormatInfo, string>>{
        new KeyValuePair<NumberFormatInfo, string>(NumberFormatInfo.GetInstance(new CultureInfo("de-DE")), ", (Comma)"),
        new KeyValuePair<NumberFormatInfo, string>(NumberFormatInfo.InvariantInfo, ". (Period)" )
    };

    // Selectable file encodings.
    private static readonly List<KeyValuePair<Encoding, string>> POSSIBLE_ENCODINGS =
      new List<KeyValuePair<Encoding, string>> {
        new KeyValuePair<Encoding, string>(Encoding.Default, "Default"),
        new KeyValuePair<Encoding, string>(Encoding.ASCII, "ASCII"),
        new KeyValuePair<Encoding, string>(Encoding.Unicode, "Unicode"),
        new KeyValuePair<Encoding, string>(Encoding.UTF8, "UTF8")
      };

    /// <summary>
    /// Gets the path of the file selected by the user (the text of the problem text box).
    /// </summary>
    public string Path {
      get { return ProblemTextBox.Text; }
    }

    /// <summary>
    /// Gets a new <see cref="DataAnalysisImportType"/> reflecting the current state of the
    /// shuffle check box and the training/test track bar.
    /// </summary>
    public DataAnalysisImportType ImportType {
      get {
        return new DataAnalysisImportType() {
          Shuffle = ShuffleDataCheckbox.Checked,
          TrainingPercentage = TrainingTestTrackBar.Value
        };
      }
    }

    /// <summary>
    /// Gets a new <see cref="DataAnalysisCSVFormat"/> reflecting the currently selected
    /// separator, number format, date/time format, encoding and header-row option.
    /// </summary>
    public DataAnalysisCSVFormat CSVFormat {
      get {
        return new DataAnalysisCSVFormat() {
          Separator = (char)SeparatorComboBox.SelectedValue,
          NumberFormatInfo = (NumberFormatInfo)DecimalSeparatorComboBox.SelectedValue,
          DateTimeFormatInfo = (DateTimeFormatInfo)DateTimeFormatComboBox.SelectedValue,
          VariableNamesAvailable = CheckboxColumnNames.Checked,
          Encoding = (Encoding)EncodingComboBox.SelectedValue
        };
      }
    }

    /// <summary>
    /// Creates the dialog, binds the format combo boxes to their option lists and
    /// preselects the column and decimal separators that match the current culture.
    /// </summary>
    public DataAnalysisImportDialog() {
      InitializeComponent();

      // Each combo box exposes the pair's key as its value and the pair's value as its label.
      SeparatorComboBox.DataSource = POSSIBLE_SEPARATORS;
      SeparatorComboBox.ValueMember = "Key";
      SeparatorComboBox.DisplayMember = "Value";
      DecimalSeparatorComboBox.DataSource = POSSIBLE_DECIMAL_SEPARATORS;
      DecimalSeparatorComboBox.ValueMember = "Key";
      DecimalSeparatorComboBox.DisplayMember = "Value";
      DateTimeFormatComboBox.DataSource = POSSIBLE_DATETIME_FORMATS;
      DateTimeFormatComboBox.ValueMember = "Key";
      DateTimeFormatComboBox.DisplayMember = "Value";
      EncodingComboBox.DataSource = POSSIBLE_ENCODINGS;
      EncodingComboBox.ValueMember = "Key";
      EncodingComboBox.DisplayMember = "Value";

      // set default values based on the current culture
      CultureInfo culture = CultureInfo.CurrentCulture;

      // FindIndex scans the list once; -1 means no label starts with the culture's separator,
      // in which case the combo box keeps its initial selection.
      int separatorIndex = POSSIBLE_SEPARATORS.FindIndex(
        n => n.Value.Substring(0, 1) == culture.TextInfo.ListSeparator);
      if (separatorIndex >= 0)
        SeparatorComboBox.SelectedItem = POSSIBLE_SEPARATORS[separatorIndex];

      // The selected NumberFormatInfo is used to parse plain numbers, so the culture's
      // number decimal separator (not the currency one) determines the default.
      int decimalSeparatorIndex = POSSIBLE_DECIMAL_SEPARATORS.FindIndex(
        n => n.Value.Substring(0, 1) == culture.NumberFormat.NumberDecimalSeparator);
      if (decimalSeparatorIndex >= 0)
        DecimalSeparatorComboBox.SelectedItem = POSSIBLE_DECIMAL_SEPARATORS[decimalSeparatorIndex];
    }

    /// <summary>
    /// Updates the training and test labels whenever the track bar value changes.
    /// The test share is the track bar's maximum minus its current value.
    /// </summary>
    private void TrainingTestTrackBar_ValueChanged(object sender, System.EventArgs e) {
      TrainingLabel.Text = "Training: " + TrainingTestTrackBar.Value + " %";
      TestLabel.Text = "Test: " + (TrainingTestTrackBar.Maximum - TrainingTestTrackBar.Value) + " %";
    }

    /// <summary>
    /// Shows the open-file dialog. If the user confirms, enables the format controls,
    /// stores the chosen file name, detects whether the first line holds column names
    /// and parses the file for the preview.
    /// </summary>
    protected virtual void OpenFileButtonClick(object sender, System.EventArgs e) {
      if (openFileDialog.ShowDialog(this) != DialogResult.OK) return;

      SeparatorComboBox.Enabled = true;
      DecimalSeparatorComboBox.Enabled = true;
      DateTimeFormatComboBox.Enabled = true;
      EncodingComboBox.Enabled = true;
      ProblemTextBox.Text = openFileDialog.FileName;

      // Configure the parser exactly as ParseCSVFile does (including the selected encoding)
      // so that header detection and preview agree.
      // NOTE(review): presumably AreColumnNamesInFirstLine honors the parser's Encoding - confirm in TableFileParser.
      TableFileParser csvParser = CreateParser();
      TableFileFormatOptions formatOptions = CreateFormatOptions();
      CheckboxColumnNames.Checked = csvParser.AreColumnNamesInFirstLine(ProblemTextBox.Text, formatOptions);
      ParseCSVFile();
    }

    /// <summary>
    /// Re-parses the selected file after a format combo box selection was committed.
    /// Does nothing while no file has been chosen.
    /// </summary>
    protected virtual void CSVFormatComboBoxSelectionChangeCommitted(object sender, EventArgs e) {
      if (string.IsNullOrEmpty(ProblemTextBox.Text)) return;

      ParseCSVFile();
    }

    /// <summary>
    /// Re-parses the selected file after the "column names" check box changed.
    /// Does nothing while no file has been chosen.
    /// </summary>
    protected virtual void CheckboxColumnNames_CheckedChanged(object sender, EventArgs e) {
      if (string.IsNullOrEmpty(ProblemTextBox.Text)) return;

      ParseCSVFile();
    }

    /// <summary>
    /// Parses at most 500 lines of the selected file with the current format options and
    /// shows the result in the preview matrix. On success the error box is hidden and the
    /// OK button enabled; on an <see cref="IOException"/>, <see cref="InvalidOperationException"/>
    /// or <see cref="ArgumentException"/> the message is shown and the OK button disabled.
    /// Any other exception is rethrown.
    /// </summary>
    protected void ParseCSVFile() {
      PreviewDatasetMatrix.Content = null;
      try {
        TableFileParser csvParser = CreateParser();
        TableFileFormatOptions formatOptions = CreateFormatOptions();
        csvParser.Parse(ProblemTextBox.Text, formatOptions, CheckboxColumnNames.Checked, lineLimit: 500);
        IEnumerable<string> variableNamesWithType = GetVariableNamesWithType(csvParser);
        PreviewDatasetMatrix.Content = new Dataset(variableNamesWithType, csvParser.Values);

        // Runs after the preview is assigned, so a violated constraint still leaves the preview visible.
        CheckAdditionalConstraints(csvParser);

        ErrorTextBox.Text = String.Empty;
        ErrorTextBox.Visible = false;
        OkButton.Enabled = true;
      } catch (Exception ex) {
        if (ex is IOException || ex is InvalidOperationException || ex is ArgumentException) {
          OkButton.Enabled = false;
          ErrorTextBox.Text = ex.Message;
          ErrorTextBox.Visible = true;
        } else {
          throw;
        }
      }
    }

    /// <summary>
    /// Validates the parsed data beyond pure parseability. The default implementation
    /// requires at least one column of doubles. Subclasses may override this.
    /// </summary>
    /// <param name="csvParser">The parser holding the parsed values.</param>
    /// <exception cref="ArgumentException">Thrown if no column is a <c>List&lt;double&gt;</c>.</exception>
    protected virtual void CheckAdditionalConstraints(TableFileParser csvParser) {
      if (!csvParser.Values.Any(x => x is List<double>)) {
        throw new ArgumentException("No double column could be found!");
      }
    }

    /// <summary>
    /// Returns the parsed variable names, each suffixed with the type of its column
    /// (" (Double)", " (String)" or " (DateTime)").
    /// </summary>
    /// <param name="csvParser">The parser holding variable names and values.</param>
    /// <exception cref="ArgumentException">Thrown if a column is of any other type.</exception>
    private IEnumerable<string> GetVariableNamesWithType(TableFileParser csvParser) {
      // Work on a copy so the parser's own name list is not modified.
      IList<string> variableNamesWithType = csvParser.VariableNames.ToList();
      for (int i = 0; i < csvParser.Values.Count; i++) {
        if (csvParser.Values[i] is List<double>) {
          variableNamesWithType[i] += " (Double)";
        } else if (csvParser.Values[i] is List<string>) {
          variableNamesWithType[i] += " (String)";
        } else if (csvParser.Values[i] is List<DateTime>) {
          variableNamesWithType[i] += " (DateTime)";
        } else {
          throw new ArgumentException("The variable values must be of type List<double>, List<string> or List<DateTime>");
        }
      }
      return variableNamesWithType;
    }

    /// <summary>
    /// Opens a text dialog showing the sender control's <c>Tag</c> (cast to string),
    /// titled with the control's name. Ignored if the sender is not a <see cref="Control"/>.
    /// </summary>
    protected void ControlToolTip_DoubleClick(object sender, EventArgs e) {
      Control control = sender as Control;
      if (control != null) {
        using (TextDialog dialog = new TextDialog(control.Name, (string)control.Tag, true)) {
          dialog.ShowDialog(this);
        }
      }
    }

    // Creates a parser that uses the encoding currently selected in the dialog.
    private TableFileParser CreateParser() {
      TableFileParser csvParser = new TableFileParser();
      csvParser.Encoding = (Encoding)EncodingComboBox.SelectedValue;
      return csvParser;
    }

    // Builds the parser format options from the currently selected combo box values.
    private TableFileFormatOptions CreateFormatOptions() {
      return new TableFileFormatOptions {
        NumberFormat = (NumberFormatInfo)DecimalSeparatorComboBox.SelectedValue,
        DateTimeFormat = (DateTimeFormatInfo)DateTimeFormatComboBox.SelectedValue,
        ColumnSeparator = (char)SeparatorComboBox.SelectedValue
      };
    }
  }
}
Note: See TracBrowser for help on using the repository browser.