#region License Information
/* HeuristicLab
* Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using HeuristicLab.Core.Views;
using HeuristicLab.Problems.DataAnalysis;
namespace HeuristicLab.Problems.Instances.DataAnalysis.Views {
public partial class DataAnalysisImportDialog : Form {
private static readonly List> POSSIBLE_DATETIME_FORMATS =
new List>{
new KeyValuePair(DateTimeFormatInfo.GetInstance(new CultureInfo("de-DE")), "dd/mm/yyyy hh:MM:ss" ),
new KeyValuePair(DateTimeFormatInfo.InvariantInfo, "mm/dd/yyyy hh:MM:ss" ),
new KeyValuePair(DateTimeFormatInfo.InvariantInfo, "yyyy/mm/dd hh:MM:ss" ),
new KeyValuePair(DateTimeFormatInfo.InvariantInfo, "mm/yyyy/dd hh:MM:ss" )
};
private static readonly List> POSSIBLE_SEPARATORS =
new List>{
new KeyValuePair(';', "; (Semicolon)" ),
new KeyValuePair(',', ", (Comma)" ),
new KeyValuePair('\t', "\\t (Tab)"),
new KeyValuePair((char)0, "all whitespaces (including tabs and spaces)")
};
private static readonly List> POSSIBLE_DECIMAL_SEPARATORS =
new List>{
new KeyValuePair(NumberFormatInfo.GetInstance(new CultureInfo("de-DE")), ", (Comma)"),
new KeyValuePair(NumberFormatInfo.InvariantInfo, ". (Period)" )
};
private static readonly List> POSSIBLE_ENCODINGS =
new List> {
new KeyValuePair(Encoding.Default, "Default"),
new KeyValuePair(Encoding.ASCII, "ASCII"),
new KeyValuePair(Encoding.Unicode, "Unicode"),
new KeyValuePair(Encoding.UTF8, "UTF8")
};
public string Path {
get { return ProblemTextBox.Text; }
}
public DataAnalysisImportType ImportType {
get {
return new DataAnalysisImportType() {
Shuffle = ShuffleDataCheckbox.Checked,
TrainingPercentage = TrainingTestTrackBar.Value
};
}
}
public DataAnalysisCSVFormat CSVFormat {
get {
return new DataAnalysisCSVFormat() {
Separator = (char)SeparatorComboBox.SelectedValue,
NumberFormatInfo = (NumberFormatInfo)DecimalSeparatorComboBox.SelectedValue,
DateTimeFormatInfo = (DateTimeFormatInfo)DateTimeFormatComboBox.SelectedValue,
VariableNamesAvailable = CheckboxColumnNames.Checked,
Encoding = (Encoding)EncodingComboBox.SelectedValue
};
}
}
public DataAnalysisImportDialog() {
InitializeComponent();
SeparatorComboBox.DataSource = POSSIBLE_SEPARATORS;
SeparatorComboBox.ValueMember = "Key";
SeparatorComboBox.DisplayMember = "Value";
DecimalSeparatorComboBox.DataSource = POSSIBLE_DECIMAL_SEPARATORS;
DecimalSeparatorComboBox.ValueMember = "Key";
DecimalSeparatorComboBox.DisplayMember = "Value";
DateTimeFormatComboBox.DataSource = POSSIBLE_DATETIME_FORMATS;
DateTimeFormatComboBox.ValueMember = "Key";
DateTimeFormatComboBox.DisplayMember = "Value";
EncodingComboBox.DataSource = POSSIBLE_ENCODINGS;
EncodingComboBox.ValueMember = "Key";
EncodingComboBox.DisplayMember = "Value";
// set default values based on the current culture
var separator = POSSIBLE_SEPARATORS.Where(n => n.Value.Substring(0, 1) == CultureInfo.CurrentCulture.TextInfo.ListSeparator);
if (separator.Any())
SeparatorComboBox.SelectedItem = separator.First();
var decimalSeparator = POSSIBLE_DECIMAL_SEPARATORS.Where(n => n.Value.Substring(0, 1) == CultureInfo.CurrentCulture.NumberFormat.CurrencyDecimalSeparator);
if (decimalSeparator.Any())
DecimalSeparatorComboBox.SelectedItem = decimalSeparator.First();
}
private void TrainingTestTrackBar_ValueChanged(object sender, System.EventArgs e) {
TrainingLabel.Text = "Training: " + TrainingTestTrackBar.Value + " %";
TestLabel.Text = "Test: " + (TrainingTestTrackBar.Maximum - TrainingTestTrackBar.Value) + " %";
}
protected virtual void OpenFileButtonClick(object sender, System.EventArgs e) {
if (openFileDialog.ShowDialog(this) != DialogResult.OK) return;
SeparatorComboBox.Enabled = true;
DecimalSeparatorComboBox.Enabled = true;
DateTimeFormatComboBox.Enabled = true;
EncodingComboBox.Enabled = true;
ProblemTextBox.Text = openFileDialog.FileName;
TableFileParser csvParser = new TableFileParser();
var formatOptions = new TableFileFormatOptions {
NumberFormat = (NumberFormatInfo)DecimalSeparatorComboBox.SelectedValue,
DateTimeFormat = (DateTimeFormatInfo)DateTimeFormatComboBox.SelectedValue,
ColumnSeparator = (char)SeparatorComboBox.SelectedValue
};
CheckboxColumnNames.Checked = csvParser.AreColumnNamesInFirstLine(ProblemTextBox.Text, formatOptions);
ParseCSVFile();
}
protected virtual void CSVFormatComboBoxSelectionChangeCommitted(object sender, EventArgs e) {
if (string.IsNullOrEmpty(ProblemTextBox.Text)) return;
ParseCSVFile();
}
protected virtual void CheckboxColumnNames_CheckedChanged(object sender, EventArgs e) {
if (string.IsNullOrEmpty(ProblemTextBox.Text)) return;
ParseCSVFile();
}
protected void ParseCSVFile() {
PreviewDatasetMatrix.Content = null;
try {
TableFileParser csvParser = new TableFileParser();
csvParser.Encoding = (Encoding)EncodingComboBox.SelectedValue;
var formatOptions = new TableFileFormatOptions {
NumberFormat = (NumberFormatInfo)DecimalSeparatorComboBox.SelectedValue,
DateTimeFormat = (DateTimeFormatInfo)DateTimeFormatComboBox.SelectedValue,
ColumnSeparator = (char)SeparatorComboBox.SelectedValue
};
csvParser.Parse(ProblemTextBox.Text, formatOptions, CheckboxColumnNames.Checked, lineLimit: 500);
IEnumerable variableNamesWithType = GetVariableNamesWithType(csvParser);
PreviewDatasetMatrix.Content = new Dataset(variableNamesWithType, csvParser.Values);
CheckAdditionalConstraints(csvParser);
ErrorTextBox.Text = String.Empty;
ErrorTextBox.Visible = false;
OkButton.Enabled = true;
} catch (Exception ex) {
if (ex is IOException || ex is InvalidOperationException || ex is ArgumentException) {
OkButton.Enabled = false;
ErrorTextBox.Text = ex.Message;
ErrorTextBox.Visible = true;
} else {
throw;
}
}
}
protected virtual void CheckAdditionalConstraints(TableFileParser csvParser) {
if (!csvParser.Values.Any(x => x is List)) {
throw new ArgumentException("No double column could be found!");
}
}
private IEnumerable GetVariableNamesWithType(TableFileParser csvParser) {
IList variableNamesWithType = csvParser.VariableNames.ToList();
for (int i = 0; i < csvParser.Values.Count; i++) {
if (csvParser.Values[i] is List) {
variableNamesWithType[i] += " (Double)";
} else if (csvParser.Values[i] is List) {
variableNamesWithType[i] += " (String)";
} else if (csvParser.Values[i] is List) {
variableNamesWithType[i] += " (DateTime)";
} else {
throw new ArgumentException("The variable values must be of type List, List or List");
}
}
return variableNamesWithType;
}
protected void ControlToolTip_DoubleClick(object sender, EventArgs e) {
Control control = sender as Control;
if (control != null) {
using (TextDialog dialog = new TextDialog(control.Name, (string)control.Tag, true)) {
dialog.ShowDialog(this);
}
}
}
}
}