Free cookie consent management tool by TermsFeed Policy Generator

source: branches/3040_VectorBasedGP/HeuristicLab.Problems.Instances.DataAnalysis/3.3/DataAnalysisInstanceProvider.cs @ 17874

Last change on this file since 17874 was 17785, checked in by pfleck, 4 years ago

#3040 Made vector separator symbol configurable in the CSV import dialog.

File size: 4.5 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.ComponentModel;
26using System.Globalization;
27using System.IO;
28using System.Linq;
29using System.Text;
30using HeuristicLab.Problems.DataAnalysis;
31using HeuristicLab.Random;
32
namespace HeuristicLab.Problems.Instances.DataAnalysis {
  /// <summary>
  /// Base class for problem instance providers that import data analysis problem data from
  /// CSV files and export problem data back to CSV.
  /// </summary>
  /// <typeparam name="TData">Type of the problem data handled by this provider.</typeparam>
  /// <typeparam name="ImportType">Type of the import settings passed through to the import routine.</typeparam>
  public abstract class DataAnalysisInstanceProvider<TData, ImportType> : ProblemInstanceProvider<TData>
    where TData : class, IDataAnalysisProblemData
    where ImportType : DataAnalysisImportType {

    /// <summary>
    /// Raised while a CSV file is being parsed; the reported percentage is in the range 0..100.
    /// </summary>
    public event ProgressChangedEventHandler ProgressChanged;

    /// <summary>
    /// Parses the CSV file at <paramref name="path"/> using the settings in
    /// <paramref name="csvFormat"/> and hands the parsed table to the
    /// parser-based <c>ImportData</c> overload.
    /// </summary>
    /// <param name="path">Path of the CSV file to read.</param>
    /// <param name="type">Import settings forwarded unchanged to the parser-based overload.</param>
    /// <param name="csvFormat">Encoding, number/date formats and separators used for parsing.</param>
    /// <returns>The problem data created by the parser-based overload.</returns>
    public TData ImportData(string path, ImportType type, DataAnalysisCSVFormat csvFormat) {
      var csvFileParser = new TableFileParser();
      csvFileParser.Encoding = csvFormat.Encoding;

      long fileSize = new FileInfo(path).Length;
      csvFileParser.ProgressChanged += (sender, e) => {
        // A zero-length file would make the ratio NaN/Infinity; report it as finished instead.
        OnProgressChanged(fileSize > 0 ? e / (double)fileSize : 1.0);
      };

      var formatOptions = new TableFileFormatOptions {
        NumberFormat = csvFormat.NumberFormatInfo,
        DateTimeFormat = csvFormat.DateTimeFormatInfo,
        ColumnSeparator = csvFormat.Separator,
        VectorSeparator = csvFormat.VectorSeparator
      };
      csvFileParser.Parse(path, formatOptions, csvFormat.VariableNamesAvailable);
      return ImportData(path, type, csvFileParser);
    }

    /// <summary>
    /// Raises <see cref="ProgressChanged"/> with <paramref name="d"/> converted to a percentage.
    /// </summary>
    /// <param name="d">Progress as a fraction; values outside 0..1 are clamped, NaN is reported as 0.</param>
    protected virtual void OnProgressChanged(double d) {
      // Copy to a local so the null check and the invocation see the same delegate.
      var handler = ProgressChanged;
      if (handler == null) return;

      double percent = 100 * d;
      if (double.IsNaN(percent)) percent = 0.0;
      // Casting an out-of-range double to int is unspecified, so clamp before truncating.
      percent = Math.Max(0.0, Math.Min(100.0, percent));
      handler(this, new ProgressChangedEventArgs((int)percent, null));
    }

    /// <summary>
    /// Creates the problem data from an already parsed CSV file.
    /// The base implementation always throws; derived providers override it.
    /// </summary>
    /// <param name="path">Path of the file that was parsed.</param>
    /// <param name="type">Import settings.</param>
    /// <param name="csvFileParser">Parser holding the parsed file content.</param>
    /// <returns>The imported problem data.</returns>
    /// <exception cref="NotSupportedException">Always thrown by the base implementation.</exception>
    protected virtual TData ImportData(string path, ImportType type, TableFileParser csvFileParser) {
      throw new NotSupportedException();
    }

    /// <summary>
    /// Returns copies of the given columns in which all columns have been reordered with the
    /// same random row permutation. The number of rows is taken from the first column.
    /// </summary>
    /// <param name="values">Columns to shuffle; each must be a <c>List&lt;double&gt;</c>,
    /// <c>List&lt;DateTime&gt;</c> or <c>List&lt;string&gt;</c>.</param>
    /// <returns>New column lists in shuffled row order; an empty list if there are no columns.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
    /// <exception cref="InvalidOperationException">A column has an unsupported type.</exception>
    protected List<IList> Shuffle(List<IList> values) {
      if (values == null) throw new ArgumentNullException("values");

      var shuffled = new List<IList>(values.Count);
      // Nothing to permute; avoids the opaque "sequence contains no elements" failure.
      if (values.Count == 0) return shuffled;

      int count = values[0].Count;
      int[] indices = Enumerable.Range(0, count).Shuffle(new FastRandom()).ToArray();

      foreach (IList column in values) {
        IList target;
        if (column is List<double>)
          target = new List<double>(count);
        else if (column is List<DateTime>)
          target = new List<DateTime>(count);
        else if (column is List<string>)
          target = new List<string>(count);
        else
          throw new InvalidOperationException(
            "Cannot shuffle column of unsupported type " + (column == null ? "null" : column.GetType().FullName) + ".");

        for (int i = 0; i < count; i++) {
          target.Add(column[indices[i]]);
        }
        shuffled.Add(target);
      }
      return shuffled;
    }

    /// <summary>Indicates that this provider supports <see cref="ExportData"/>.</summary>
    public override bool CanExportData {
      get { return true; }
    }

    /// <summary>
    /// Writes the dataset of <paramref name="instance"/> to <paramref name="path"/> as a
    /// separated-values file: one header line with the variable names followed by one line per row.
    /// The column separator is the list separator of the current culture, and the file is written
    /// in the system default code page with "*" as replacement for unencodable characters.
    /// </summary>
    /// <param name="instance">Problem data whose dataset is exported.</param>
    /// <param name="path">Target file; created or overwritten.</param>
    /// <exception cref="ArgumentNullException"><paramref name="instance"/> is null.</exception>
    public override void ExportData(TData instance, string path) {
      if (instance == null) throw new ArgumentNullException("instance");

      var dataset = instance.Dataset;
      var colSep = CultureInfo.CurrentCulture.TextInfo.ListSeparator;

      // The separator is stripped from variable names so the header keeps one field per column.
      // Joining (instead of append-then-trim) also works when the dataset has no variables.
      var header = String.Join(colSep, dataset.VariableNames.Select(v => v.Replace(colSep, String.Empty)));

      int rows = dataset.Rows;
      int columns = dataset.Columns;

      Encoding encoding = Encoding.GetEncoding(Encoding.Default.CodePage,
        new EncoderReplacementFallback("*"),
        new DecoderReplacementFallback("*"));

      // Stream rows directly to the file rather than buffering the whole table in memory.
      using (var fileStream = new FileStream(path, FileMode.Create))
      using (var writer = new StreamWriter(fileStream, encoding)) {
        writer.WriteLine(header);
        for (int i = 0; i < rows; i++) {
          for (int j = 0; j < columns; j++) {
            if (j > 0) writer.Write(colSep);
            // NOTE(review): cell values are written unescaped; a value containing the separator
            // would shift columns on re-import - confirm whether quoting is needed.
            writer.Write(dataset.GetValue(i, j));
          }
          writer.WriteLine();
        }
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.