Free cookie consent management tool by TermsFeed Policy Generator

source: branches/ProblemInstancesRegressionAndClassification/HeuristicLab.Problems.Instances.Regression/3.4/RegressionInstanceProvider.cs @ 7805

Last change on this file since 7805 was 7805, checked in by sforsten, 12 years ago

#1784: changes have been applied, according to the review comments of mkommend

File size: 3.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2012 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Globalization;
26using System.IO;
27using System.Linq;
28using System.Text;
29using HeuristicLab.Problems.DataAnalysis;
30
namespace HeuristicLab.Problems.Instances.Regression {
  /// <summary>
  /// Base class for providers of regression problem instances. It implements loading an
  /// instance from a separated-value text file and saving an instance to such a file;
  /// the descriptor-based members and the provider metadata are left to subclasses.
  /// </summary>
  public abstract class RegressionInstanceProvider : IProblemInstanceProvider<IRegressionProblemData> {

    // Separator written between the fields of a line by SaveData.
    private const char ColumnSeparator = ';';

    // Characters treated as directory separators when deriving an instance name from a path.
    // The backslash was the only one recognized originally; the forward slash is added so
    // that paths such as "C:/data/file.csv" yield "file.csv" as well.
    private static readonly char[] DirectorySeparators = { '\\', '/' };

    /// <summary>
    /// Loads a regression problem instance from the file at <paramref name="path"/>.
    /// Number format, date format and column separator are determined by
    /// <c>TableFileParser.DetermineFileFormat</c>. The returned instance is named after
    /// the part of <paramref name="path"/> that follows the last directory separator
    /// (or after the whole path when it contains none).
    /// </summary>
    /// <param name="path">Path of the file to read.</param>
    /// <returns>The problem data parsed from the file.</returns>
    public IRegressionProblemData LoadData(string path) {
      NumberFormatInfo numberFormat;
      DateTimeFormatInfo dateFormat;
      char separator;
      TableFileParser.DetermineFileFormat(path, out numberFormat, out dateFormat, out separator);

      IRegressionProblemData regData;
      // Open read-only: without an explicit FileAccess the stream requests read/write
      // access, which fails for read-only files although the data is only read here.
      // The using block releases the file handle even when parsing throws.
      using (FileStream stream = new FileStream(path, FileMode.Open, FileAccess.Read)) {
        regData = LoadData(stream, numberFormat, dateFormat, separator);
      }

      int separatorIndex = path.LastIndexOfAny(DirectorySeparators);
      regData.Name = separatorIndex < 0 ? path : path.Substring(separatorIndex + 1);
      return regData;
    }

    /// <summary>
    /// Parses tabular data from <paramref name="stream"/> and wraps it in a regression
    /// problem instance. The last variable of the parsed table becomes the target
    /// variable and every variable with a different name is passed on as an allowed
    /// input variable. The first two thirds of the rows (integer division) form the
    /// training partition, the remaining rows the test partition.
    /// </summary>
    /// <param name="stream">Stream to read the table from.</param>
    /// <param name="numberFormat">Number format handed to the parser.</param>
    /// <param name="dateFormat">Date/time format handed to the parser.</param>
    /// <param name="separator">Column separator handed to the parser.</param>
    /// <returns>The problem data built from the parsed table.</returns>
    protected IRegressionProblemData LoadData(Stream stream, NumberFormatInfo numberFormat, DateTimeFormatInfo dateFormat, char separator) {
      TableFileParser csvFileParser = new TableFileParser();
      csvFileParser.Parse(stream, numberFormat, dateFormat, separator);

      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
      string targetVar = csvFileParser.VariableNames.Last();
      IEnumerable<string> allowedInputVars = csvFileParser.VariableNames.Where(x => !x.Equals(targetVar));

      RegressionProblemData regData = new RegressionProblemData(dataset, allowedInputVars, targetVar);

      // Training rows are [0, 2/3 * rows), test rows are [2/3 * rows, rows).
      int trainingPartEnd = csvFileParser.Rows * 2 / 3;
      regData.TrainingPartition.Start = 0;
      regData.TrainingPartition.End = trainingPartEnd;
      regData.TestPartition.Start = trainingPartEnd;
      regData.TestPartition.End = csvFileParser.Rows;

      return regData;
    }

    /// <summary>
    /// Writes <paramref name="instance"/> to the file at <paramref name="path"/> as
    /// ';'-separated text: one header line built from the entries of
    /// <c>instance.InputVariables</c>, followed by one line per dataset row containing
    /// the value of every dataset column.
    /// </summary>
    /// <remarks>
    /// The complete text is assembled in memory before the file is opened for writing.
    /// NOTE(review): the header is taken from InputVariables while the rows cover all
    /// dataset columns - presumably both enumerate the same columns in the same order;
    /// confirm against the problem data implementation.
    /// </remarks>
    /// <param name="instance">Problem data to save.</param>
    /// <param name="path">Path of the file to write.</param>
    public void SaveData(IRegressionProblemData instance, string path) {
      StringBuilder strBuilder = new StringBuilder();

      // The separator is written in front of every field except the first one. This
      // replaces the former "append trailing separator, then remove the last character"
      // approach, which threw for an empty variable list and damaged the preceding line
      // terminator for a dataset without columns.
      bool isFirstField = true;
      foreach (var variable in instance.InputVariables) {
        if (!isFirstField) {
          strBuilder.Append(ColumnSeparator);
        }
        strBuilder.Append(variable);
        isFirstField = false;
      }
      strBuilder.AppendLine();

      Dataset dataset = instance.Dataset;

      for (int row = 0; row < dataset.Rows; row++) {
        for (int column = 0; column < dataset.Columns; column++) {
          if (column > 0) {
            strBuilder.Append(ColumnSeparator);
          }
          strBuilder.Append(dataset.GetValue(row, column));
        }
        strBuilder.AppendLine();
      }

      using (StreamWriter writer = new StreamWriter(path)) {
        writer.Write(strBuilder);
      }
    }

    // Descriptor-based access to instances; implemented by the concrete providers.
    public abstract IEnumerable<IDataDescriptor> GetDataDescriptors();
    public abstract IRegressionProblemData LoadData(IDataDescriptor descriptor);

    // Provider metadata; supplied by the concrete providers.
    public abstract string Name { get; }
    public abstract string Description { get; }
    public abstract Uri WebLink { get; }
    public abstract string ReferencePublication { get; }
  }
}
Note: See TracBrowser for help on using the repository browser.