Changeset 8598


Ignore:
Timestamp:
09/07/12 14:53:39 (7 years ago)
Author:
sforsten
Message:

#1942: csv files for data analysis problems can be shuffled when imported

Location:
trunk/sources
Files:
13 added
8 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab 3.3.sln

    r8401 r8598  
    345345EndProject
    346346Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Algorithms.GradientDescent", "HeuristicLab.Algorithms.GradientDescent\3.3\HeuristicLab.Algorithms.GradientDescent.csproj", "{1256B945-EEA9-4BE4-9880-76B5B113F089}"
     347EndProject
     348Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "HeuristicLab.Problems.Instances.DataAnalysis.Views-3.3", "HeuristicLab.Problems.Instances.DataAnalysis.Views\3.3\HeuristicLab.Problems.Instances.DataAnalysis.Views-3.3.csproj", "{72232235-B6CF-4E6C-B086-9E9E11AA0717}"
    347349EndProject
    348350Global
     
    16881690    {1256B945-EEA9-4BE4-9880-76B5B113F089}.Release|x86.ActiveCfg = Release|x86
    16891691    {1256B945-EEA9-4BE4-9880-76B5B113F089}.Release|x86.Build.0 = Release|x86
     1692    {72232235-B6CF-4E6C-B086-9E9E11AA0717}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
     1693    {72232235-B6CF-4E6C-B086-9E9E11AA0717}.Debug|Any CPU.Build.0 = Debug|Any CPU
     1694    {72232235-B6CF-4E6C-B086-9E9E11AA0717}.Debug|x64.ActiveCfg = Debug|Any CPU
     1695    {72232235-B6CF-4E6C-B086-9E9E11AA0717}.Debug|x86.ActiveCfg = Debug|Any CPU
     1696    {72232235-B6CF-4E6C-B086-9E9E11AA0717}.Release|Any CPU.ActiveCfg = Release|Any CPU
     1697    {72232235-B6CF-4E6C-B086-9E9E11AA0717}.Release|Any CPU.Build.0 = Release|Any CPU
     1698    {72232235-B6CF-4E6C-B086-9E9E11AA0717}.Release|x64.ActiveCfg = Release|Any CPU
     1699    {72232235-B6CF-4E6C-B086-9E9E11AA0717}.Release|x86.ActiveCfg = Release|Any CPU
    16901700  EndGlobalSection
    16911701  GlobalSection(SolutionProperties) = preSolution
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs

    r8566 r8598  
    2121
    2222using System;
     23using System.Collections;
    2324using System.Collections.Generic;
    2425using System.Globalization;
     
    8788    }
    8889
     90    public override IClassificationProblemData ImportData(string path, DataAnalysisImportType type) {
     91      TableFileParser csvFileParser = new TableFileParser();
     92      csvFileParser.Parse(path);
     93
     94      List<IList> values = csvFileParser.Values;
     95      if (type.Shuffle) {
     96        values = Shuffle(values);
     97      }
     98
     99      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);
     100      string targetVar = dataset.DoubleVariables.Last();
     101
     102      // turn off input variables that are constant in the training partition
     103      var allowedInputVars = new List<string>();
     104      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     105      foreach (var variableName in dataset.DoubleVariables) {
     106        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     107          variableName != targetVar)
     108          allowedInputVars.Add(variableName);
     109      }
     110
     111      ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
     112
     113      int trainingPartEnd = trainingIndizes.Last();
     114      classificationData.TrainingPartition.Start = trainingIndizes.First();
     115      classificationData.TrainingPartition.End = trainingPartEnd;
     116      classificationData.TestPartition.Start = trainingPartEnd;
     117      classificationData.TestPartition.End = csvFileParser.Rows;
     118
     119      classificationData.Name = Path.GetFileName(path);
     120
     121      return classificationData;
     122    }
     123
    89124    public override bool CanExportData {
    90125      get { return true; }
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ClassificationInstanceProvider.cs

    r8192 r8598  
    2323
    2424namespace HeuristicLab.Problems.Instances.DataAnalysis {
    25   public abstract class ClassificationInstanceProvider : ProblemInstanceProvider<IClassificationProblemData> {
     25  public abstract class ClassificationInstanceProvider : DataAnalysisInstanceProvider<IClassificationProblemData> {
    2626  }
    2727}
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs

    r8566 r8598  
    2121
    2222using System;
     23using System.Collections;
    2324using System.Collections.Generic;
    2425using System.Globalization;
     
    5960    public override IClusteringProblemData ImportData(string path) {
    6061      var csvFileParser = new TableFileParser();
    61 
    6262      csvFileParser.Parse(path);
    6363
    64       var dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
     64      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
     65      string targetVar = dataset.DoubleVariables.Last();
    6566
    6667      // turn off input variables that are constant in the training partition
     
    6869      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
    6970      foreach (var variableName in dataset.DoubleVariables) {
    70         if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0)
     71        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     72          variableName != targetVar)
    7173          allowedInputVars.Add(variableName);
    7274      }
    7375
    74       var clusteringData = new ClusteringProblemData(dataset, allowedInputVars);
     76      ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);
     77
     78      int trainingPartEnd = trainingIndizes.Last();
     79      clusteringData.TrainingPartition.Start = trainingIndizes.First();
     80      clusteringData.TrainingPartition.End = trainingPartEnd;
     81      clusteringData.TestPartition.Start = trainingPartEnd;
     82      clusteringData.TestPartition.End = csvFileParser.Rows;
     83
     84      clusteringData.Name = Path.GetFileName(path);
     85
     86      return clusteringData;
     87    }
     88
     89    public override IClusteringProblemData ImportData(string path, DataAnalysisImportType type) {
     90      TableFileParser csvFileParser = new TableFileParser();
     91      csvFileParser.Parse(path);
     92
     93      List<IList> values = csvFileParser.Values;
     94      if (type.Shuffle) {
     95        values = Shuffle(values);
     96      }
     97
     98      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);
     99      string targetVar = dataset.DoubleVariables.Last();
     100
     101      // turn off input variables that are constant in the training partition
     102      var allowedInputVars = new List<string>();
     103      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     104      foreach (var variableName in dataset.DoubleVariables) {
     105        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     106          variableName != targetVar)
     107          allowedInputVars.Add(variableName);
     108      }
     109
     110      ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);
    75111
    76112      int trainingPartEnd = trainingIndizes.Last();
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/ClusteringInstanceProvider.cs

    r8192 r8598  
    2323
    2424namespace HeuristicLab.Problems.Instances.DataAnalysis {
    25   public abstract class ClusteringInstanceProvider : ProblemInstanceProvider<IClusteringProblemData> {
     25  public abstract class ClusteringInstanceProvider : DataAnalysisInstanceProvider<IClusteringProblemData> {
    2626  }
    2727}
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/HeuristicLab.Problems.Instances.DataAnalysis-3.3.csproj

    r8595 r8598  
    123123    <Compile Include="Clustering\ClusteringInstanceProvider.cs" />
    124124    <Compile Include="Clustering\CSV\ClusteringCSVInstanceProvider.cs" />
     125    <Compile Include="DataAnalysisImportType.cs" />
     126    <Compile Include="DataAnalysisInstanceProvider.cs" />
    125127    <Compile Include="Plugin.cs" />
    126128    <Compile Include="Properties\AssemblyInfo.cs" />
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/CSV/RegressionCSVInstanceProvider.cs

    r8566 r8598  
    2121
    2222using System;
     23using System.Collections;
    2324using System.Collections.Generic;
    2425using System.Globalization;
     
    8586    }
    8687
     88    public override IRegressionProblemData ImportData(string path, DataAnalysisImportType type) {
     89      TableFileParser csvFileParser = new TableFileParser();
     90      csvFileParser.Parse(path);
     91
     92      List<IList> values = csvFileParser.Values;
     93      if (type.Shuffle) {
     94        values = Shuffle(values);
     95      }
     96      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);
     97      string targetVar = dataset.DoubleVariables.Last();
     98
     99      // turn off input variables that are constant in the training partition
     100      var allowedInputVars = new List<string>();
     101      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     102      foreach (var variableName in dataset.DoubleVariables) {
     103        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     104          variableName != targetVar)
     105          allowedInputVars.Add(variableName);
     106      }
     107
     108      RegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar);
     109
     110      int trainingPartEnd = trainingIndizes.Last();
     111      regressionData.TrainingPartition.Start = trainingIndizes.First();
     112      regressionData.TrainingPartition.End = trainingPartEnd;
     113      regressionData.TestPartition.Start = trainingPartEnd;
     114      regressionData.TestPartition.End = csvFileParser.Rows;
     115
     116      regressionData.Name = Path.GetFileName(path);
     117
     118      return regressionData;
     119    }
     120
    87121    public override bool CanExportData {
    88122      get { return true; }
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/RegressionInstanceProvider.cs

    r8192 r8598  
    2323
    2424namespace HeuristicLab.Problems.Instances.DataAnalysis {
    25   public abstract class RegressionInstanceProvider : ProblemInstanceProvider<IRegressionProblemData> {
     25  public abstract class RegressionInstanceProvider : DataAnalysisInstanceProvider<IRegressionProblemData> {
    2626  }
    2727}
Note: See TracChangeset for help on using the changeset viewer.