Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
09/07/12 14:53:39 (12 years ago)
Author:
sforsten
Message:

#1942: CSV files for data analysis problems can now be shuffled when imported

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs

    r8566 r8598  
    2121
    2222using System;
     23using System.Collections;
    2324using System.Collections.Generic;
    2425using System.Globalization;
     
    5960    public override IClusteringProblemData ImportData(string path) {
    6061      var csvFileParser = new TableFileParser();
    61 
    6262      csvFileParser.Parse(path);
    6363
    64       var dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
     64      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
     65      string targetVar = dataset.DoubleVariables.Last();
    6566
    6667      // turn off input variables that are constant in the training partition
     
    6869      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
    6970      foreach (var variableName in dataset.DoubleVariables) {
    70         if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0)
     71        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     72          variableName != targetVar)
    7173          allowedInputVars.Add(variableName);
    7274      }
    7375
    74       var clusteringData = new ClusteringProblemData(dataset, allowedInputVars);
     76      ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);
     77
     78      int trainingPartEnd = trainingIndizes.Last();
     79      clusteringData.TrainingPartition.Start = trainingIndizes.First();
     80      clusteringData.TrainingPartition.End = trainingPartEnd;
     81      clusteringData.TestPartition.Start = trainingPartEnd;
     82      clusteringData.TestPartition.End = csvFileParser.Rows;
     83
     84      clusteringData.Name = Path.GetFileName(path);
     85
     86      return clusteringData;
     87    }
     88
     89    public override IClusteringProblemData ImportData(string path, DataAnalysisImportType type) {
     90      TableFileParser csvFileParser = new TableFileParser();
     91      csvFileParser.Parse(path);
     92
     93      List<IList> values = csvFileParser.Values;
     94      if (type.Shuffle) {
     95        values = Shuffle(values);
     96      }
     97
     98      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);
     99      string targetVar = dataset.DoubleVariables.Last();
     100
     101      // turn off input variables that are constant in the training partition
     102      var allowedInputVars = new List<string>();
     103      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     104      foreach (var variableName in dataset.DoubleVariables) {
     105        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     106          variableName != targetVar)
     107          allowedInputVars.Add(variableName);
     108      }
     109
     110      ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);
    75111
    76112      int trainingPartEnd = trainingIndizes.Last();
Note: See TracChangeset for help on using the changeset viewer.