Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
09/07/12 14:53:39 (12 years ago)
Author:
sforsten
Message:

#1942: csv files for data analysis problems can be shuffled when imported

Location:
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs

    r8566 r8598  
    21 21
    22 22 using System;
       23 using System.Collections;
    23 24 using System.Collections.Generic;
    24 25 using System.Globalization;
     
    87 88     }
    88 89
     90    public override IClassificationProblemData ImportData(string path, DataAnalysisImportType type) {
     91      TableFileParser csvFileParser = new TableFileParser();
     92      csvFileParser.Parse(path);
     93
     94      List<IList> values = csvFileParser.Values;
     95      if (type.Shuffle) {
     96        values = Shuffle(values);
     97      }
     98
     99      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);
     100      string targetVar = dataset.DoubleVariables.Last();
     101
     102      // turn off input variables that are constant in the training partition
     103      var allowedInputVars = new List<string>();
     104      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     105      foreach (var variableName in dataset.DoubleVariables) {
     106        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     107          variableName != targetVar)
     108          allowedInputVars.Add(variableName);
     109      }
     110
     111      ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
     112
     113      int trainingPartEnd = trainingIndizes.Last();
     114      classificationData.TrainingPartition.Start = trainingIndizes.First();
     115      classificationData.TrainingPartition.End = trainingPartEnd;
     116      classificationData.TestPartition.Start = trainingPartEnd;
     117      classificationData.TestPartition.End = csvFileParser.Rows;
     118
     119      classificationData.Name = Path.GetFileName(path);
     120
     121      return classificationData;
     122    }
     123
    89 124     public override bool CanExportData {
    90 125       get { return true; }
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/ClassificationInstanceProvider.cs

    r8192 r8598  
    23 23
    24 24 namespace HeuristicLab.Problems.Instances.DataAnalysis {
    25      public abstract class ClassificationInstanceProvider : ProblemInstanceProvider<IClassificationProblemData> {
       25   public abstract class ClassificationInstanceProvider : DataAnalysisInstanceProvider<IClassificationProblemData> {
    26 26   }
    27 27 }
Note: See TracChangeset for help on using the changeset viewer.