Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
09/07/12 14:53:39 (12 years ago)
Author:
sforsten
Message:

#1942: csv files for data analysis problems can be shuffled when imported

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/CSV/RegressionCSVInstanceProvider.cs

    r8566 r8598  
    2121
    2222using System;
     23using System.Collections;
    2324using System.Collections.Generic;
    2425using System.Globalization;
     
    8586    }
    8687
     88    public override IRegressionProblemData ImportData(string path, DataAnalysisImportType type) {
     89      TableFileParser csvFileParser = new TableFileParser();
     90      csvFileParser.Parse(path);
     91
     92      List<IList> values = csvFileParser.Values;
     93      if (type.Shuffle) {
     94        values = Shuffle(values);
     95      }
     96      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);
     97      string targetVar = dataset.DoubleVariables.Last();
     98
     99      // turn off input variables that are constant in the training partition
     100      var allowedInputVars = new List<string>();
     101      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     102      foreach (var variableName in dataset.DoubleVariables) {
     103        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     104          variableName != targetVar)
     105          allowedInputVars.Add(variableName);
     106      }
     107
     108      RegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar);
     109
     110      int trainingPartEnd = trainingIndizes.Last();
     111      regressionData.TrainingPartition.Start = trainingIndizes.First();
     112      regressionData.TrainingPartition.End = trainingPartEnd;
     113      regressionData.TestPartition.Start = trainingPartEnd;
     114      regressionData.TestPartition.End = csvFileParser.Rows;
     115
     116      regressionData.Name = Path.GetFileName(path);
     117
     118      return regressionData;
     119    }
     120
    87121    public override bool CanExportData {
    88122      get { return true; }
Note: See TracChangeset for help on using the changeset viewer.