Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
09/14/12 18:58:15 (12 years ago)
Author:
gkronber
Message:

#1847 merged r8205:8635 from trunk into branch

Location:
branches/GP-MoveOperators
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/GP-MoveOperators

  • branches/GP-MoveOperators/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs

    r8199 r8660  
    2121
    2222using System;
     23using System.Collections;
    2324using System.Collections.Generic;
     25using System.Globalization;
    2426using System.IO;
     27using System.Linq;
    2528using System.Text;
     29using HeuristicLab.Common;
    2630using HeuristicLab.Problems.DataAnalysis;
    2731
     
    2933  public class ClusteringCSVInstanceProvider : ClusteringInstanceProvider {
    3034    public override string Name {
    31       get { return "Comma-separated Values File"; }
     35      get { return "CSV File"; }
    3236    }
    3337    public override string Description {
     
    5660    public override IClusteringProblemData ImportData(string path) {
    5761      var csvFileParser = new TableFileParser();
    58 
    5962      csvFileParser.Parse(path);
    6063
    61       var dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    62       var claData = new ClusteringProblemData(dataset, dataset.DoubleVariables);
     64      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
     65      string targetVar = dataset.DoubleVariables.Last();
    6366
    64       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    65       claData.TrainingPartition.Start = 0;
    66       claData.TrainingPartition.End = trainingPartEnd;
    67       claData.TestPartition.Start = trainingPartEnd;
    68       claData.TestPartition.End = csvFileParser.Rows;
    69       int pos = path.LastIndexOf('\\');
    70       if (pos < 0)
    71         claData.Name = path;
    72       else {
    73         pos++;
    74         claData.Name = path.Substring(pos, path.Length - pos);
     67      // turn off input variables that are constant in the training partition
     68      var allowedInputVars = new List<string>();
     69      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     70      if (trainingIndizes.Count() >= 2) {
     71        foreach (var variableName in dataset.DoubleVariables) {
     72          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     73            variableName != targetVar)
     74            allowedInputVars.Add(variableName);
     75        }
     76      } else {
     77        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x.Equals(targetVar)));
    7578      }
    7679
    77       return claData;
     80      ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);
     81
     82      int trainingPartEnd = trainingIndizes.Last();
     83      clusteringData.TrainingPartition.Start = trainingIndizes.First();
     84      clusteringData.TrainingPartition.End = trainingPartEnd;
     85      clusteringData.TestPartition.Start = trainingPartEnd;
     86      clusteringData.TestPartition.End = csvFileParser.Rows;
     87
     88      clusteringData.Name = Path.GetFileName(path);
     89
     90      return clusteringData;
     91    }
     92
     93    public override IClusteringProblemData ImportData(string path, DataAnalysisImportType type) {
     94      TableFileParser csvFileParser = new TableFileParser();
     95      csvFileParser.Parse(path);
     96
     97      List<IList> values = csvFileParser.Values;
     98      if (type.Shuffle) {
     99        values = Shuffle(values);
     100      }
     101
     102      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);
     103      string targetVar = dataset.DoubleVariables.Last();
     104
     105      // turn off input variables that are constant in the training partition
     106      var allowedInputVars = new List<string>();
     107      int trainingPartEnd = (csvFileParser.Rows * type.Training) / 100;
     108      var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
     109      foreach (var variableName in dataset.DoubleVariables) {
     110        if (trainingIndizes.Count() >= 2 && dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     111          variableName != targetVar)
     112          allowedInputVars.Add(variableName);
     113      }
     114
     115      ClusteringProblemData clusteringData = new ClusteringProblemData(dataset, allowedInputVars);
     116
     117      clusteringData.TrainingPartition.Start = 0;
     118      clusteringData.TrainingPartition.End = trainingPartEnd;
     119      clusteringData.TestPartition.Start = trainingPartEnd;
     120      clusteringData.TestPartition.End = csvFileParser.Rows;
     121
     122      clusteringData.Name = Path.GetFileName(path);
     123
     124      return clusteringData;
    78125    }
    79126
     
    85132
    86133      foreach (var variable in instance.InputVariables) {
    87         strBuilder.Append(variable + ";");
     134        strBuilder.Append(variable + CultureInfo.CurrentCulture.TextInfo.ListSeparator);
    88135      }
    89       strBuilder.Remove(strBuilder.Length - 1, 1);
     136      strBuilder.Remove(strBuilder.Length - CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length, CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length);
    90137      strBuilder.AppendLine();
    91138
     
    94141      for (int i = 0; i < dataset.Rows; i++) {
    95142        for (int j = 0; j < dataset.Columns; j++) {
    96           strBuilder.Append(dataset.GetValue(i, j) + ";");
     143          if (j > 0) strBuilder.Append(CultureInfo.CurrentCulture.TextInfo.ListSeparator);
     144          strBuilder.Append(dataset.GetValue(i, j));
    97145        }
    98         strBuilder.Remove(strBuilder.Length - 1, 1);
    99146        strBuilder.AppendLine();
    100147      }
Note: See TracChangeset for help on using the changeset viewer.