Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
09/14/12 18:58:15 (12 years ago)
Author:
gkronber
Message:

#1847 merged r8205:8635 from trunk into branch

Location:
branches/GP-MoveOperators
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/GP-MoveOperators

  • branches/GP-MoveOperators/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs

    r8206 r8660  
    2121
    2222using System;
     23using System.Collections;
    2324using System.Collections.Generic;
     25using System.Globalization;
    2426using System.IO;
    2527using System.Linq;
    2628using System.Text;
     29using HeuristicLab.Common;
    2730using HeuristicLab.Problems.DataAnalysis;
    2831
     
    3033  public class ClassificationCSVInstanceProvider : ClassificationInstanceProvider {
    3134    public override string Name {
    32       get { return "Comma-separated Values File"; }
     35      get { return "CSV File"; }
    3336    }
    3437    public override string Description {
     
    6164
    6265      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    63       string targetVar = csvFileParser.VariableNames.Where(x => dataset.DoubleVariables.Contains(x)).Last();
    64       IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));
     66      string targetVar = dataset.DoubleVariables.Last();
    6567
    66       ClassificationProblemData claData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
    67 
    68       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    69       claData.TrainingPartition.Start = 0;
    70       claData.TrainingPartition.End = trainingPartEnd;
    71       claData.TestPartition.Start = trainingPartEnd;
    72       claData.TestPartition.End = csvFileParser.Rows;
    73       int pos = path.LastIndexOf('\\');
    74       if (pos < 0)
    75         claData.Name = path;
    76       else {
    77         pos++;
    78         claData.Name = path.Substring(pos, path.Length - pos);
     68      // turn off input variables that are constant in the training partition
     69      var allowedInputVars = new List<string>();
     70      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     71      if (trainingIndizes.Count() >= 2) {
     72        foreach (var variableName in dataset.DoubleVariables) {
     73          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     74            variableName != targetVar)
     75            allowedInputVars.Add(variableName);
     76        }
     77      } else {
     78        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x.Equals(targetVar)));
    7979      }
    8080
    81       return claData;
     81      ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
     82
     83      int trainingPartEnd = trainingIndizes.Last();
     84      classificationData.TrainingPartition.Start = trainingIndizes.First();
     85      classificationData.TrainingPartition.End = trainingPartEnd;
     86      classificationData.TestPartition.Start = trainingPartEnd;
     87      classificationData.TestPartition.End = csvFileParser.Rows;
     88
     89      classificationData.Name = Path.GetFileName(path);
     90
     91      return classificationData;
     92    }
     93
     94    public override IClassificationProblemData ImportData(string path, DataAnalysisImportType type) {
     95      TableFileParser csvFileParser = new TableFileParser();
     96      csvFileParser.Parse(path);
     97
     98      List<IList> values = csvFileParser.Values;
     99      if (type.Shuffle) {
     100        values = Shuffle(values);
     101      }
     102
     103      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);
     104      string targetVar = dataset.DoubleVariables.Last();
     105
     106      // turn off input variables that are constant in the training partition
     107      var allowedInputVars = new List<string>();
     108      int trainingPartEnd = (csvFileParser.Rows * type.Training) / 100;
     109      var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
     110      foreach (var variableName in dataset.DoubleVariables) {
     111        if (trainingIndizes.Count() >= 2 && dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     112          variableName != targetVar)
     113          allowedInputVars.Add(variableName);
     114      }
     115
     116      ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
     117
     118      classificationData.TrainingPartition.Start = 0;
     119      classificationData.TrainingPartition.End = trainingPartEnd;
     120      classificationData.TestPartition.Start = trainingPartEnd;
     121      classificationData.TestPartition.End = csvFileParser.Rows;
     122
     123      classificationData.Name = Path.GetFileName(path);
     124
     125      return classificationData;
    82126    }
    83127
     
    86130    }
    87131    public override void ExportData(IClassificationProblemData instance, string path) {
    88       StringBuilder strBuilder = new StringBuilder();
     132      var strBuilder = new StringBuilder();
    89133
    90134      foreach (var variable in instance.InputVariables) {
    91         strBuilder.Append(variable + ";");
     135        strBuilder.Append(variable + CultureInfo.CurrentCulture.TextInfo.ListSeparator);
    92136      }
    93       strBuilder.Remove(strBuilder.Length - 1, 1);
     137      strBuilder.Remove(strBuilder.Length - CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length, CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length);
    94138      strBuilder.AppendLine();
    95139
    96       Dataset dataset = instance.Dataset;
     140      var dataset = instance.Dataset;
    97141
    98142      for (int i = 0; i < dataset.Rows; i++) {
    99143        for (int j = 0; j < dataset.Columns; j++) {
    100           strBuilder.Append(dataset.GetValue(i, j) + ";");
     144          if (j > 0) strBuilder.Append(CultureInfo.CurrentCulture.TextInfo.ListSeparator);
     145          strBuilder.Append(dataset.GetValue(i, j));
    101146        }
    102         strBuilder.Remove(strBuilder.Length - 1, 1);
    103147        strBuilder.AppendLine();
    104148      }
    105149
    106       using (StreamWriter writer = new StreamWriter(path)) {
     150      using (var writer = new StreamWriter(path)) {
    107151        writer.Write(strBuilder);
    108152      }
Note: See TracChangeset for help on using the changeset viewer.