Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
09/14/12 18:58:15 (12 years ago)
Author:
gkronber
Message:

#1847 merged r8205:8635 from trunk into branch

Location:
branches/GP-MoveOperators
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/GP-MoveOperators

  • branches/GP-MoveOperators/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/CSV/RegressionCSVInstanceProvider.cs

    r8206 r8660  
    2121
    2222using System;
     23using System.Collections;
    2324using System.Collections.Generic;
     25using System.Globalization;
    2426using System.IO;
    2527using System.Linq;
    2628using System.Text;
     29using HeuristicLab.Common;
    2730using HeuristicLab.Problems.DataAnalysis;
    2831
     
    3033  public class RegressionCSVInstanceProvider : RegressionInstanceProvider {
    3134    public override string Name {
    32       get { return "Comma-separated Values File"; }
     35      get { return "CSV File"; }
    3336    }
    3437    public override string Description {
     
    5962
    6063      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    61       string targetVar = csvFileParser.VariableNames.Where(x => dataset.DoubleVariables.Contains(x)).Last();
     64      string targetVar = dataset.DoubleVariables.Last();
    6265
    63       IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));
     66      // turn off input variables that are constant in the training partition
     67      var allowedInputVars = new List<string>();
     68      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     69      foreach (var variableName in dataset.DoubleVariables) {
     70        if (trainingIndizes.Count() >= 2 && dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     71          variableName != targetVar)
     72          allowedInputVars.Add(variableName);
     73      }
    6474
    65       IRegressionProblemData regData = new RegressionProblemData(dataset, allowedInputVars, targetVar);
     75      IRegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar);
    6676
    67       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    68       regData.TrainingPartition.Start = 0;
    69       regData.TrainingPartition.End = trainingPartEnd;
    70       regData.TestPartition.Start = trainingPartEnd;
    71       regData.TestPartition.End = csvFileParser.Rows;
     77      var trainingPartEnd = trainingIndizes.Last();
     78      regressionData.TrainingPartition.Start = trainingIndizes.First();
     79      regressionData.TrainingPartition.End = trainingPartEnd;
     80      regressionData.TestPartition.Start = trainingPartEnd;
     81      regressionData.TestPartition.End = csvFileParser.Rows;
    7282
    73       int pos = path.LastIndexOf('\\');
    74       if (pos < 0)
    75         regData.Name = path;
    76       else {
    77         pos++;
    78         regData.Name = path.Substring(pos, path.Length - pos);
     83      regressionData.Name = Path.GetFileName(path);
     84
     85      return regressionData;
     86    }
     87
     88    public override IRegressionProblemData ImportData(string path, DataAnalysisImportType type) {
     89      TableFileParser csvFileParser = new TableFileParser();
     90      csvFileParser.Parse(path);
     91
     92      List<IList> values = csvFileParser.Values;
     93      if (type.Shuffle) {
     94        values = Shuffle(values);
    7995      }
    80       return regData;
     96      Dataset dataset = new Dataset(csvFileParser.VariableNames, values);
     97      string targetVar = dataset.DoubleVariables.Last();
     98
     99      // turn off input variables that are constant in the training partition
     100      var allowedInputVars = new List<string>();
     101      int trainingPartEnd = (csvFileParser.Rows * type.Training) / 100;
     102      trainingPartEnd = trainingPartEnd > 0 ? trainingPartEnd : 1;
     103      var trainingIndizes = Enumerable.Range(0, trainingPartEnd);
     104      if (trainingIndizes.Count() >= 2) {
     105        foreach (var variableName in dataset.DoubleVariables) {
     106          if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     107            variableName != targetVar)
     108            allowedInputVars.Add(variableName);
     109        }
     110      } else {
     111        allowedInputVars.AddRange(dataset.DoubleVariables.Where(x => x.Equals(targetVar)));
     112      }
     113
     114      RegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar);
     115
     116      regressionData.TrainingPartition.Start = 0;
     117      regressionData.TrainingPartition.End = trainingPartEnd;
     118      regressionData.TestPartition.Start = trainingPartEnd;
     119      regressionData.TestPartition.End = csvFileParser.Rows;
     120
     121      regressionData.Name = Path.GetFileName(path);
     122
     123      return regressionData;
    81124    }
    82125
     
    85128    }
    86129    public override void ExportData(IRegressionProblemData instance, string path) {
    87       StringBuilder strBuilder = new StringBuilder();
     130      var strBuilder = new StringBuilder();
    88131
    89132      foreach (var variable in instance.InputVariables) {
    90         strBuilder.Append(variable + ";");
     133        strBuilder.Append(variable + CultureInfo.CurrentCulture.TextInfo.ListSeparator);
    91134      }
    92       strBuilder.Remove(strBuilder.Length - 1, 1);
     135      strBuilder.Remove(strBuilder.Length - CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length, CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length);
    93136      strBuilder.AppendLine();
    94137
    95       Dataset dataset = instance.Dataset;
     138      var dataset = instance.Dataset;
    96139
    97140      for (int i = 0; i < dataset.Rows; i++) {
    98141        for (int j = 0; j < dataset.Columns; j++) {
    99           strBuilder.Append(dataset.GetValue(i, j) + ";");
     142          if (j > 0) strBuilder.Append(CultureInfo.CurrentCulture.TextInfo.ListSeparator);
     143          strBuilder.Append(dataset.GetValue(i, j));
    100144        }
    101         strBuilder.Remove(strBuilder.Length - 1, 1);
    102145        strBuilder.AppendLine();
    103146      }
    104147
    105       using (StreamWriter writer = new StreamWriter(path)) {
     148      using (var writer = new StreamWriter(path)) {
    106149        writer.Write(strBuilder);
    107150      }
Note: See TracChangeset for help on using the changeset viewer.