Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
09/06/12 09:52:52 (13 years ago)
Author:
ascheibe
Message:

#1861 merged changes from trunk into branch

Location:
branches/HeuristicLab.Mono
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Mono

  • branches/HeuristicLab.Mono/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs

    r8211 r8585  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Globalization;
    2425using System.IO;
    2526using System.Linq;
    2627using System.Text;
     28using HeuristicLab.Common;
    2729using HeuristicLab.Problems.DataAnalysis;
    2830
     
    6163
    6264      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    63       string targetVar = csvFileParser.VariableNames.Where(x => dataset.DoubleVariables.Contains(x)).Last();
    64       IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));
     65      string targetVar = dataset.DoubleVariables.Last();
    6566
    66       ClassificationProblemData claData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
    67 
    68       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    69       claData.TrainingPartition.Start = 0;
    70       claData.TrainingPartition.End = trainingPartEnd;
    71       claData.TestPartition.Start = trainingPartEnd;
    72       claData.TestPartition.End = csvFileParser.Rows;
    73       int pos = path.LastIndexOf('\\');
    74       if (pos < 0)
    75         claData.Name = path;
    76       else {
    77         pos++;
    78         claData.Name = path.Substring(pos, path.Length - pos);
     67      // turn off input variables that are constant in the training partition
     68      var allowedInputVars = new List<string>();
     69      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     70      foreach (var variableName in dataset.DoubleVariables) {
     71        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     72          variableName != targetVar)
     73          allowedInputVars.Add(variableName);
    7974      }
    8075
    81       return claData;
     76      ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
     77
     78      int trainingPartEnd = trainingIndizes.Last();
     79      classificationData.TrainingPartition.Start = trainingIndizes.First();
     80      classificationData.TrainingPartition.End = trainingPartEnd;
     81      classificationData.TestPartition.Start = trainingPartEnd;
     82      classificationData.TestPartition.End = csvFileParser.Rows;
     83
     84      classificationData.Name = Path.GetFileName(path);
     85
     86      return classificationData;
    8287    }
    8388
     
    8691    }
    8792    public override void ExportData(IClassificationProblemData instance, string path) {
    88       StringBuilder strBuilder = new StringBuilder();
     93      var strBuilder = new StringBuilder();
    8994
    9095      foreach (var variable in instance.InputVariables) {
    91         strBuilder.Append(variable + ";");
     96        strBuilder.Append(variable + CultureInfo.CurrentCulture.TextInfo.ListSeparator);
    9297      }
    93       strBuilder.Remove(strBuilder.Length - 1, 1);
     98      strBuilder.Remove(strBuilder.Length - CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length, CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length);
    9499      strBuilder.AppendLine();
    95100
    96       Dataset dataset = instance.Dataset;
     101      var dataset = instance.Dataset;
    97102
    98103      for (int i = 0; i < dataset.Rows; i++) {
    99104        for (int j = 0; j < dataset.Columns; j++) {
    100           strBuilder.Append(dataset.GetValue(i, j) + ";");
     105          if (j > 0) strBuilder.Append(CultureInfo.CurrentCulture.TextInfo.ListSeparator);
     106          strBuilder.Append(dataset.GetValue(i, j));
    101107        }
    102         strBuilder.Remove(strBuilder.Length - 1, 1);
    103108        strBuilder.AppendLine();
    104109      }
    105110
    106       using (StreamWriter writer = new StreamWriter(path)) {
     111      using (var writer = new StreamWriter(path)) {
    107112        writer.Write(strBuilder);
    108113      }
  • branches/HeuristicLab.Mono/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs

    r8211 r8585  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Globalization;
    2425using System.IO;
     26using System.Linq;
    2527using System.Text;
     28using HeuristicLab.Common;
    2629using HeuristicLab.Problems.DataAnalysis;
    2730
     
    6063
    6164      var dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    62       var claData = new ClusteringProblemData(dataset, dataset.DoubleVariables);
    6365
    64       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    65       claData.TrainingPartition.Start = 0;
    66       claData.TrainingPartition.End = trainingPartEnd;
    67       claData.TestPartition.Start = trainingPartEnd;
    68       claData.TestPartition.End = csvFileParser.Rows;
    69       int pos = path.LastIndexOf('\\');
    70       if (pos < 0)
    71         claData.Name = path;
    72       else {
    73         pos++;
    74         claData.Name = path.Substring(pos, path.Length - pos);
     66      // turn off input variables that are constant in the training partition
     67      var allowedInputVars = new List<string>();
     68      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     69      foreach (var variableName in dataset.DoubleVariables) {
     70        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0)
     71          allowedInputVars.Add(variableName);
    7572      }
    7673
    77       return claData;
     74      var clusteringData = new ClusteringProblemData(dataset, allowedInputVars);
     75
     76      int trainingPartEnd = trainingIndizes.Last();
     77      clusteringData.TrainingPartition.Start = trainingIndizes.First();
     78      clusteringData.TrainingPartition.End = trainingPartEnd;
     79      clusteringData.TestPartition.Start = trainingPartEnd;
     80      clusteringData.TestPartition.End = csvFileParser.Rows;
     81
     82      clusteringData.Name = Path.GetFileName(path);
     83
     84      return clusteringData;
    7885    }
    7986
     
    8592
    8693      foreach (var variable in instance.InputVariables) {
    87         strBuilder.Append(variable + ";");
     94        strBuilder.Append(variable + CultureInfo.CurrentCulture.TextInfo.ListSeparator);
    8895      }
    89       strBuilder.Remove(strBuilder.Length - 1, 1);
     96      strBuilder.Remove(strBuilder.Length - CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length, CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length);
    9097      strBuilder.AppendLine();
    9198
     
    94101      for (int i = 0; i < dataset.Rows; i++) {
    95102        for (int j = 0; j < dataset.Columns; j++) {
    96           strBuilder.Append(dataset.GetValue(i, j) + ";");
     103          if (j > 0) strBuilder.Append(CultureInfo.CurrentCulture.TextInfo.ListSeparator);
     104          strBuilder.Append(dataset.GetValue(i, j));
    97105        }
    98         strBuilder.Remove(strBuilder.Length - 1, 1);
    99106        strBuilder.AppendLine();
    100107      }
  • branches/HeuristicLab.Mono/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Plugin.cs.frame

    r8246 r8585  
    2525  [Plugin("HeuristicLab.Problems.Instances.DataAnalysis", "3.3.7.$WCREV$")]
    2626  [PluginFile("HeuristicLab.Problems.Instances.DataAnalysis-3.3.dll", PluginFileType.Assembly)]
     27  [PluginDependency("HeuristicLab.Common", "3.3")]
    2728  [PluginDependency("HeuristicLab.Core", "3.3")]
    2829  [PluginDependency("HeuristicLab.Data", "3.3")]
  • branches/HeuristicLab.Mono/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/CSV/RegressionCSVInstanceProvider.cs

    r8211 r8585  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Globalization;
    2425using System.IO;
    2526using System.Linq;
    2627using System.Text;
     28using HeuristicLab.Common;
    2729using HeuristicLab.Problems.DataAnalysis;
    2830
     
    5961
    6062      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    61       string targetVar = csvFileParser.VariableNames.Where(x => dataset.DoubleVariables.Contains(x)).Last();
     63      string targetVar = dataset.DoubleVariables.Last();
    6264
    63       IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));
     65      // turn off input variables that are constant in the training partition
     66      var allowedInputVars = new List<string>();
     67      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     68      foreach (var variableName in dataset.DoubleVariables) {
     69        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     70          variableName != targetVar)
     71          allowedInputVars.Add(variableName);
     72      }
    6473
    65       IRegressionProblemData regData = new RegressionProblemData(dataset, allowedInputVars, targetVar);
     74      IRegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar);
    6675
    67       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    68       regData.TrainingPartition.Start = 0;
    69       regData.TrainingPartition.End = trainingPartEnd;
    70       regData.TestPartition.Start = trainingPartEnd;
    71       regData.TestPartition.End = csvFileParser.Rows;
     76      var trainingPartEnd = trainingIndizes.Last();
     77      regressionData.TrainingPartition.Start = trainingIndizes.First();
     78      regressionData.TrainingPartition.End = trainingPartEnd;
     79      regressionData.TestPartition.Start = trainingPartEnd;
     80      regressionData.TestPartition.End = csvFileParser.Rows;
    7281
    73       int pos = path.LastIndexOf('\\');
    74       if (pos < 0)
    75         regData.Name = path;
    76       else {
    77         pos++;
    78         regData.Name = path.Substring(pos, path.Length - pos);
    79       }
    80       return regData;
     82      regressionData.Name = Path.GetFileName(path);
     83
     84      return regressionData;
    8185    }
    8286
     
    8589    }
    8690    public override void ExportData(IRegressionProblemData instance, string path) {
    87       StringBuilder strBuilder = new StringBuilder();
     91      var strBuilder = new StringBuilder();
    8892
    8993      foreach (var variable in instance.InputVariables) {
    90         strBuilder.Append(variable + ";");
     94        strBuilder.Append(variable + CultureInfo.CurrentCulture.TextInfo.ListSeparator);
    9195      }
    92       strBuilder.Remove(strBuilder.Length - 1, 1);
     96      strBuilder.Remove(strBuilder.Length - CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length, CultureInfo.CurrentCulture.TextInfo.ListSeparator.Length);
    9397      strBuilder.AppendLine();
    9498
    95       Dataset dataset = instance.Dataset;
     99      var dataset = instance.Dataset;
    96100
    97101      for (int i = 0; i < dataset.Rows; i++) {
    98102        for (int j = 0; j < dataset.Columns; j++) {
    99           strBuilder.Append(dataset.GetValue(i, j) + ";");
     103          if (j > 0) strBuilder.Append(CultureInfo.CurrentCulture.TextInfo.ListSeparator);
     104          strBuilder.Append(dataset.GetValue(i, j));
    100105        }
    101         strBuilder.Remove(strBuilder.Length - 1, 1);
    102106        strBuilder.AppendLine();
    103107      }
    104108
    105       using (StreamWriter writer = new StreamWriter(path)) {
     109      using (var writer = new StreamWriter(path)) {
    106110        writer.Write(strBuilder);
    107111      }
  • branches/HeuristicLab.Mono/HeuristicLab.Problems.Instances.DataAnalysis/3.3/TableFileParser.cs

    r7851 r8585  
    3232namespace HeuristicLab.Problems.Instances.DataAnalysis {
    3333  public class TableFileParser {
    34     private const int BUFFER_SIZE = 1024;
     34    private const int BUFFER_SIZE = 65536;
    3535    private static readonly char[] POSSIBLE_SEPARATORS = new char[] { ',', ';', '\t' };
    3636    private Tokenizer tokenizer;
Note: See TracChangeset for help on using the changeset viewer.