Changeset 8566


Ignore:
Timestamp:
09/04/12 11:32:31 (7 years ago)
Author:
gkronber
Message:

#1927: Implemented a check that deactivates input variables that are constant in the training partition in the CSV problem instance providers for regression, classification, and clustering.

Location:
trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs

    r8530 r8566  
    2626using System.Linq;
    2727using System.Text;
     28using HeuristicLab.Common;
    2829using HeuristicLab.Problems.DataAnalysis;
    2930
     
    6263
    6364      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    64       string targetVar = csvFileParser.VariableNames.Where(x => dataset.DoubleVariables.Contains(x)).Last();
    65       IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));
     65      string targetVar = dataset.DoubleVariables.Last();
    6666
    67       ClassificationProblemData claData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
    68 
    69       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    70       claData.TrainingPartition.Start = 0;
    71       claData.TrainingPartition.End = trainingPartEnd;
    72       claData.TestPartition.Start = trainingPartEnd;
    73       claData.TestPartition.End = csvFileParser.Rows;
    74       int pos = path.LastIndexOf('\\');
    75       if (pos < 0)
    76         claData.Name = path;
    77       else {
    78         pos++;
    79         claData.Name = path.Substring(pos, path.Length - pos);
     67      // turn off input variables that are constant in the training partition
     68      var allowedInputVars = new List<string>();
     69      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     70      foreach (var variableName in dataset.DoubleVariables) {
     71        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     72          variableName != targetVar)
     73          allowedInputVars.Add(variableName);
    8074      }
    8175
    82       return claData;
     76      ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
     77
     78      int trainingPartEnd = trainingIndizes.Last();
     79      classificationData.TrainingPartition.Start = trainingIndizes.First();
     80      classificationData.TrainingPartition.End = trainingPartEnd;
     81      classificationData.TestPartition.Start = trainingPartEnd;
     82      classificationData.TestPartition.End = csvFileParser.Rows;
     83
     84      classificationData.Name = Path.GetFileName(path);
     85
     86      return classificationData;
    8387    }
    8488
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs

    r8530 r8566  
    2424using System.Globalization;
    2525using System.IO;
     26using System.Linq;
    2627using System.Text;
     28using HeuristicLab.Common;
    2729using HeuristicLab.Problems.DataAnalysis;
    2830
     
    6163
    6264      var dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    63       var claData = new ClusteringProblemData(dataset, dataset.DoubleVariables);
    6465
    65       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    66       claData.TrainingPartition.Start = 0;
    67       claData.TrainingPartition.End = trainingPartEnd;
    68       claData.TestPartition.Start = trainingPartEnd;
    69       claData.TestPartition.End = csvFileParser.Rows;
    70       int pos = path.LastIndexOf('\\');
    71       if (pos < 0)
    72         claData.Name = path;
    73       else {
    74         pos++;
    75         claData.Name = path.Substring(pos, path.Length - pos);
     66      // turn off input variables that are constant in the training partition
     67      var allowedInputVars = new List<string>();
     68      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     69      foreach (var variableName in dataset.DoubleVariables) {
     70        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0)
     71          allowedInputVars.Add(variableName);
    7672      }
    7773
    78       return claData;
     74      var clusteringData = new ClusteringProblemData(dataset, allowedInputVars);
     75
     76      int trainingPartEnd = trainingIndizes.Last();
     77      clusteringData.TrainingPartition.Start = trainingIndizes.First();
     78      clusteringData.TrainingPartition.End = trainingPartEnd;
     79      clusteringData.TestPartition.Start = trainingPartEnd;
     80      clusteringData.TestPartition.End = csvFileParser.Rows;
     81
     82      clusteringData.Name = Path.GetFileName(path);
     83
     84      return clusteringData;
    7985    }
    8086
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/CSV/RegressionCSVInstanceProvider.cs

    r8530 r8566  
    2626using System.Linq;
    2727using System.Text;
     28using HeuristicLab.Common;
    2829using HeuristicLab.Problems.DataAnalysis;
    2930
     
    6061
    6162      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    62       string targetVar = csvFileParser.VariableNames.Where(x => dataset.DoubleVariables.Contains(x)).Last();
     63      string targetVar = dataset.DoubleVariables.Last();
    6364
    64       IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));
     65      // turn off input variables that are constant in the training partition
     66      var allowedInputVars = new List<string>();
     67      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     68      foreach (var variableName in dataset.DoubleVariables) {
     69        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     70          variableName != targetVar)
     71          allowedInputVars.Add(variableName);
     72      }
    6573
    66       IRegressionProblemData regData = new RegressionProblemData(dataset, allowedInputVars, targetVar);
     74      IRegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar);
    6775
    68       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    69       regData.TrainingPartition.Start = 0;
    70       regData.TrainingPartition.End = trainingPartEnd;
    71       regData.TestPartition.Start = trainingPartEnd;
    72       regData.TestPartition.End = csvFileParser.Rows;
     76      var trainingPartEnd = trainingIndizes.Last();
     77      regressionData.TrainingPartition.Start = trainingIndizes.First();
     78      regressionData.TrainingPartition.End = trainingPartEnd;
     79      regressionData.TestPartition.Start = trainingPartEnd;
     80      regressionData.TestPartition.End = csvFileParser.Rows;
    7381
    74       int pos = path.LastIndexOf('\\');
    75       if (pos < 0)
    76         regData.Name = path;
    77       else {
    78         pos++;
    79         regData.Name = path.Substring(pos, path.Length - pos);
    80       }
    81       return regData;
     82      regressionData.Name = Path.GetFileName(path);
     83
     84      return regressionData;
    8285    }
    8386
Note: See TracChangeset for help on using the changeset viewer.