Ignore:
Timestamp:
09/04/12 11:32:31 (10 years ago)
Author:
gkronber
Message:

#1927: Implemented a check that deactivates input variables which are constant in the training partition, in the CSV problem instance providers for regression, classification, and clustering.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Clustering/CSV/ClusteringCSVInstanceProvider.cs

    r8530 r8566  
    2424using System.Globalization;
    2525using System.IO;
     26using System.Linq;
    2627using System.Text;
     28using HeuristicLab.Common;
    2729using HeuristicLab.Problems.DataAnalysis;
    2830
     
    6163
    6264      var dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    63       var claData = new ClusteringProblemData(dataset, dataset.DoubleVariables);
    6465
    65       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    66       claData.TrainingPartition.Start = 0;
    67       claData.TrainingPartition.End = trainingPartEnd;
    68       claData.TestPartition.Start = trainingPartEnd;
    69       claData.TestPartition.End = csvFileParser.Rows;
    70       int pos = path.LastIndexOf('\\');
    71       if (pos < 0)
    72         claData.Name = path;
    73       else {
    74         pos++;
    75         claData.Name = path.Substring(pos, path.Length - pos);
     66      // turn off input variables that are constant in the training partition
     67      var allowedInputVars = new List<string>();
     68      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     69      foreach (var variableName in dataset.DoubleVariables) {
     70        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0)
     71          allowedInputVars.Add(variableName);
    7672      }
    7773
    78       return claData;
     74      var clusteringData = new ClusteringProblemData(dataset, allowedInputVars);
     75
     76      int trainingPartEnd = trainingIndizes.Last();
     77      clusteringData.TrainingPartition.Start = trainingIndizes.First();
     78      clusteringData.TrainingPartition.End = trainingPartEnd;
     79      clusteringData.TestPartition.Start = trainingPartEnd;
     80      clusteringData.TestPartition.End = csvFileParser.Rows;
     81
     82      clusteringData.Name = Path.GetFileName(path);
     83
     84      return clusteringData;
    7985    }
    8086
Note: See TracChangeset for help on using the changeset viewer.