Ignore:
Timestamp:
09/04/12 11:32:31 (10 years ago)
Author:
gkronber
Message:

#1927: Implemented a check that deactivates input variables that are constant in the training partition in the CSV problem instance providers for regression, classification, and clustering.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Classification/CSV/ClassifiactionCSVInstanceProvider.cs

    r8530 r8566  
    2626using System.Linq;
    2727using System.Text;
     28using HeuristicLab.Common;
    2829using HeuristicLab.Problems.DataAnalysis;
    2930
     
    6263
    6364      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    64       string targetVar = csvFileParser.VariableNames.Where(x => dataset.DoubleVariables.Contains(x)).Last();
    65       IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));
     65      string targetVar = dataset.DoubleVariables.Last();
    6666
    67       ClassificationProblemData claData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
    68 
    69       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    70       claData.TrainingPartition.Start = 0;
    71       claData.TrainingPartition.End = trainingPartEnd;
    72       claData.TestPartition.Start = trainingPartEnd;
    73       claData.TestPartition.End = csvFileParser.Rows;
    74       int pos = path.LastIndexOf('\\');
    75       if (pos < 0)
    76         claData.Name = path;
    77       else {
    78         pos++;
    79         claData.Name = path.Substring(pos, path.Length - pos);
     67      // turn off input variables that are constant in the training partition
     68      var allowedInputVars = new List<string>();
     69      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     70      foreach (var variableName in dataset.DoubleVariables) {
     71        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     72          variableName != targetVar)
     73          allowedInputVars.Add(variableName);
    8074      }
    8175
    82       return claData;
     76      ClassificationProblemData classificationData = new ClassificationProblemData(dataset, allowedInputVars, targetVar);
     77
     78      int trainingPartEnd = trainingIndizes.Last();
     79      classificationData.TrainingPartition.Start = trainingIndizes.First();
     80      classificationData.TrainingPartition.End = trainingPartEnd;
     81      classificationData.TestPartition.Start = trainingPartEnd;
     82      classificationData.TestPartition.End = csvFileParser.Rows;
     83
     84      classificationData.Name = Path.GetFileName(path);
     85
     86      return classificationData;
    8387    }
    8488
Note: See TracChangeset for help on using the changeset viewer.