Ignore:
Timestamp:
09/04/12 11:32:31 (10 years ago)
Author:
gkronber
Message:

#1927: Implemented a check that deactivates input variables which are constant in the training partition, in the CSV problem instance providers for regression, classification, and clustering.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.Instances.DataAnalysis/3.3/Regression/CSV/RegressionCSVInstanceProvider.cs

    r8530 r8566  
    2626using System.Linq;
    2727using System.Text;
     28using HeuristicLab.Common;
    2829using HeuristicLab.Problems.DataAnalysis;
    2930
     
    6061
    6162      Dataset dataset = new Dataset(csvFileParser.VariableNames, csvFileParser.Values);
    62       string targetVar = csvFileParser.VariableNames.Where(x => dataset.DoubleVariables.Contains(x)).Last();
     63      string targetVar = dataset.DoubleVariables.Last();
    6364
    64       IEnumerable<string> allowedInputVars = dataset.DoubleVariables.Where(x => !x.Equals(targetVar));
     65      // turn off input variables that are constant in the training partition
     66      var allowedInputVars = new List<string>();
     67      var trainingIndizes = Enumerable.Range(0, (csvFileParser.Rows * 2) / 3);
     68      foreach (var variableName in dataset.DoubleVariables) {
     69        if (dataset.GetDoubleValues(variableName, trainingIndizes).Range() > 0 &&
     70          variableName != targetVar)
     71          allowedInputVars.Add(variableName);
     72      }
    6573
    66       IRegressionProblemData regData = new RegressionProblemData(dataset, allowedInputVars, targetVar);
     74      IRegressionProblemData regressionData = new RegressionProblemData(dataset, allowedInputVars, targetVar);
    6775
    68       int trainingPartEnd = csvFileParser.Rows * 2 / 3;
    69       regData.TrainingPartition.Start = 0;
    70       regData.TrainingPartition.End = trainingPartEnd;
    71       regData.TestPartition.Start = trainingPartEnd;
    72       regData.TestPartition.End = csvFileParser.Rows;
     76      var trainingPartEnd = trainingIndizes.Last();
     77      regressionData.TrainingPartition.Start = trainingIndizes.First();
     78      regressionData.TrainingPartition.End = trainingPartEnd;
     79      regressionData.TestPartition.Start = trainingPartEnd;
     80      regressionData.TestPartition.End = csvFileParser.Rows;
    7381
    74       int pos = path.LastIndexOf('\\');
    75       if (pos < 0)
    76         regData.Name = path;
    77       else {
    78         pos++;
    79         regData.Name = path.Substring(pos, path.Length - pos);
    80       }
    81       return regData;
     82      regressionData.Name = Path.GetFileName(path);
     83
     84      return regressionData;
    8285    }
    8386
Note: See TracChangeset for help on using the changeset viewer.