Changeset 10982


Ignore:
Timestamp:
06/11/14 13:38:58 (6 years ago)
Author:
rstoll
Message:
  • ProblemDataCreator handled input variables incorrectly:

it did not exclude non-double input variables
it treated the target variable as an additional input variable
it did not consider constant input variables

Location:
branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.4
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.4/Implementations/CorrelationMatrixContent.cs

    r10978 r10982  
    5151      : base(original, cloner) {
    5252    }
     53
    5354    public override IDeepCloneable Clone(Cloner cloner) {
    5455      return new CorrelationMatrixContent(this, cloner);
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.4/ProblemDataCreator.cs

    r10922 r10982  
    2222using System;
    2323using System.Collections.Generic;
     24using HeuristicLab.Common;
    2425using HeuristicLab.Problems.DataAnalysis;
    2526using HeuristicLab.Problems.DataAnalysis.Transformations;
     
    3536    private Dataset exporteDataset;
    3637
    37     private IEnumerable<string> InputVariables { get { return context.Data.VariableNames; } }
    3838    private IList<ITransformation> Transformations { get { return context.Data.Transformations; } }
    3939
     
    6565      var targetVariable = oldProblemData.TargetVariable;
    6666      // target variable must be double and must exist in the new dataset
    67       return new RegressionProblemData(ExportedDataset, InputVariables, targetVariable, Transformations);
     67      return new RegressionProblemData(ExportedDataset, GetDoubleInputVariables(targetVariable), targetVariable, Transformations);
    6868    }
    6969
     
    7171      var targetVariable = oldProblemData.TargetVariable;
    7272      // target variable must be double and must exist in the new dataset
    73       return new ClassificationProblemData(ExportedDataset, InputVariables, targetVariable, Transformations);
     73      return new ClassificationProblemData(ExportedDataset, GetDoubleInputVariables(targetVariable), targetVariable, Transformations);
    7474    }
    7575
    7676    private IDataAnalysisProblemData CreateClusteringData(ClusteringProblemData oldProblemData) {
    77       return new ClusteringProblemData(ExportedDataset, InputVariables, Transformations);
     77      return new ClusteringProblemData(ExportedDataset, GetDoubleInputVariables(String.Empty), Transformations);
    7878    }
    7979
     
    8686      problemData.TestPartition.End = ppData.TestPartition.End;
    8787    }
     88
     89    private IEnumerable<string> GetDoubleInputVariables(string targetVariable) {
     90      var variableNames = new List<string>();
     91      for (int i = 0; i < context.Data.Columns; ++i) {
     92        var variableName = context.Data.GetVariableName(i);
     93        if (context.Data.IsType<double>(i)
     94          && variableName != targetVariable
     95          && IsNotConstantInputVariable(context.Data.GetValues<double>(i))) {
     96
     97          variableNames.Add(variableName);
     98        }
     99      }
     100      return variableNames;
     101    }
     102
     103    private bool IsNotConstantInputVariable(IList<double> list) {
     104      return context.Data.TrainingPartition.End - context.Data.TrainingPartition.Start > 1 || list.Range() > 0;
     105    }
    88106  }
    89107}
Note: See TracChangeset for help on using the changeset viewer.