Free cookie consent management tool by TermsFeed Policy Generator

Changeset 6223 for trunk


Ignore:
Timestamp:
05/17/11 14:55:51 (14 years ago)
Author:
gkronber
Message:

#1524 Made some additional changes to restrict the set of allowed target variables.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs

    r6186 r6223  
    3737    private const string ClassNamesParameterName = "ClassNames";
    3838    private const string ClassificationPenaltiesParameterName = "ClassificationPenalties";
    39     private const int MaximumNumberOfClass = 100;
     39    private const int MaximumNumberOfClasses = 20;
    4040    private const int InspectedRowsToDetermineTargets = 500;
    4141
     
    266266
    267267    private static IEnumerable<string> CheckVariablesForPossibleTargetVariables(Dataset dataset) {
     268      int maxSamples = Math.Min(InspectedRowsToDetermineTargets, dataset.Rows);
    268269      var validTargetVariables = from v in dataset.VariableNames
    269                                  let DistinctValues = dataset.Rows > InspectedRowsToDetermineTargets ? dataset.GetVariableValues(v, 0, InspectedRowsToDetermineTargets).Distinct().Count()
    270                                                                         : dataset.GetVariableValues(v).Distinct().Count()
    271                                  where DistinctValues < MaximumNumberOfClass
     270                                 let DistinctValues = dataset.GetVariableValues(v)
     271                                   .Take(maxSamples)
     272                                   .Distinct()
     273                                   .Count()
     274                                 where DistinctValues < MaximumNumberOfClasses
    272275                                 select v;
    273276
    274277      if (!validTargetVariables.Any())
    275         throw new ArgumentException("Import of classification problem data was not successfull, because no target variable was found." +
    276           " A target variable must have at most " + MaximumNumberOfClass + " distinct values to be applicable to classification.");
     278        throw new ArgumentException("Import of classification problem data was not successful, because no target variable was found." +
     279          " A target variable must have at most " + MaximumNumberOfClasses + " distinct values to be applicable to classification.");
    277280      return validTargetVariables;
    278281    }
Note: See TracChangeset for help on using the changeset viewer.