Changeset 6195 for branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs
- Timestamp:
- 05/14/11 16:45:46 (13 years ago)
- Location:
- branches/histogram
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/histogram
-
branches/histogram/HeuristicLab.Problems.DataAnalysis
- Property svn:mergeinfo changed
/branches/DataAnalysis SolutionEnsembles/HeuristicLab.Problems.DataAnalysis (added) merged: 5816
/trunk/sources/HeuristicLab.Problems.DataAnalysis merged: 6092,6095,6099,6184,6186
- Property svn:mergeinfo changed
-
branches/histogram/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs
r5847  r6195
   37     37      private const string ClassNamesParameterName = "ClassNames";
   38     38      private const string ClassificationPenaltiesParameterName = "ClassificationPenalties";
          39  +   private const int MaximumNumberOfClass = 100;
          40  +   private const int InspectedRowsToDetermineTargets = 500;
   39     41
   40     42      #region default data
    …      …
  252    254      public ClassificationProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable)
  253    255        : base(dataset, allowedInputVariables) {
  254         -     var variables = InputVariables.Select(x => x.AsReadOnly()).ToList();
  255         -     Parameters.Add(new ConstrainedValueParameter<StringValue>(TargetVariableParameterName, new ItemSet<StringValue>(variables), variables.Where(x => x.Value == targetVariable).First()));
         256  +     var validTargetVariableValues = CheckVariablesForPossibleTargetVariables(dataset).Select(x => new StringValue(x).AsReadOnly()).ToList();
         257  +     var target = validTargetVariableValues.Where(x => x.Value == targetVariable).DefaultIfEmpty(validTargetVariableValues.First()).First();
         258  +
         259  +     Parameters.Add(new ConstrainedValueParameter<StringValue>(TargetVariableParameterName, new ItemSet<StringValue>(validTargetVariableValues), target));
  256    260        Parameters.Add(new FixedValueParameter<StringMatrix>(ClassNamesParameterName, ""));
  257    261        Parameters.Add(new FixedValueParameter<DoubleMatrix>(ClassificationPenaltiesParameterName, ""));
    …      …
  260    264        RegisterParameterEvents();
  261    265      }
         266  +
         267  +   private static IEnumerable<string> CheckVariablesForPossibleTargetVariables(Dataset dataset) {
         268  +     var validTargetVariables = from v in dataset.VariableNames
         269  +                                let DistinctValues = dataset.Rows > InspectedRowsToDetermineTargets ? dataset.GetVariableValues(v, 0, InspectedRowsToDetermineTargets).Distinct().Count()
         270  +                                                     : dataset.GetVariableValues(v).Distinct().Count()
         271  +                                where DistinctValues < MaximumNumberOfClass
         272  +                                select v;
         273  +
         274  +     if (!validTargetVariables.Any())
         275  +       throw new ArgumentException("Import of classification problem data was not successfull, because no target variable was found." +
         276  +         " A target variable must have at most " + MaximumNumberOfClass + " distinct values to be applicable to classification.");
         277  +     return validTargetVariables;
         278  +   }
         279  +
  262    280
  263    281      private void ResetTargetVariableDependentMembers() {
Note: See TracChangeset for help on using the changeset viewer.