Changeset 6186 for trunk/sources/HeuristicLab.Problems.DataAnalysis
- Timestamp: 05/11/11 17:59:18 (14 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/sources/HeuristicLab.Problems.DataAnalysis/3.4/Implementation/Classification/ClassificationProblemData.cs
r5847 r6186 37 37 private const string ClassNamesParameterName = "ClassNames"; 38 38 private const string ClassificationPenaltiesParameterName = "ClassificationPenalties"; 39 private const int MaximumNumberOfClass = 100; 40 private const int InspectedRowsToDetermineTargets = 500; 39 41 40 42 #region default data … … 252 254 public ClassificationProblemData(Dataset dataset, IEnumerable<string> allowedInputVariables, string targetVariable) 253 255 : base(dataset, allowedInputVariables) { 254 var variables = InputVariables.Select(x => x.AsReadOnly()).ToList(); 255 Parameters.Add(new ConstrainedValueParameter<StringValue>(TargetVariableParameterName, new ItemSet<StringValue>(variables), variables.Where(x => x.Value == targetVariable).First())); 256 var validTargetVariableValues = CheckVariablesForPossibleTargetVariables(dataset).Select(x => new StringValue(x).AsReadOnly()).ToList(); 257 var target = validTargetVariableValues.Where(x => x.Value == targetVariable).DefaultIfEmpty(validTargetVariableValues.First()).First(); 258 259 Parameters.Add(new ConstrainedValueParameter<StringValue>(TargetVariableParameterName, new ItemSet<StringValue>(validTargetVariableValues), target)); 256 260 Parameters.Add(new FixedValueParameter<StringMatrix>(ClassNamesParameterName, "")); 257 261 Parameters.Add(new FixedValueParameter<DoubleMatrix>(ClassificationPenaltiesParameterName, "")); … … 260 264 RegisterParameterEvents(); 261 265 } 266 267 private static IEnumerable<string> CheckVariablesForPossibleTargetVariables(Dataset dataset) { 268 var validTargetVariables = from v in dataset.VariableNames 269 let DistinctValues = dataset.Rows > InspectedRowsToDetermineTargets ? 
dataset.GetVariableValues(v, 0, InspectedRowsToDetermineTargets).Distinct().Count() 270 : dataset.GetVariableValues(v).Distinct().Count() 271 where DistinctValues < MaximumNumberOfClass 272 select v; 273 274 if (!validTargetVariables.Any()) 275 throw new ArgumentException("Import of classification problem data was not successfull, because no target variable was found." + 276 " A target variable must have at most " + MaximumNumberOfClass + " distinct values to be applicable to classification."); 277 return validTargetVariables; 278 } 279 262 280 263 281 private void ResetTargetVariableDependentMembers() {
Note: See TracChangeset for help on using the changeset viewer.