Changeset 10570 for branches/ClassificationModelComparison
- Timestamp:
- 03/11/14 15:41:18 (11 years ago)
- File:
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/OneRTest.cs
r10569 r10570 67 67 List<Split> bestSplits = null; 68 68 string bestVariable = string.Empty; 69 double bestMissingValuesClass = double.NaN; 70 var classValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices); 69 71 70 var classValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);71 72 foreach (var variable in problemData.AllowedInputVariables) { 72 73 var inputValues = problemData.Dataset.GetDoubleValues(variable, problemData.TrainingIndices); 73 74 var samples = inputValues.Zip(classValues, (i, v) => new Sample(i, v)).OrderBy(s => s.inputValue); 75 76 var missingValuesDistribution = samples.Where(s => double.IsNaN(s.inputValue)).GroupBy(s => s.classValue).ToDictionary(s => s.Key, s => s.Count()).MaxItems(s => s.Value).FirstOrDefault(); 74 77 75 78 //calculate class distributions for all distinct inputValues … … 77 80 List<double> thresholds = new List<double>(); 78 81 double lastValue = double.NaN; 79 foreach (var sample in samples ) {82 foreach (var sample in samples.Where(s => !double.IsNaN(s.inputValue))) { 80 83 if (sample.inputValue > lastValue || double.IsNaN(lastValue)) { 81 84 if (!double.IsNaN(lastValue)) thresholds.Add((lastValue + sample.inputValue) / 2); … … 96 99 for (int i = 1; i < classDistributions.Count; i++) { 97 100 var samplesInSplit = distribution.Max(d => d.Value); 98 //join splits if t oo few sample in split or the distributions has the same maximum class value101 //join splits if there are too few samples in the split or the distributions has the same maximum class value as the current split 99 102 if (samplesInSplit < minBucketSize || 100 103 classDistributions[i].MaxItems(d => d.Value).Select(d => d.Key).Contains( … … 113 116 int correctClassified = 0; 114 117 int splitIndex = 0; 115 foreach (var sample in samples ) {118 foreach (var sample in samples.Where(s => !double.IsNaN(s.inputValue))) { 116 119 while (sample.inputValue >= 
splits[splitIndex].thresholdValue) 117 120 splitIndex++; 118 121 correctClassified += sample.classValue == splits[splitIndex].classValue ? 1 : 0; 119 122 } 123 correctClassified += missingValuesDistribution.Value; 120 124 121 125 if (correctClassified > bestClassified) { … … 123 127 bestSplits = splits; 124 128 bestVariable = variable; 129 bestMissingValuesClass = missingValuesDistribution.Value == 0 ? double.NaN : missingValuesDistribution.Key; 125 130 } 126 131 } … … 134 139 } 135 140 136 var model = new OneRClassificationModel(bestVariable, bestSplits.Select(s => s.thresholdValue).ToArray(), bestSplits.Select(s => s.classValue).ToArray(), 0);141 var model = new OneRClassificationModel(bestVariable, bestSplits.Select(s => s.thresholdValue).ToArray(), bestSplits.Select(s => s.classValue).ToArray(), bestMissingValuesClass); 137 142 var solution = new OneRClassificationSolution(model, (IClassificationProblemData)problemData.Clone()); 138 143
Note: See TracChangeset for help on using the changeset viewer.