Changeset 9135 for branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear
- Timestamp: 01/09/13 16:27:12 (12 years ago)
- Location: branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/OneR.cs
r9119 r9135
   84    84   string bestVariable = null;
   85    85   Dictionary<double, double> bestSplits = null;
         86 + double missingValuesClass = double.NaN;
   86    87   int correctClassified = 0;
   87    88
    …     …
   98    99   bool done = false;
   99   100   int curRow = 0;
        101 +
        102 + if (curRow < inputVariableValues.Length && Double.IsNaN(inputVariableValues[curRow])) {
        103 + while (curRow < inputVariableValues.Length && Double.IsNaN(inputVariableValues[curRow])) {
        104 + classCount[classValuesInDataset[curRow]] += 1;
        105 + curRow++;
        106 + }
        107 + if (ExistsDominatingClass(classCount, out dominatingClass)) {
        108 + missingValuesClass = dominatingClass;
        109 + } else {
        110 + missingValuesClass = GetRandomMaxClass(classCount, random);
        111 + }
        112 + correctClassified += classCount[missingValuesClass];
        113 + classCount = PrepareClassCountDictionary(classValues);
        114 + }
  100   115   while (curRow < inputVariableValues.Length) {
  101   116   if (newBucket) {
    …     …
  108   123   }
  109   124   curSplit = inputVariableValues[curRow];
  110       - curRow = SetCurRowCorrectly(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
        125 + curRow = SetCurRowToEndOfSplit(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
  111   126   newBucket = false;
  112   127   }
  113       -
  114   128
  115   129   if (ExistsDominatingClass(classCount, out dominatingClass)) {
  116   130   while (curRow + 1 < classValuesInDataset.Length &&
  117       - IsNextSplitStillDominationClass(curRow, inputVariableValues, classValuesInDataset, curSplit, dominatingClass)) {
  118       - // curRow + i < classValuesInDataset.Length && classValuesInDataset[curRow + i] == dominatingClass) {
  119       - curSplit = inputVariableValues[curRow + 1];
  120       - classCount[classValuesInDataset[curRow + 1]] += 1;
        131 + IsNextSplitStillDominatingClass(curRow, inputVariableValues, classValuesInDataset, curSplit, dominatingClass)) {
  121   132   curRow++;
  122       - curRow = SetCurRowCorrectly(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
        133 + curSplit = inputVariableValues[curRow];
        134 + classCount[classValuesInDataset[curRow]] += 1;
        135 + curRow = SetCurRowToEndOfSplit(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
  123   136   }
  124   137
    …     …
  144   157   curSplit = inputVariableValues[curRow];
  145   158   classCount[classValuesInDataset[curRow]] += 1;
  146       - curRow = SetCurRowCorrectly(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
        159 + curRow = SetCurRowToEndOfSplit(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
  147   160   }
  148   161   }
    …     …
  150   163   if (!done) {
  151   164   curSplit = Double.PositiveInfinity;
  152       -
  153       - IList<double> possibleClasses = new List<double>();
  154       - int max = 0;
  155       - foreach (var item in classCount) {
  156       - if (max < item.Value) {
  157       - max = item.Value;
  158       - possibleClasses = new List<double>();
  159       - possibleClasses.Add(item.Key);
  160       - } else {
  161       - possibleClasses.Add(item.Key);
  162       - }
  163       - }
  164       - int classindex = random.Next(possibleClasses.Count);
  165       - splits.Add(curSplit, possibleClasses[classindex]);
  166       -
  167       - curCorrectClassified += classCount[possibleClasses[classindex]];
        165 + double randomClass = GetRandomMaxClass(classCount, random);
        166 + splits.Add(curSplit, randomClass);
        167 +
        168 + curCorrectClassified += classCount[randomClass];
  168   169   }
  169   170
    …     …
  177   178   Dictionary<double, double> mergedSplits = MergeSplits(bestSplits);
  178   179
  179       - var model = new OneRClassificationModel(bestVariable, mergedSplits.Keys.ToArray(), mergedSplits.Values.ToArray());
        180 + var model = new OneRClassificationModel(bestVariable, mergedSplits.Keys.ToArray(), mergedSplits.Values.ToArray(), missingValuesClass);
  180   181   var solution = new OneRClassificationSolution(model, (IClassificationProblemData)problemData.Clone());
  181   182
    …     …
  183   184   }
  184   185
  185       - private static bool IsNextSplitStillDominationClass(int curRow, double[] inputVariableValues, double[] classValuesInDataset, double curSplit, double dominatingClass) {
        186 + private static double GetRandomMaxClass(Dictionary<double, int> classCount, IRandom random) {
        187 + IList<double> possibleClasses = new List<double>();
        188 + int max = 0;
        189 + foreach (var item in classCount) {
        190 + if (max < item.Value) {
        191 + max = item.Value;
        192 + possibleClasses = new List<double>();
        193 + possibleClasses.Add(item.Key);
        194 + } else if (max == item.Value) {
        195 + possibleClasses.Add(item.Key);
        196 + }
        197 + }
        198 + int classindex = random.Next(possibleClasses.Count);
        199 + return possibleClasses[classindex];
        200 + }
        201 +
        202 + private static bool IsNextSplitStillDominatingClass(int curRow, double[] inputVariableValues, double[] classValuesInDataset, double curSplit, double dominatingClass) {
  186   203   if (curRow >= classValuesInDataset.Length) {
  187   204   return false;
    …     …
  205   222   }
  206   223
  207       - private static int SetCurRowCorrectly(int curRow, double[] inputVariableValues, double[] classValuesInDataset, Dictionary<double, int> classCount, double curSplit) {
        224 + // needed if variable contains the same value several times
        225 + private static int SetCurRowToEndOfSplit(int curRow, double[] inputVariableValues, double[] classValuesInDataset, Dictionary<double, int> classCount, double curSplit) {
  208   226   while (curRow + 1 < inputVariableValues.Length && inputVariableValues[curRow + 1] == curSplit) {
  209   227   curRow++;
branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/OneR/OneRClassificationModel.cs
r9119 r9135
   50    50   }
   51    51
         52 + [Storable]
         53 + protected double missingValuesClass;
         54 + public double MissingValuesClass {
         55 + get { return missingValuesClass; }
         56 + }
         57 +
   52    58   [StorableConstructor]
   53    59   protected OneRClassificationModel(bool deserializing) : base(deserializing) { }
    …     …
   60    66   public override IDeepCloneable Clone(Cloner cloner) { return new OneRClassificationModel(this, cloner); }
   61    67
   62       - public OneRClassificationModel(string variable, double[] splits, double[] classes)
         68 + public OneRClassificationModel(string variable, double[] splits, double[] classes, double missingValuesClass = double.NaN)
   63    69   : base() {
   64    70   if (splits.Length != classes.Length) {
    …     …
   73    79   this.splits = splits;
   74    80   this.classes = classes;
         81 + this.missingValuesClass = missingValuesClass;
   75    82   }
   76    83
    …     …
   85    92   Array.Sort(values, rowsArray);
   86    93   int curSplit = 0, curIndex = 0;
         94 + while (curIndex < values.Length && Double.IsNaN(values[curIndex])) {
         95 + estimated[curIndex] = MissingValuesClass;
         96 + curIndex++;
         97 + }
   87    98   while (curSplit < Splits.Length) {
   88    99   while (curIndex < values.Length && Splits[curSplit] > values[curIndex]) {
Note: See TracChangeset for help on using the changeset viewer.