Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/09/13 16:27:12 (12 years ago)
Author:
sforsten
Message:

#1998:

  • OneR handles missing values separately
  • adapted OneRClassificationModelView to show the class of missing values
  • double-clicking a row header in ClassificationSolutionComparisonView opens the selected solution in a new view
  • wrapped the creation of the linear discriminant analysis solution in a try-catch block (the solution is only shown if no exception is thrown)
Location:
branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/OneR.cs

    r9119 r9135  
    8484      string bestVariable = null;
    8585      Dictionary<double, double> bestSplits = null;
     86      double missingValuesClass = double.NaN;
    8687      int correctClassified = 0;
    8788
     
    9899        bool done = false;
    99100        int curRow = 0;
     101
     102        if (curRow < inputVariableValues.Length && Double.IsNaN(inputVariableValues[curRow])) {
     103          while (curRow < inputVariableValues.Length && Double.IsNaN(inputVariableValues[curRow])) {
     104            classCount[classValuesInDataset[curRow]] += 1;
     105            curRow++;
     106          }
     107          if (ExistsDominatingClass(classCount, out dominatingClass)) {
     108            missingValuesClass = dominatingClass;
     109          } else {
     110            missingValuesClass = GetRandomMaxClass(classCount, random);
     111          }
     112          correctClassified += classCount[missingValuesClass];
     113          classCount = PrepareClassCountDictionary(classValues);
     114        }
    100115        while (curRow < inputVariableValues.Length) {
    101116          if (newBucket) {
     
    108123            }
    109124            curSplit = inputVariableValues[curRow];
    110             curRow = SetCurRowCorrectly(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
     125            curRow = SetCurRowToEndOfSplit(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
    111126            newBucket = false;
    112127          }
    113 
    114128
    115129          if (ExistsDominatingClass(classCount, out dominatingClass)) {
    116130            while (curRow + 1 < classValuesInDataset.Length &&
    117               IsNextSplitStillDominationClass(curRow, inputVariableValues, classValuesInDataset, curSplit, dominatingClass)) {
    118               // curRow + i < classValuesInDataset.Length && classValuesInDataset[curRow + i] == dominatingClass) {
    119               curSplit = inputVariableValues[curRow + 1];
    120               classCount[classValuesInDataset[curRow + 1]] += 1;
     131              IsNextSplitStillDominatingClass(curRow, inputVariableValues, classValuesInDataset, curSplit, dominatingClass)) {
    121132              curRow++;
    122               curRow = SetCurRowCorrectly(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
     133              curSplit = inputVariableValues[curRow];
     134              classCount[classValuesInDataset[curRow]] += 1;
     135              curRow = SetCurRowToEndOfSplit(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
    123136            }
    124137
     
    144157            curSplit = inputVariableValues[curRow];
    145158            classCount[classValuesInDataset[curRow]] += 1;
    146             curRow = SetCurRowCorrectly(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
     159            curRow = SetCurRowToEndOfSplit(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
    147160          }
    148161        }
     
    150163        if (!done) {
    151164          curSplit = Double.PositiveInfinity;
    152 
    153           IList<double> possibleClasses = new List<double>();
    154           int max = 0;
    155           foreach (var item in classCount) {
    156             if (max < item.Value) {
    157               max = item.Value;
    158               possibleClasses = new List<double>();
    159               possibleClasses.Add(item.Key);
    160             } else {
    161               possibleClasses.Add(item.Key);
    162             }
    163           }
    164           int classindex = random.Next(possibleClasses.Count);
    165           splits.Add(curSplit, possibleClasses[classindex]);
    166 
    167           curCorrectClassified += classCount[possibleClasses[classindex]];
     165          double randomClass = GetRandomMaxClass(classCount, random);
     166          splits.Add(curSplit, randomClass);
     167
     168          curCorrectClassified += classCount[randomClass];
    168169        }
    169170
     
    177178      Dictionary<double, double> mergedSplits = MergeSplits(bestSplits);
    178179
    179       var model = new OneRClassificationModel(bestVariable, mergedSplits.Keys.ToArray(), mergedSplits.Values.ToArray());
     180      var model = new OneRClassificationModel(bestVariable, mergedSplits.Keys.ToArray(), mergedSplits.Values.ToArray(), missingValuesClass);
    180181      var solution = new OneRClassificationSolution(model, (IClassificationProblemData)problemData.Clone());
    181182
     
    183184    }
    184185
    185     private static bool IsNextSplitStillDominationClass(int curRow, double[] inputVariableValues, double[] classValuesInDataset, double curSplit, double dominatingClass) {
     186    private static double GetRandomMaxClass(Dictionary<double, int> classCount, IRandom random) {
     187      IList<double> possibleClasses = new List<double>();
     188      int max = 0;
     189      foreach (var item in classCount) {
     190        if (max < item.Value) {
     191          max = item.Value;
     192          possibleClasses = new List<double>();
     193          possibleClasses.Add(item.Key);
     194        } else if (max == item.Value) {
     195          possibleClasses.Add(item.Key);
     196        }
     197      }
     198      int classindex = random.Next(possibleClasses.Count);
     199      return possibleClasses[classindex];
     200    }
     201
     202    private static bool IsNextSplitStillDominatingClass(int curRow, double[] inputVariableValues, double[] classValuesInDataset, double curSplit, double dominatingClass) {
    186203      if (curRow >= classValuesInDataset.Length) {
    187204        return false;
     
    205222    }
    206223
    207     private static int SetCurRowCorrectly(int curRow, double[] inputVariableValues, double[] classValuesInDataset, Dictionary<double, int> classCount, double curSplit) {
     224    // needed if variable contains the same value several times
     225    private static int SetCurRowToEndOfSplit(int curRow, double[] inputVariableValues, double[] classValuesInDataset, Dictionary<double, int> classCount, double curSplit) {
    208226      while (curRow + 1 < inputVariableValues.Length && inputVariableValues[curRow + 1] == curSplit) {
    209227        curRow++;
  • branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/OneR/OneRClassificationModel.cs

    r9119 r9135  
    5050    }
    5151
     52    [Storable]
     53    protected double missingValuesClass;
     54    public double MissingValuesClass {
     55      get { return missingValuesClass; }
     56    }
     57
    5258    [StorableConstructor]
    5359    protected OneRClassificationModel(bool deserializing) : base(deserializing) { }
     
    6066    public override IDeepCloneable Clone(Cloner cloner) { return new OneRClassificationModel(this, cloner); }
    6167
    62     public OneRClassificationModel(string variable, double[] splits, double[] classes)
     68    public OneRClassificationModel(string variable, double[] splits, double[] classes, double missingValuesClass = double.NaN)
    6369      : base() {
    6470      if (splits.Length != classes.Length) {
     
    7379      this.splits = splits;
    7480      this.classes = classes;
     81      this.missingValuesClass = missingValuesClass;
    7582    }
    7683
     
    8592      Array.Sort(values, rowsArray);
    8693      int curSplit = 0, curIndex = 0;
     94      while (curIndex < values.Length && Double.IsNaN(values[curIndex])) {
     95        estimated[curIndex] = MissingValuesClass;
     96        curIndex++;
     97      }
    8798      while (curSplit < Splits.Length) {
    8899        while (curIndex < values.Length && Splits[curSplit] > values[curIndex]) {
Note: See TracChangeset for help on using the changeset viewer.