Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
01/07/13 17:33:56 (12 years ago)
Author:
sforsten
Message:

#1998:

  • added OneRClassificationModelView
  • added ClassificationSolutionComparisonView
  • added several calculators (ConfusionMatrixCalculator, FOneScoreCalculator, MatthewsCorrelationCoefficientCalculator)
  • fixed bug in OneR
  • added StorableClass and Item attributes to several classes
Location:
branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/OneR.cs

    r9074 r9119  
    9595        double curSplit = Double.NegativeInfinity;
    9696        Dictionary<double, double> splits = new Dictionary<double, double>();
    97         int curBucketSize = 0;
     97        bool newBucket = true;
    9898        bool done = false;
    99         for (int curRow = 0; curRow < inputVariableValues.Length; curRow++) {
    100           if (curBucketSize < minBucketSize) {
    101             curSplit = inputVariableValues[curRow];
    102             classCount[classValuesInDataset[curRow]] += 1;
    103             curBucketSize++;
    104             continue;
    105           }
     99        int curRow = 0;
     100        while (curRow < inputVariableValues.Length) {
     101          if (newBucket) {
     102            for (int i = 0; i < minBucketSize && curRow + i < inputVariableValues.Length; i++) {
     103              classCount[classValuesInDataset[curRow + i]] += 1;
     104            }
     105            curRow += minBucketSize;
     106            if (curRow >= inputVariableValues.Length) {
     107              break;
     108            }
     109            curSplit = inputVariableValues[curRow];
     110            curRow = SetCurRowCorrectly(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
     111            newBucket = false;
     112          }
     113
    106114
    107115          if (ExistsDominatingClass(classCount, out dominatingClass)) {
    108             int i = 1;
    109             while (curRow + i < classValuesInDataset.Length
    110               && classValuesInDataset[curRow + i] == dominatingClass) {
    111               curSplit = inputVariableValues[curRow + i];
    112               classCount[classValuesInDataset[curRow]] += 1;
    113               i++;
     116            while (curRow + 1 < classValuesInDataset.Length &&
     117              IsNextSplitStillDominationClass(curRow, inputVariableValues, classValuesInDataset, curSplit, dominatingClass)) {
     118              // curRow + i < classValuesInDataset.Length && classValuesInDataset[curRow + i] == dominatingClass) {
     119              curSplit = inputVariableValues[curRow + 1];
     120              classCount[classValuesInDataset[curRow + 1]] += 1;
     121              curRow++;
     122              curRow = SetCurRowCorrectly(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
    114123            }
    115124
    116125            curCorrectClassified += classCount[dominatingClass];
    117             done = curRow + i >= inputVariableValues.Length;
     126            done = curRow >= inputVariableValues.Length - 1;
    118127
    119128            if (done) {
     
    123132            }
    124133
     134            curRow++;
    125135            //intervals exclude end
    126             curSplit = inputVariableValues[curRow + i];
     136            curSplit = inputVariableValues[curRow];
    127137            splits.Add(curSplit, dominatingClass);
    128138
    129             //intervals exclude start
    130             curSplit = inputVariableValues[curRow + i - 1];
     139            //intervals include start
     140            curSplit = inputVariableValues[curRow];
    131141            classCount = PrepareClassCountDictionary(classValues);
    132             curBucketSize = 0;
    133             curRow += i - 1;
    134 
     142            newBucket = true;
    135143          } else {
    136144            curSplit = inputVariableValues[curRow];
    137145            classCount[classValuesInDataset[curRow]] += 1;
    138             curBucketSize++;
     146            curRow = SetCurRowCorrectly(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
    139147          }
    140148        }
     
    173181
    174182      return solution;
     183    }
     184
     185    private static bool IsNextSplitStillDominationClass(int curRow, double[] inputVariableValues, double[] classValuesInDataset, double curSplit, double dominatingClass) {
     186      if (curRow >= classValuesInDataset.Length) {
     187        return false;
     188      }
     189      double nextSplit = inputVariableValues[curRow + 1];
     190      int i = 1;
     191      while (curRow + i < classValuesInDataset.Length
     192        && inputVariableValues[curRow + i] == nextSplit
     193        && classValuesInDataset[curRow + i] == dominatingClass) {
     194        i++;
     195      }
     196      if (curRow + i >= classValuesInDataset.Length) {
     197        return true;
     198      }
     199      if (inputVariableValues[curRow + i] != nextSplit) {
     200        return true;
     201      }
     202      // the next split would also contain values of a class which
     203      // is not dominating (classValuesInDataset[curRow + i] != dominatingClass)
     204      return false;
     205    }
     206
     207    private static int SetCurRowCorrectly(int curRow, double[] inputVariableValues, double[] classValuesInDataset, Dictionary<double, int> classCount, double curSplit) {
     208      while (curRow + 1 < inputVariableValues.Length && inputVariableValues[curRow + 1] == curSplit) {
     209        curRow++;
     210        classCount[classValuesInDataset[curRow]] += 1;
     211      }
     212      return curRow;
    175213    }
    176214
  • branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/OneR/OneRClassificationModel.cs

    r9074 r9119  
    2929
    3030namespace HeuristicLab.Algorithms.DataAnalysis {
     31  [StorableClass]
     32  [Item("1R Classification Model", "A model that uses intervals for one variable to determine the class.")]
    3133  public class OneRClassificationModel : NamedItem, IClassificationModel {
    3234    [Storable]
     
    5254    protected OneRClassificationModel(OneRClassificationModel original, Cloner cloner)
    5355      : base(original, cloner) {
    54       this.splits = original.splits;
     56      this.variable = (string)original.variable;
     57      this.splits = (double[])original.splits.Clone();
     58      this.classes = (double[])original.classes.Clone();
    5559    }
    5660    public override IDeepCloneable Clone(Cloner cloner) { return new OneRClassificationModel(this, cloner); }
     
    5862    public OneRClassificationModel(string variable, double[] splits, double[] classes)
    5963      : base() {
     64      if (splits.Length != classes.Length) {
     65        throw new ArgumentException("Number of splits and classes has to be equal.");
     66      }
    6067      if (!Double.IsPositiveInfinity(splits[splits.Length - 1])) {
    6168        throw new ArgumentException("Last split has to be double.PositiveInfinity, so that all values are covered.");
  • branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/OneR/OneRClassificationSolution.cs

    r9074 r9119  
    2121
    2222using HeuristicLab.Common;
     23using HeuristicLab.Core;
    2324using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    2425using HeuristicLab.Problems.DataAnalysis;
    2526
    2627namespace HeuristicLab.Algorithms.DataAnalysis {
     28  [StorableClass]
     29  [Item(Name = "1R Classification Solution", Description = "Represents a 1R classification solution (model + data).")]
    2730  public class OneRClassificationSolution : ClassificationSolution {
    28     public new ConstantClassificationModel Model {
    29       get { return (ConstantClassificationModel)base.Model; }
     31    public new OneRClassificationModel Model {
     32      get { return (OneRClassificationModel)base.Model; }
    3033      set { base.Model = value; }
    3134    }
Note: See TracChangeset for help on using the changeset viewer.