Free cookie consent management tool by TermsFeed Policy Generator

Changeset 9135


Ignore:
Timestamp:
01/09/13 16:27:12 (12 years ago)
Author:
sforsten
Message:

#1998:

  • OneR handles missing values separately
  • adapted OneRClassificationModelView to show the class of missing values
  • double-clicking a row header in ClassificationSolutionComparisonView opens the selected solution in a new view
  • put a try-catch block around the creation of the linear discriminant analysis solution (the solution is only shown if creating it doesn't throw an exception)
Location:
branches/ClassificationModelComparison
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis.Views/3.4/OneRClassificationModelView.Designer.cs

    r9119 r9135  
    4646    private void InitializeComponent() {
    4747      this.dataGridView = new System.Windows.Forms.DataGridView();
    48       this.variableLabel = new System.Windows.Forms.Label();
    4948      this.intervalstart = new System.Windows.Forms.DataGridViewTextBoxColumn();
    5049      this.intervalend = new System.Windows.Forms.DataGridViewTextBoxColumn();
    5150      this.classcolumn = new System.Windows.Forms.DataGridViewTextBoxColumn();
     51      this.variableLabel = new System.Windows.Forms.Label();
     52      this.MissingValuesClassLabel = new System.Windows.Forms.Label();
    5253      ((System.ComponentModel.ISupportInitialize)(this.dataGridView)).BeginInit();
    5354      this.SuspendLayout();
     
    6869      this.dataGridView.Name = "dataGridView";
    6970      this.dataGridView.ReadOnly = true;
    70       this.dataGridView.Size = new System.Drawing.Size(361, 196);
     71      this.dataGridView.Size = new System.Drawing.Size(389, 230);
    7172      this.dataGridView.TabIndex = 0;
    72       //
    73       // variableLabel
    74       //
    75       this.variableLabel.AutoSize = true;
    76       this.variableLabel.Location = new System.Drawing.Point(3, 10);
    77       this.variableLabel.Name = "variableLabel";
    78       this.variableLabel.Size = new System.Drawing.Size(48, 13);
    79       this.variableLabel.TabIndex = 1;
    80       this.variableLabel.Text = "Variable:";
    8173      //
    8274      // intervalstart
     
    9890      this.classcolumn.ReadOnly = true;
    9991      //
     92      // variableLabel
     93      //
     94      this.variableLabel.AutoSize = true;
     95      this.variableLabel.Location = new System.Drawing.Point(3, 10);
     96      this.variableLabel.Name = "variableLabel";
     97      this.variableLabel.Size = new System.Drawing.Size(48, 13);
     98      this.variableLabel.TabIndex = 1;
     99      this.variableLabel.Text = "Variable:";
     100      //
     101      // MissingValuesClassLabel
     102      //
     103      this.MissingValuesClassLabel.Anchor = System.Windows.Forms.AnchorStyles.Top;
     104      this.MissingValuesClassLabel.AutoSize = true;
     105      this.MissingValuesClassLabel.Location = new System.Drawing.Point(165, 10);
     106      this.MissingValuesClassLabel.Name = "MissingValuesClassLabel";
     107      this.MissingValuesClassLabel.Size = new System.Drawing.Size(118, 13);
     108      this.MissingValuesClassLabel.TabIndex = 2;
     109      this.MissingValuesClassLabel.Text = "Class of missing values:";
     110      //
    100111      // OneRClassificationModelView
    101112      //
    102113      this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
    103114      this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
     115      this.Controls.Add(this.MissingValuesClassLabel);
    104116      this.Controls.Add(this.variableLabel);
    105117      this.Controls.Add(this.dataGridView);
    106118      this.Name = "OneRClassificationModelView";
    107       this.Size = new System.Drawing.Size(367, 231);
     119      this.Size = new System.Drawing.Size(395, 265);
    108120      ((System.ComponentModel.ISupportInitialize)(this.dataGridView)).EndInit();
    109121      this.ResumeLayout(false);
     
    119131    private System.Windows.Forms.DataGridViewTextBoxColumn intervalend;
    120132    private System.Windows.Forms.DataGridViewTextBoxColumn classcolumn;
     133    private System.Windows.Forms.Label MissingValuesClassLabel;
    121134  }
    122135}
  • branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis.Views/3.4/OneRClassificationModelView.cs

    r9119 r9135  
    4949        if (Content == null) {
    5050          variableLabel.Text = "Variable: ";
     51          MissingValuesClassLabel.Text = "Class of missing values: ";
    5152          dataGridView.Rows.Clear();
    5253        } else {
    5354          variableLabel.Text = "Variable: " + Content.Variable;
     55          MissingValuesClassLabel.Text = "Class of missing values: " + Content.MissingValuesClass;
    5456
    5557          dataGridView.RowCount = Content.Classes.Length;
  • branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/OneR.cs

    r9119 r9135  
    8484      string bestVariable = null;
    8585      Dictionary<double, double> bestSplits = null;
     86      double missingValuesClass = double.NaN;
    8687      int correctClassified = 0;
    8788
     
    9899        bool done = false;
    99100        int curRow = 0;
     101
     102        if (curRow < inputVariableValues.Length && Double.IsNaN(inputVariableValues[curRow])) {
     103          while (curRow < inputVariableValues.Length && Double.IsNaN(inputVariableValues[curRow])) {
     104            classCount[classValuesInDataset[curRow]] += 1;
     105            curRow++;
     106          }
     107          if (ExistsDominatingClass(classCount, out dominatingClass)) {
     108            missingValuesClass = dominatingClass;
     109          } else {
     110            missingValuesClass = GetRandomMaxClass(classCount, random);
     111          }
     112          correctClassified += classCount[missingValuesClass];
     113          classCount = PrepareClassCountDictionary(classValues);
     114        }
    100115        while (curRow < inputVariableValues.Length) {
    101116          if (newBucket) {
     
    108123            }
    109124            curSplit = inputVariableValues[curRow];
    110             curRow = SetCurRowCorrectly(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
     125            curRow = SetCurRowToEndOfSplit(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
    111126            newBucket = false;
    112127          }
    113 
    114128
    115129          if (ExistsDominatingClass(classCount, out dominatingClass)) {
    116130            while (curRow + 1 < classValuesInDataset.Length &&
    117               IsNextSplitStillDominationClass(curRow, inputVariableValues, classValuesInDataset, curSplit, dominatingClass)) {
    118               // curRow + i < classValuesInDataset.Length && classValuesInDataset[curRow + i] == dominatingClass) {
    119               curSplit = inputVariableValues[curRow + 1];
    120               classCount[classValuesInDataset[curRow + 1]] += 1;
     131              IsNextSplitStillDominatingClass(curRow, inputVariableValues, classValuesInDataset, curSplit, dominatingClass)) {
    121132              curRow++;
    122               curRow = SetCurRowCorrectly(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
     133              curSplit = inputVariableValues[curRow];
     134              classCount[classValuesInDataset[curRow]] += 1;
     135              curRow = SetCurRowToEndOfSplit(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
    123136            }
    124137
     
    144157            curSplit = inputVariableValues[curRow];
    145158            classCount[classValuesInDataset[curRow]] += 1;
    146             curRow = SetCurRowCorrectly(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
     159            curRow = SetCurRowToEndOfSplit(curRow, inputVariableValues, classValuesInDataset, classCount, curSplit);
    147160          }
    148161        }
     
    150163        if (!done) {
    151164          curSplit = Double.PositiveInfinity;
    152 
    153           IList<double> possibleClasses = new List<double>();
    154           int max = 0;
    155           foreach (var item in classCount) {
    156             if (max < item.Value) {
    157               max = item.Value;
    158               possibleClasses = new List<double>();
    159               possibleClasses.Add(item.Key);
    160             } else {
    161               possibleClasses.Add(item.Key);
    162             }
    163           }
    164           int classindex = random.Next(possibleClasses.Count);
    165           splits.Add(curSplit, possibleClasses[classindex]);
    166 
    167           curCorrectClassified += classCount[possibleClasses[classindex]];
     165          double randomClass = GetRandomMaxClass(classCount, random);
     166          splits.Add(curSplit, randomClass);
     167
     168          curCorrectClassified += classCount[randomClass];
    168169        }
    169170
     
    177178      Dictionary<double, double> mergedSplits = MergeSplits(bestSplits);
    178179
    179       var model = new OneRClassificationModel(bestVariable, mergedSplits.Keys.ToArray(), mergedSplits.Values.ToArray());
     180      var model = new OneRClassificationModel(bestVariable, mergedSplits.Keys.ToArray(), mergedSplits.Values.ToArray(), missingValuesClass);
    180181      var solution = new OneRClassificationSolution(model, (IClassificationProblemData)problemData.Clone());
    181182
     
    183184    }
    184185
    185     private static bool IsNextSplitStillDominationClass(int curRow, double[] inputVariableValues, double[] classValuesInDataset, double curSplit, double dominatingClass) {
     186    private static double GetRandomMaxClass(Dictionary<double, int> classCount, IRandom random) {
     187      IList<double> possibleClasses = new List<double>();
     188      int max = 0;
     189      foreach (var item in classCount) {
     190        if (max < item.Value) {
     191          max = item.Value;
     192          possibleClasses = new List<double>();
     193          possibleClasses.Add(item.Key);
     194        } else if (max == item.Value) {
     195          possibleClasses.Add(item.Key);
     196        }
     197      }
     198      int classindex = random.Next(possibleClasses.Count);
     199      return possibleClasses[classindex];
     200    }
     201
     202    private static bool IsNextSplitStillDominatingClass(int curRow, double[] inputVariableValues, double[] classValuesInDataset, double curSplit, double dominatingClass) {
    186203      if (curRow >= classValuesInDataset.Length) {
    187204        return false;
     
    205222    }
    206223
    207     private static int SetCurRowCorrectly(int curRow, double[] inputVariableValues, double[] classValuesInDataset, Dictionary<double, int> classCount, double curSplit) {
     224    // needed if variable contains the same value several times
     225    private static int SetCurRowToEndOfSplit(int curRow, double[] inputVariableValues, double[] classValuesInDataset, Dictionary<double, int> classCount, double curSplit) {
    208226      while (curRow + 1 < inputVariableValues.Length && inputVariableValues[curRow + 1] == curSplit) {
    209227        curRow++;
  • branches/ClassificationModelComparison/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/OneR/OneRClassificationModel.cs

    r9119 r9135  
    5050    }
    5151
     52    [Storable]
     53    protected double missingValuesClass;
     54    public double MissingValuesClass {
     55      get { return missingValuesClass; }
     56    }
     57
    5258    [StorableConstructor]
    5359    protected OneRClassificationModel(bool deserializing) : base(deserializing) { }
     
    6066    public override IDeepCloneable Clone(Cloner cloner) { return new OneRClassificationModel(this, cloner); }
    6167
    62     public OneRClassificationModel(string variable, double[] splits, double[] classes)
     68    public OneRClassificationModel(string variable, double[] splits, double[] classes, double missingValuesClass = double.NaN)
    6369      : base() {
    6470      if (splits.Length != classes.Length) {
     
    7379      this.splits = splits;
    7480      this.classes = classes;
     81      this.missingValuesClass = missingValuesClass;
    7582    }
    7683
     
    8592      Array.Sort(values, rowsArray);
    8693      int curSplit = 0, curIndex = 0;
     94      while (curIndex < values.Length && Double.IsNaN(values[curIndex])) {
     95        estimated[curIndex] = MissingValuesClass;
     96        curIndex++;
     97      }
    8798      while (curSplit < Splits.Length) {
    8899        while (curIndex < values.Length && Splits[curSplit] > values[curIndex]) {
  • branches/ClassificationModelComparison/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationSolutionComparisonView.Designer.cs

    r9119 r9135  
    6060      this.dataGridView.Size = new System.Drawing.Size(418, 257);
    6161      this.dataGridView.TabIndex = 0;
     62      this.dataGridView.MouseDoubleClick += new System.Windows.Forms.MouseEventHandler(this.dataGridView_MouseDoubleClick);
    6263      //
    6364      // ClassificationSolutionComparisonView
  • branches/ClassificationModelComparison/HeuristicLab.Problems.DataAnalysis.Views/3.4/Classification/ClassificationSolutionComparisonView.cs

    r9119 r9135  
    3333  [Content(typeof(IClassificationSolution))]
    3434  public partial class ClassificationSolutionComparisonView : DataAnalysisSolutionEvaluationView {
     35    private List<IClassificationSolution> solutions;
     36
    3537    public ClassificationSolutionComparisonView() {
    3638      InitializeComponent();
     
    7577          dataGridView.Rows.Clear();
    7678          dataGridView.Columns.Clear();
     79          solutions.Clear();
    7780        } else {
    7881
    7982          IClassificationProblemData problemData = Content.ProblemData;
    8083          Dataset dataset = problemData.Dataset;
    81           List<IClassificationSolution> solutions = new List<IClassificationSolution>() { Content };
     84          solutions = new List<IClassificationSolution>() { Content };
    8285          solutions.AddRange(GenerateClassificationSolutions(problemData));
    8386
     
    126129
    127130    private IEnumerable<IClassificationSolution> GenerateClassificationSolutions(IClassificationProblemData problemData) {
     131      var solutions = new List<IClassificationSolution>();
    128132      var zeroR = ZeroR.CreateZeroRSolution(problemData);
    129133      zeroR.Name = "0R Classification Solution";
     134      solutions.Add(zeroR);
    130135      var oneR = OneR.CreateOneRSolution(problemData, 6, new FastRandom());
    131136      oneR.Name = "1R Classification Solution";
    132       var lda = LinearDiscriminantAnalysis.CreateLinearDiscriminantAnalysisSolution(problemData);
    133       lda.Name = "Linear Discriminant Analysis Solution";
    134       return new List<IClassificationSolution>() { zeroR, oneR, lda };
     137      solutions.Add(oneR);
     138      try {
     139        var lda = LinearDiscriminantAnalysis.CreateLinearDiscriminantAnalysisSolution(problemData);
     140        lda.Name = "Linear Discriminant Analysis Solution";
     141        solutions.Add(lda);
     142      }
     143      catch (NotSupportedException) { }
     144      catch (ArgumentException) { }
     145      return solutions;
     146    }
     147
     148    private void dataGridView_MouseDoubleClick(object sender, MouseEventArgs e) {
     149      var hittestinfo = dataGridView.HitTest(e.X, e.Y);
     150      if (hittestinfo.Type != DataGridViewHitTestType.RowHeader) { return; }
     151      if (hittestinfo.RowIndex > solutions.Count) { return; }
     152
     153      MainFormManager.MainForm.ShowContent(solutions[hittestinfo.RowIndex]);
    135154    }
    136155  }
Note: See TracChangeset for help on using the changeset viewer.