Free cookie consent management tool by TermsFeed Policy Generator

source: branches/RegressionBenchmarks/HeuristicLab.Problems.DataAnalysis.Benchmarks/3.4/ClassificationBenchmark/RealWorldProblems/Mammography.cs @ 7317

Last change on this file since 7317 was 7138, checked in by sforsten, 13 years ago

#1669:
-Iris benchmark has been corrected and the data set will be ordered randomly
-Benchmarks of Trent McConaghy have been corrected
-Descriptions have been added (Mammography and Iris)
-Bug fix in ClassificationRealWorldBenchmark

File size: 3.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2011 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections;
24using System.Collections.Generic;
25using System.Linq;
26using HeuristicLab.Data;
27
namespace HeuristicLab.Problems.DataAnalysis.Benchmarks {
  /// <summary>
  /// Real-world classification benchmark that loads its data from the
  /// "mammography.csv" file.
  /// </summary>
  /// <remarks>
  /// The description text set in the constructor refers to the UCI
  /// "Breast Cancer Wisconsin (Diagnostic)" data set.
  /// NOTE(review): csvFileParser, targetVariable, inputVariables, trainingPartition and
  /// testPartition are not declared in this class; presumably they are members of
  /// ClassificationRealWorldBenchmark - confirm against the base class.
  /// </remarks>
  public class Mammography : ClassificationRealWorldBenchmark {

    // Name of the CSV file handed to Benchmark.getParserForFile.
    private const string fileName = "mammography.csv";

    /// <summary>
    /// Sets the display name and the multi-line description of the benchmark.
    /// </summary>
    public Mammography() {
      Name = "RealWorldProblem Mammography";
      // Every logical line ends with Environment.NewLine; the first sentence previously
      // ran straight into "Website:" because its line break was missing.
      Description = "Data Set Information: Features are computed from a digitized image of a fine needle aspirate (FNA) of a breast mass." + Environment.NewLine
        + "Website: http://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29" + Environment.NewLine
        + "Attribute Information:" + Environment.NewLine
        + "1) ID number" + Environment.NewLine
        + "2) Diagnosis (M = malignant, B = benign)" + Environment.NewLine
        + "3-32)" + Environment.NewLine + Environment.NewLine
        + "Ten real-valued features are computed for each cell nucleus:" + Environment.NewLine + Environment.NewLine
        + "a) radius (mean of distances from center to points on the perimeter)" + Environment.NewLine
        + "b) texture (standard deviation of gray-scale values)" + Environment.NewLine
        + "c) perimeter" + Environment.NewLine
        + "d) area" + Environment.NewLine
        + "e) smoothness (local variation in radius lengths)" + Environment.NewLine
        + "f) compactness (perimeter^2 / area - 1.0)" + Environment.NewLine
        + "g) concavity (severity of concave portions of the contour)" + Environment.NewLine
        + "h) concave points (number of concave portions of the contour)" + Environment.NewLine
        + "i) symmetry" + Environment.NewLine
        + "j) fractal dimension (\"coastline approximation\" - 1)";
    }

    /// <summary>
    /// Parses the CSV file, records the target/input variable names and the
    /// training/test partitions, and returns the column data.
    /// </summary>
    /// <returns>
    /// The parser's value lists reordered so that the entries after the first
    /// (Columns - 1) come first, followed by the first (Columns - 1) entries;
    /// i.e. the last variable (used as target) is moved in front of the inputs.
    /// </returns>
    protected override List<IList> GetData() {
      csvFileParser = Benchmark.getParserForFile(fileName);

      // All variables except the last one are inputs; the last one is the target.
      int inputCount = csvFileParser.Columns - 1;
      targetVariable = csvFileParser.VariableNames.Last();
      inputVariables = new List<string>(csvFileParser.VariableNames.Take(inputCount));

      // Integer division: the first two thirds of the rows (rounded down) are used for training.
      // NOTE(review): the same index ends the training range and starts the test range,
      // which suggests IntRange's end is exclusive - confirm against IntRange.
      int trainingPartEnd = csvFileParser.Rows * 2 / 3;
      trainingPartition = new IntRange(0, trainingPartEnd);
      testPartition = new IntRange(trainingPartEnd, csvFileParser.Rows);

      // Concat (not Union): this is a pure reordering. Union is a set operation and would
      // silently drop a column if the same list instance occurred more than once.
      return csvFileParser.Values.Skip(inputCount)
        .Concat(csvFileParser.Values.Take(inputCount))
        .ToList();
    }
  }
}
Note: See TracBrowser for help on using the repository browser.