1 | #region License Information
|
---|
2 | /* HeuristicLab
|
---|
3 | * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
4 | *
|
---|
5 | * This file is part of HeuristicLab.
|
---|
6 | *
|
---|
7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
8 | * it under the terms of the GNU General Public License as published by
|
---|
9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
10 | * (at your option) any later version.
|
---|
11 | *
|
---|
12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
15 | * GNU General Public License for more details.
|
---|
16 | *
|
---|
17 | * You should have received a copy of the GNU General Public License
|
---|
18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
19 | */
|
---|
20 | #endregion
|
---|
21 |
|
---|
22 | using System.IO;
|
---|
23 | using System.Linq;
|
---|
24 | using HEAL.Attic;
|
---|
25 | using HeuristicLab.Algorithms.GeneticAlgorithm;
|
---|
26 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
27 | using HeuristicLab.Problems.DataAnalysis;
|
---|
28 | using HeuristicLab.Problems.DataAnalysis.Symbolic;
|
---|
29 | using HeuristicLab.Problems.DataAnalysis.Symbolic.Classification;
|
---|
30 | using HeuristicLab.Problems.Instances.DataAnalysis;
|
---|
31 | using HeuristicLab.Selection;
|
---|
32 | using Microsoft.VisualStudio.TestTools.UnitTesting;
|
---|
33 |
|
---|
namespace HeuristicLab.Tests {
  /// <summary>
  /// Creates, serializes and executes the "Genetic Programming - Symbolic Classification" sample,
  /// i.e. a genetic algorithm configured for a symbolic classification problem on the
  /// mammography data set provided by the UCI instance provider.
  /// </summary>
  [TestClass]
  public class GPSymbolicClassificationSampleTest {
    private const string SampleFileName = "SGP_SymbClass";

    // Absolute tolerance used for all floating point comparisons of algorithm results.
    private const double Tolerance = 1E-8;

    private static readonly ProtoBufSerializer serializer = new ProtoBufSerializer();

    /// <summary>
    /// Builds the sample algorithm and serializes it to a file inside the samples directory.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Create")]
    [TestProperty("Time", "medium")]
    public void CreateGpSymbolicClassificationSampleTest() {
      GeneticAlgorithm sample = CreateGpSymbolicClassificationSample();

      string directory = SamplesUtils.SamplesDirectory;
      string fileName = SampleFileName + SamplesUtils.SampleFileExtension;
      serializer.Serialize(sample, Path.Combine(directory, fileName));
    }

    /// <summary>
    /// Runs the sample algorithm with random seeding switched off and compares the
    /// reported qualities, evaluation count and solution accuracies with fixed reference values.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Execute")]
    [TestProperty("Time", "long")]
    public void RunGpSymbolicClassificationSampleTest() {
      GeneticAlgorithm algorithm = CreateGpSymbolicClassificationSample();
      algorithm.SetSeedRandomly.Value = false;
      SamplesUtils.RunAlgorithm(algorithm);

      var bestQuality = SamplesUtils.GetDoubleResult(algorithm, "BestQuality");
      Assert.AreEqual(0.141880203907627, bestQuality, Tolerance);

      var averageQuality = SamplesUtils.GetDoubleResult(algorithm, "CurrentAverageQuality");
      Assert.AreEqual(4.3246992327753295, averageQuality, Tolerance);

      var worstQuality = SamplesUtils.GetDoubleResult(algorithm, "CurrentWorstQuality");
      Assert.AreEqual(100.62175156249987, worstQuality, Tolerance);

      var evaluatedSolutions = SamplesUtils.GetIntResult(algorithm, "EvaluatedSolutions");
      Assert.AreEqual(100900, evaluatedSolutions);

      AssertSolutionAccuracies(algorithm, "Best training solution", 0.80875, 0.795031055900621);
      AssertSolutionAccuracies(algorithm, "Best validation solution", 0.81375, 0.788819875776398);
    }

    /// <summary>
    /// Reads the classification solution stored under <paramref name="resultName"/> in the
    /// algorithm's results and checks its training and test accuracy (in that order).
    /// </summary>
    private static void AssertSolutionAccuracies(GeneticAlgorithm algorithm, string resultName,
                                                 double expectedTrainingAccuracy, double expectedTestAccuracy) {
      var solution = (IClassificationSolution)algorithm.Results[resultName].Value;
      Assert.AreEqual(expectedTrainingAccuracy, solution.TrainingAccuracy, Tolerance);
      Assert.AreEqual(expectedTestAccuracy, solution.TestAccuracy, Tolerance);
    }

    /// <summary>
    /// Assembles the complete sample: the algorithm instance is created first, then the
    /// problem is built and finally the algorithm is configured for that problem.
    /// </summary>
    private GeneticAlgorithm CreateGpSymbolicClassificationSample() {
      var algorithm = new GeneticAlgorithm();
      SymbolicClassificationSingleObjectiveProblem problem = CreateMammographyProblem();
      ConfigureAlgorithm(algorithm, problem);
      return algorithm;
    }

    #region Problem Configuration
    /// <summary>
    /// Creates the symbolic classification problem, assigning problem data first,
    /// then the grammar and afterwards the remaining problem parameters.
    /// </summary>
    private static SymbolicClassificationSingleObjectiveProblem CreateMammographyProblem() {
      var problem = new SymbolicClassificationSingleObjectiveProblem {
        Name = "Mammography Classification Problem",
        Description = "Mammography dataset imported from the UCI machine learning repository (http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass)"
      };

      problem.ProblemData = LoadMammographyData();
      problem.SymbolicExpressionTreeGrammar = CreateGrammar();

      // remaining problem parameters
      problem.BestKnownQuality.Value = 0.0;
      problem.FitnessCalculationPartition.Start = 0;
      problem.FitnessCalculationPartition.End = 400;
      problem.ValidationPartition.Start = 400;
      problem.ValidationPartition.End = 800;
      problem.RelativeNumberOfEvaluatedSamples.Value = 1;
      problem.MaximumSymbolicExpressionTreeLength.Value = 100;
      problem.MaximumSymbolicExpressionTreeDepth.Value = 10;
      problem.MaximumFunctionDefinitions.Value = 0;
      problem.MaximumFunctionArguments.Value = 0;
      problem.EvaluatorParameter.Value = new SymbolicClassificationSingleObjectiveMeanSquaredErrorEvaluator();
      return problem;
    }

    /// <summary>
    /// Loads the "Mammography, M. Elter, 2007" instance from the UCI instance provider,
    /// selects "Severity" as target, chooses the allowed input variables and defines
    /// the training (0-800) and test (800-961) partitions.
    /// </summary>
    private static ClassificationProblemData LoadMammographyData() {
      var provider = new UCIInstanceProvider();
      var matchingDescriptors = provider.GetDataDescriptors()
        .Where(d => d.Name.Equals("Mammography, M. Elter, 2007"));
      var descriptor = matchingDescriptors.Single();
      var data = (ClassificationProblemData)provider.LoadData(descriptor);

      data.TargetVariableParameter.Value = data.TargetVariableParameter.ValidValues
        .First(candidate => candidate.Value == "Severity");

      SetInputVariableState(data, "BI-RADS", false);
      SetInputVariableState(data, "Age", true);
      SetInputVariableState(data, "Shape", true);
      SetInputVariableState(data, "Margin", true);
      SetInputVariableState(data, "Density", true);
      SetInputVariableState(data, "Severity", false);

      data.TrainingPartition.Start = 0;
      data.TrainingPartition.End = 800;
      data.TestPartition.Start = 800;
      data.TestPartition.End = 961;

      data.Name = "Data imported from mammographic_masses.csv";
      data.Description = "Original dataset: http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass, missing values have been replaced with median values.";
      return data;
    }

    /// <summary>
    /// Sets the checked state of the single input variable entry whose value equals
    /// <paramref name="variableName"/>.
    /// </summary>
    private static void SetInputVariableState(ClassificationProblemData data, string variableName, bool isChecked) {
      var entry = data.InputVariables.Single(item => item.Value == variableName);
      data.InputVariables.SetItemCheckedState(entry, isChecked);
    }

    /// <summary>
    /// Creates the default classification grammar, disables the variable condition symbol
    /// and adjusts the parameters of the variable and constant symbols.
    /// </summary>
    private static TypeCoherentExpressionGrammar CreateGrammar() {
      var grammar = new TypeCoherentExpressionGrammar();
      grammar.ConfigureAsDefaultClassificationGrammar();
      grammar.Symbols.OfType<VariableCondition>().Single().Enabled = false;

      // for backwards compatibility
      foreach (var variableSymbol in grammar.Symbols.OfType<VariableBase>()) {
        variableSymbol.VariableChangeProbability = 1.0;
      }

      var variable = grammar.Symbols.OfType<Variable>().Single();
      variable.WeightMu = 1.0;
      variable.WeightSigma = 1.0;
      variable.WeightManipulatorMu = 0.0;
      variable.WeightManipulatorSigma = 0.05;
      variable.MultiplicativeWeightManipulatorSigma = 0.03;

      var constant = grammar.Symbols.OfType<Constant>().Single();
      constant.MaxValue = 20;
      constant.MinValue = -20;
      constant.ManipulatorMu = 0.0;
      constant.ManipulatorSigma = 1;
      constant.MultiplicativeManipulatorSigma = 0.03;

      return grammar;
    }
    #endregion

    #region Algorithm Configuration
    /// <summary>
    /// Attaches the problem to the algorithm, applies the genetic algorithm parameters,
    /// tunes the shaking factors of the mutation operators and unchecks the overfitting
    /// and allele frequency analyzers.
    /// </summary>
    private static void ConfigureAlgorithm(GeneticAlgorithm algorithm, SymbolicClassificationSingleObjectiveProblem problem) {
      algorithm.Problem = problem;
      algorithm.Name = "Genetic Programming - Symbolic Classification";
      algorithm.Description = "A standard genetic programming algorithm to solve a classification problem (Mammographic+Mass dataset)";

      // NOTE(review): the numeric arguments are presumably population size, elites, generations,
      // mutation rate and tournament group size - confirm against SamplesUtils.
      SamplesUtils.ConfigureGeneticAlgorithmParameters<TournamentSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(
        algorithm, 1000, 1, 100, 0.15, 5);

      var manipulator = (MultiSymbolicExpressionTreeManipulator)algorithm.Mutator;
      manipulator.Operators.OfType<FullTreeShaker>().Single().ShakingFactor = 0.1;
      manipulator.Operators.OfType<OnePointShaker>().Single().ShakingFactor = 1.0;

      var overfittingAnalyzer = algorithm.Analyzer.Operators
        .OfType<SymbolicClassificationSingleObjectiveOverfittingAnalyzer>()
        .Single();
      algorithm.Analyzer.Operators.SetItemCheckedState(overfittingAnalyzer, false);

      var alleleFrequencyAnalyzer = algorithm.Analyzer.Operators
        .OfType<SymbolicDataAnalysisAlleleFrequencyAnalyzer>()
        .First();
      algorithm.Analyzer.Operators.SetItemCheckedState(alleleFrequencyAnalyzer, false);
    }
    #endregion
  }
}
|
---|