Free cookie consent management tool by TermsFeed Policy Generator

source: stable/HeuristicLab.Tests/HeuristicLab-3.3/Samples/GPSymbolicClassificationSampleTest.cs @ 11997

Last change on this file since 11997 was 11907, checked in by jkarder, 10 years ago

#2211: merged r11450, r11466, r11483, r11514, r11515 and r11890 into stable
#2234: merged r11308, r11309, r11326, r11337, r11340, r11339, r11342, r11361, r11427, r11447, r11464, r11542, r11544, r11545, r11547, r11548 into stable
#2239: merged r11437, r11439 and r11472 into stable
#2262: merged r11436, r11440, r11471, r11474, r11477, r11479, r11480, r11605, r11657, r11721, r11734, r11735, r11787, r11788, r11789 and r11826 into stable

File size: 7.9 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System.IO;
23using System.Linq;
24using HeuristicLab.Algorithms.GeneticAlgorithm;
25using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
26using HeuristicLab.Persistence.Default.Xml;
27using HeuristicLab.Problems.DataAnalysis;
28using HeuristicLab.Problems.DataAnalysis.Symbolic;
29using HeuristicLab.Problems.DataAnalysis.Symbolic.Classification;
30using HeuristicLab.Problems.Instances.DataAnalysis;
31using HeuristicLab.Selection;
32using Microsoft.VisualStudio.TestTools.UnitTesting;
33
namespace HeuristicLab.Tests {
  /// <summary>
  /// Tests for the "Genetic Programming - Symbolic Classification" sample: one test
  /// serializes the configured algorithm to the samples directory, the other runs it
  /// and compares the results against hard-coded reference values.
  /// </summary>
  [TestClass]
  public class GPSymbolicClassificationSampleTest {
    private const string SampleFileName = "SGP_SymbClass";

    // Absolute tolerance used for every floating point comparison in this class.
    private const double Delta = 1E-8;

    /// <summary>
    /// Creates the sample algorithm and serializes it to
    /// <c>SamplesDirectory/SampleFileName + SampleFileExtension</c>.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Create")]
    [TestProperty("Time", "medium")]
    public void CreateGpSymbolicClassificationSampleTest() {
      GeneticAlgorithm sample = CreateGpSymbolicClassificationSample();
      var directory = SamplesUtils.SamplesDirectory;
      var fileName = SampleFileName + SamplesUtils.SampleFileExtension;
      XmlGenerator.Serialize(sample, Path.Combine(directory, fileName));
    }

    /// <summary>
    /// Creates the sample algorithm, turns off random seed selection, runs it and
    /// checks the reported qualities, the number of evaluated solutions and the
    /// accuracies of the best training and best validation solutions.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Execute")]
    [TestProperty("Time", "long")]
    public void RunGpSymbolicClassificationSampleTest() {
      GeneticAlgorithm algorithm = CreateGpSymbolicClassificationSample();
      // NOTE(review): the reference values below presumably belong to the seed that
      // remains configured once random seeding is disabled - confirm in SamplesUtils.
      algorithm.SetSeedRandomly.Value = false;
      SamplesUtils.RunAlgorithm(algorithm);

      var bestQuality = SamplesUtils.GetDoubleResult(algorithm, "BestQuality");
      Assert.AreEqual(0.141880203907627, bestQuality, Delta);
      var averageQuality = SamplesUtils.GetDoubleResult(algorithm, "CurrentAverageQuality");
      Assert.AreEqual(4.3246992327753295, averageQuality, Delta);
      var worstQuality = SamplesUtils.GetDoubleResult(algorithm, "CurrentWorstQuality");
      Assert.AreEqual(100.62175156249987, worstQuality, Delta);
      var evaluatedSolutions = SamplesUtils.GetIntResult(algorithm, "EvaluatedSolutions");
      Assert.AreEqual(100900, evaluatedSolutions);

      AssertAccuracies(algorithm, "Best training solution", 0.80875, 0.795031055900621);
      AssertAccuracies(algorithm, "Best validation solution", 0.81375, 0.788819875776398);
    }

    /// <summary>
    /// Reads the classification solution stored under <paramref name="resultName"/> in the
    /// algorithm's results and asserts its training accuracy, then its test accuracy.
    /// </summary>
    private static void AssertAccuracies(GeneticAlgorithm algorithm, string resultName,
      double expectedTrainingAccuracy, double expectedTestAccuracy) {
      var solution = (IClassificationSolution)algorithm.Results[resultName].Value;
      Assert.AreEqual(expectedTrainingAccuracy, solution.TrainingAccuracy, Delta);
      Assert.AreEqual(expectedTestAccuracy, solution.TestAccuracy, Delta);
    }

    /// <summary>
    /// Builds the genetic algorithm of the sample: a symbolic classification problem on
    /// the mammography data, solved with tournament selection, subtree crossover and a
    /// multi-manipulator for mutation.
    /// </summary>
    /// <returns>The fully configured, not yet executed algorithm.</returns>
    private GeneticAlgorithm CreateGpSymbolicClassificationSample() {
      // The algorithm instance is created before the problem, as in the original sample.
      var algorithm = new GeneticAlgorithm();
      var problem = CreateMammographyProblem();
      ConfigureAlgorithm(algorithm, problem);
      return algorithm;
    }

    #region Problem Configuration
    /// <summary>
    /// Creates the symbolic classification problem: problem data first, then the grammar,
    /// then the remaining problem parameters.
    /// </summary>
    private static SymbolicClassificationSingleObjectiveProblem CreateMammographyProblem() {
      var problem = new SymbolicClassificationSingleObjectiveProblem {
        Name = "Mammography Classification Problem",
        Description = "Mammography dataset imported from the UCI machine learning repository (http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass)"
      };
      problem.ProblemData = LoadMammographyData();
      problem.SymbolicExpressionTreeGrammar = CreateClassificationGrammar();

      // configure remaining problem parameters
      problem.BestKnownQuality.Value = 0.0;
      problem.FitnessCalculationPartition.Start = 0;
      problem.FitnessCalculationPartition.End = 400;
      problem.ValidationPartition.Start = 400;
      problem.ValidationPartition.End = 800;
      problem.RelativeNumberOfEvaluatedSamples.Value = 1;
      problem.MaximumSymbolicExpressionTreeLength.Value = 100;
      problem.MaximumSymbolicExpressionTreeDepth.Value = 10;
      problem.MaximumFunctionDefinitions.Value = 0;
      problem.MaximumFunctionArguments.Value = 0;
      problem.EvaluatorParameter.Value = new SymbolicClassificationSingleObjectiveMeanSquaredErrorEvaluator();
      return problem;
    }

    /// <summary>
    /// Loads the "Mammography, M. Elter, 2007" instance from the UCI instance provider,
    /// selects "Severity" as target, chooses the input variables and sets the training
    /// (0-800) and test (800-961) partitions.
    /// </summary>
    private static ClassificationProblemData LoadMammographyData() {
      var uciProvider = new UCIInstanceProvider();
      var descriptor = uciProvider.GetDataDescriptors()
        .Single(x => x.Name.Equals("Mammography, M. Elter, 2007"));
      var data = (ClassificationProblemData)uciProvider.LoadData(descriptor);

      var targetParameter = data.TargetVariableParameter;
      targetParameter.Value = targetParameter.ValidValues.First(v => v.Value == "Severity");

      // "BI-RADS" and the target "Severity" are unchecked; the other four are inputs.
      SetInputVariableChecked(data, "BI-RADS", false);
      SetInputVariableChecked(data, "Age", true);
      SetInputVariableChecked(data, "Shape", true);
      SetInputVariableChecked(data, "Margin", true);
      SetInputVariableChecked(data, "Density", true);
      SetInputVariableChecked(data, "Severity", false);

      data.TrainingPartition.Start = 0;
      data.TrainingPartition.End = 800;
      data.TestPartition.Start = 800;
      data.TestPartition.End = 961;
      data.Name = "Data imported from mammographic_masses.csv";
      data.Description = "Original dataset: http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass, missing values have been replaced with median values.";
      return data;
    }

    /// <summary>
    /// Sets the checked state of the single input variable whose value equals
    /// <paramref name="variableName"/>.
    /// </summary>
    private static void SetInputVariableChecked(ClassificationProblemData data, string variableName, bool isChecked) {
      var inputs = data.InputVariables;
      inputs.SetItemCheckedState(inputs.Single(x => x.Value == variableName), isChecked);
    }

    /// <summary>
    /// Creates the default classification grammar with the variable-condition symbol
    /// disabled and adjusted settings for the variable and constant symbols.
    /// </summary>
    private static TypeCoherentExpressionGrammar CreateClassificationGrammar() {
      // configure grammar
      var grammar = new TypeCoherentExpressionGrammar();
      grammar.ConfigureAsDefaultClassificationGrammar();
      grammar.Symbols.OfType<VariableCondition>().Single().Enabled = false;

      // The plain variable symbol is the only Variable that is not a LaggedVariable.
      var variable = grammar.Symbols.OfType<Variable>().Single(x => !(x is LaggedVariable));
      variable.WeightMu = 1.0;
      variable.WeightSigma = 1.0;
      variable.WeightManipulatorMu = 0.0;
      variable.WeightManipulatorSigma = 0.05;
      variable.MultiplicativeWeightManipulatorSigma = 0.03;

      var constant = grammar.Symbols.OfType<Constant>().Single();
      constant.MaxValue = 20;
      constant.MinValue = -20;
      constant.ManipulatorMu = 0.0;
      constant.ManipulatorSigma = 1;
      constant.MultiplicativeManipulatorSigma = 0.03;
      return grammar;
    }
    #endregion

    #region Algorithm Configuration
    /// <summary>
    /// Attaches <paramref name="problem"/> to <paramref name="algorithm"/>, sets the
    /// operators and parameters, tunes the tree shakers and unchecks two analyzers.
    /// </summary>
    private static void ConfigureAlgorithm(GeneticAlgorithm algorithm, SymbolicClassificationSingleObjectiveProblem problem) {
      algorithm.Problem = problem;
      algorithm.Name = "Genetic Programming - Symbolic Classification";
      algorithm.Description = "A standard genetic programming algorithm to solve a classification problem (Mammographic+Mass dataset)";
      // NOTE(review): the numeric arguments are presumably population size, elites,
      // maximum generations, mutation rate and tournament size - verify in SamplesUtils.
      SamplesUtils.ConfigureGeneticAlgorithmParameters<TournamentSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(
        algorithm, 1000, 1, 100, 0.15, 5
        );

      var manipulators = ((MultiSymbolicExpressionTreeManipulator)algorithm.Mutator).Operators;
      manipulators.OfType<FullTreeShaker>().Single().ShakingFactor = 0.1;
      manipulators.OfType<OnePointShaker>().Single().ShakingFactor = 1.0;

      var analyzers = algorithm.Analyzer.Operators;
      var overfittingAnalyzer = analyzers
        .OfType<SymbolicClassificationSingleObjectiveOverfittingAnalyzer>()
        .Single();
      analyzers.SetItemCheckedState(overfittingAnalyzer, false);
      // First() rather than Single(), as in the original sample.
      var alleleFrequencyAnalyzer = analyzers
        .OfType<SymbolicDataAnalysisAlleleFrequencyAnalyzer>()
        .First();
      analyzers.SetItemCheckedState(alleleFrequencyAnalyzer, false);
    }
    #endregion

  }
}
Note: See TracBrowser for help on using the repository browser.