Free cookie consent management tool by TermsFeed Policy Generator

source: stable/HeuristicLab.Tests/HeuristicLab-3.3/Samples/GPSymbolicClassificationSampleTest.cs @ 17146

Last change on this file since 17146 was 17105, checked in by mkommend, 5 years ago

#2520: Merged 16584, 16585, 16594, 16595, 16625, 16658, 16659, 16672, 16707, 16729, 16792, 16796, 16797, 16799, 16819, 16906, 16907, 16908, 16933, 16945, 16992, 16994, 16995, 16996, 16997, 17014, 17015, 17017, 17020, 17021, 17022, 17023, 17024, 17029, 17086, 17087, 17088, 17089 into stable.

File size: 8.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.IO;
23using System.Linq;
24using HEAL.Attic;
25using HeuristicLab.Algorithms.GeneticAlgorithm;
26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
27using HeuristicLab.Problems.DataAnalysis;
28using HeuristicLab.Problems.DataAnalysis.Symbolic;
29using HeuristicLab.Problems.DataAnalysis.Symbolic.Classification;
30using HeuristicLab.Problems.Instances.DataAnalysis;
31using HeuristicLab.Selection;
32using Microsoft.VisualStudio.TestTools.UnitTesting;
33
34namespace HeuristicLab.Tests {
35  [TestClass]
36  public class GPSymbolicClassificationSampleTest {
37    private const string SampleFileName = "SGP_SymbClass";
38
39    private static readonly ProtoBufSerializer serializer = new ProtoBufSerializer();
40
41    [TestMethod]
42    [TestCategory("Samples.Create")]
43    [TestProperty("Time", "medium")]
44    public void CreateGpSymbolicClassificationSampleTest() {
45      var ga = CreateGpSymbolicClassificationSample();
46      string path = Path.Combine(SamplesUtils.SamplesDirectory, SampleFileName + SamplesUtils.SampleFileExtension);
47      serializer .Serialize(ga, path);
48    }
49
50    [TestMethod]
51    [TestCategory("Samples.Execute")]
52    [TestProperty("Time", "long")]
53    public void RunGpSymbolicClassificationSampleTest() {
54      var ga = CreateGpSymbolicClassificationSample();
55      ga.SetSeedRandomly.Value = false;
56      SamplesUtils.RunAlgorithm(ga);
57      Assert.AreEqual(0.141880203907627, SamplesUtils.GetDoubleResult(ga, "BestQuality"), 1E-8);
58      Assert.AreEqual(4.3246992327753295, SamplesUtils.GetDoubleResult(ga, "CurrentAverageQuality"), 1E-8);
59      Assert.AreEqual(100.62175156249987, SamplesUtils.GetDoubleResult(ga, "CurrentWorstQuality"), 1E-8);
60      Assert.AreEqual(100900, SamplesUtils.GetIntResult(ga, "EvaluatedSolutions"));
61      var bestTrainingSolution = (IClassificationSolution)ga.Results["Best training solution"].Value;
62      Assert.AreEqual(0.80875, bestTrainingSolution.TrainingAccuracy, 1E-8);
63      Assert.AreEqual(0.795031055900621, bestTrainingSolution.TestAccuracy, 1E-8);
64      var bestValidationSolution = (IClassificationSolution)ga.Results["Best validation solution"].Value;
65      Assert.AreEqual(0.81375, bestValidationSolution.TrainingAccuracy, 1E-8);
66      Assert.AreEqual(0.788819875776398, bestValidationSolution.TestAccuracy, 1E-8);
67    }
68
69    private GeneticAlgorithm CreateGpSymbolicClassificationSample() {
70      GeneticAlgorithm ga = new GeneticAlgorithm();
71      #region Problem Configuration
72      SymbolicClassificationSingleObjectiveProblem symbClassProblem = new SymbolicClassificationSingleObjectiveProblem();
73      symbClassProblem.Name = "Mammography Classification Problem";
74      symbClassProblem.Description = "Mammography dataset imported from the UCI machine learning repository (http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass)";
75      UCIInstanceProvider provider = new UCIInstanceProvider();
76      var instance = provider.GetDataDescriptors().Where(x => x.Name.Equals("Mammography, M. Elter, 2007")).Single();
77      var mammoData = (ClassificationProblemData)provider.LoadData(instance);
78      mammoData.TargetVariableParameter.Value = mammoData.TargetVariableParameter.ValidValues
79        .First(v => v.Value == "Severity");
80      mammoData.InputVariables.SetItemCheckedState(
81        mammoData.InputVariables.Single(x => x.Value == "BI-RADS"), false);
82      mammoData.InputVariables.SetItemCheckedState(
83        mammoData.InputVariables.Single(x => x.Value == "Age"), true);
84      mammoData.InputVariables.SetItemCheckedState(
85        mammoData.InputVariables.Single(x => x.Value == "Shape"), true);
86      mammoData.InputVariables.SetItemCheckedState(
87        mammoData.InputVariables.Single(x => x.Value == "Margin"), true);
88      mammoData.InputVariables.SetItemCheckedState(
89        mammoData.InputVariables.Single(x => x.Value == "Density"), true);
90      mammoData.InputVariables.SetItemCheckedState(
91        mammoData.InputVariables.Single(x => x.Value == "Severity"), false);
92      mammoData.TrainingPartition.Start = 0;
93      mammoData.TrainingPartition.End = 800;
94      mammoData.TestPartition.Start = 800;
95      mammoData.TestPartition.End = 961;
96      mammoData.Name = "Data imported from mammographic_masses.csv";
97      mammoData.Description = "Original dataset: http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass, missing values have been replaced with median values.";
98      symbClassProblem.ProblemData = mammoData;
99
100      // configure grammar
101      var grammar = new TypeCoherentExpressionGrammar();
102      grammar.ConfigureAsDefaultClassificationGrammar();
103      grammar.Symbols.OfType<VariableCondition>().Single().Enabled = false;
104      foreach (var varSy in grammar.Symbols.OfType<VariableBase>()) varSy.VariableChangeProbability = 1.0; // for backwards compatibilty
105      var varSymbol = grammar.Symbols.OfType<Variable>().Single();
106      varSymbol.WeightMu = 1.0;
107      varSymbol.WeightSigma = 1.0;
108      varSymbol.WeightManipulatorMu = 0.0;
109      varSymbol.WeightManipulatorSigma = 0.05;
110      varSymbol.MultiplicativeWeightManipulatorSigma = 0.03;
111      var constSymbol = grammar.Symbols.OfType<Constant>().Single();
112      constSymbol.MaxValue = 20;
113      constSymbol.MinValue = -20;
114      constSymbol.ManipulatorMu = 0.0;
115      constSymbol.ManipulatorSigma = 1;
116      constSymbol.MultiplicativeManipulatorSigma = 0.03;
117      symbClassProblem.SymbolicExpressionTreeGrammar = grammar;
118
119      // configure remaining problem parameters
120      symbClassProblem.BestKnownQuality.Value = 0.0;
121      symbClassProblem.FitnessCalculationPartition.Start = 0;
122      symbClassProblem.FitnessCalculationPartition.End = 400;
123      symbClassProblem.ValidationPartition.Start = 400;
124      symbClassProblem.ValidationPartition.End = 800;
125      symbClassProblem.RelativeNumberOfEvaluatedSamples.Value = 1;
126      symbClassProblem.MaximumSymbolicExpressionTreeLength.Value = 100;
127      symbClassProblem.MaximumSymbolicExpressionTreeDepth.Value = 10;
128      symbClassProblem.MaximumFunctionDefinitions.Value = 0;
129      symbClassProblem.MaximumFunctionArguments.Value = 0;
130      symbClassProblem.EvaluatorParameter.Value = new SymbolicClassificationSingleObjectiveMeanSquaredErrorEvaluator();
131      #endregion
132      #region Algorithm Configuration
133      ga.Problem = symbClassProblem;
134      ga.Name = "Genetic Programming - Symbolic Classification";
135      ga.Description = "A standard genetic programming algorithm to solve a classification problem (Mammographic+Mass dataset)";
136      SamplesUtils.ConfigureGeneticAlgorithmParameters<TournamentSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(
137        ga, 1000, 1, 100, 0.15, 5
138        );
139
140      var mutator = (MultiSymbolicExpressionTreeManipulator)ga.Mutator;
141      mutator.Operators.OfType<FullTreeShaker>().Single().ShakingFactor = 0.1;
142      mutator.Operators.OfType<OnePointShaker>().Single().ShakingFactor = 1.0;
143
144      ga.Analyzer.Operators.SetItemCheckedState(
145        ga.Analyzer.Operators
146        .OfType<SymbolicClassificationSingleObjectiveOverfittingAnalyzer>()
147        .Single(), false);
148      ga.Analyzer.Operators.SetItemCheckedState(
149        ga.Analyzer.Operators
150        .OfType<SymbolicDataAnalysisAlleleFrequencyAnalyzer>()
151        .First(), false);
152      #endregion
153      return ga;
154    }
155
156  }
157}
Note: See TracBrowser for help on using the repository browser.