Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2521_ProblemRefactoring/HeuristicLab.Tests/HeuristicLab-3.3/Samples/GPSymbolicClassificationSampleTest.cs

Last change on this file was 17226, checked in by mkommend, 5 years ago

#2521: Merged trunk changes into problem refactoring branch.

File size: 8.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.IO;
23using System.Linq;
24using HEAL.Attic;
25using HeuristicLab.Algorithms.GeneticAlgorithm;
26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
27using HeuristicLab.Problems.DataAnalysis;
28using HeuristicLab.Problems.DataAnalysis.Symbolic;
29using HeuristicLab.Problems.DataAnalysis.Symbolic.Classification;
30using HeuristicLab.Problems.Instances.DataAnalysis;
31using HeuristicLab.Selection;
32using Microsoft.VisualStudio.TestTools.UnitTesting;
33
namespace HeuristicLab.Tests {
  /// <summary>
  /// Tests for the "Genetic Programming - Symbolic Classification" sample. One test serializes
  /// the configured algorithm into the samples directory, the other executes it with
  /// <c>SetSeedRandomly</c> switched off and compares the results with recorded reference values.
  /// </summary>
  [TestClass]
  public class GPSymbolicClassificationSampleTest {
    private const string SampleFileName = "SGP_SymbClass";

    // Absolute tolerance shared by all floating-point comparisons in the run test.
    private const double Tolerance = 1E-8;

    private static readonly ProtoBufSerializer serializer = new ProtoBufSerializer();

    /// <summary>
    /// Creates the sample algorithm and serializes it to
    /// <c>SamplesDirectory/SampleFileName + SampleFileExtension</c>.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Create")]
    [TestProperty("Time", "medium")]
    public void CreateGpSymbolicClassificationSampleTest() {
      GeneticAlgorithm sample = CreateGpSymbolicClassificationSample();
      string fileName = SampleFileName + SamplesUtils.SampleFileExtension;
      string targetPath = Path.Combine(SamplesUtils.SamplesDirectory, fileName);
      serializer.Serialize(sample, targetPath);
    }

    /// <summary>
    /// Runs the sample algorithm with <c>SetSeedRandomly</c> set to <c>false</c> and checks the
    /// quality results, the number of evaluated solutions and the accuracies of the best
    /// training and validation solutions against the stored reference values.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Execute")]
    [TestProperty("Time", "long")]
    public void RunGpSymbolicClassificationSampleTest() {
      GeneticAlgorithm algorithm = CreateGpSymbolicClassificationSample();
      algorithm.SetSeedRandomly.Value = false;
      SamplesUtils.RunAlgorithm(algorithm);

      Assert.AreEqual(0.141880203907627, SamplesUtils.GetDoubleResult(algorithm, "BestQuality"), Tolerance);
      Assert.AreEqual(4.3246992327753295, SamplesUtils.GetDoubleResult(algorithm, "CurrentAverageQuality"), Tolerance);
      Assert.AreEqual(100.62175156249987, SamplesUtils.GetDoubleResult(algorithm, "CurrentWorstQuality"), Tolerance);
      Assert.AreEqual(100900, SamplesUtils.GetIntResult(algorithm, "EvaluatedSolutions"));

      AssertSolutionAccuracy(algorithm, "Best training solution", 0.80875, 0.795031055900621);
      AssertSolutionAccuracy(algorithm, "Best validation solution", 0.81375, 0.788819875776398);
    }

    /// <summary>
    /// Builds the sample: a genetic algorithm whose problem is the mammography symbolic
    /// classification problem configured by the helpers below.
    /// </summary>
    /// <returns>The fully configured, not yet executed algorithm.</returns>
    private GeneticAlgorithm CreateGpSymbolicClassificationSample() {
      var algorithm = new GeneticAlgorithm();
      var problem = CreateProblem();
      ConfigureAlgorithm(algorithm, problem);
      return algorithm;
    }

    /// <summary>
    /// Looks up the classification solution stored under <paramref name="resultName"/> in the
    /// algorithm's results and compares its training and test accuracy with the expected values.
    /// </summary>
    private static void AssertSolutionAccuracy(GeneticAlgorithm algorithm, string resultName, double expectedTraining, double expectedTest) {
      var solution = (IClassificationSolution)algorithm.Results[resultName].Value;
      Assert.AreEqual(expectedTraining, solution.TrainingAccuracy, Tolerance);
      Assert.AreEqual(expectedTest, solution.TestAccuracy, Tolerance);
    }

    #region Problem Configuration
    /// <summary>
    /// Creates the symbolic classification problem: problem data first, then the grammar, then
    /// the remaining problem parameters. The assignments are kept in this order on purpose.
    /// </summary>
    private static SymbolicClassificationSingleObjectiveProblem CreateProblem() {
      var problem = new SymbolicClassificationSingleObjectiveProblem();
      problem.Name = "Mammography Classification Problem";
      problem.Description = "Mammography dataset imported from the UCI machine learning repository (http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass)";
      problem.ProblemData = LoadMammographyData();
      problem.SymbolicExpressionTreeGrammar = CreateGrammar();

      // configure remaining problem parameters
      problem.BestKnownQuality.Value = 0.0;
      problem.FitnessCalculationPartition.Start = 0;
      problem.FitnessCalculationPartition.End = 400;
      problem.ValidationPartition.Start = 400;
      problem.ValidationPartition.End = 800;
      problem.RelativeNumberOfEvaluatedSamples.Value = 1;
      problem.MaximumSymbolicExpressionTreeLength.Value = 100;
      problem.MaximumSymbolicExpressionTreeDepth.Value = 10;
      problem.MaximumFunctionDefinitions.Value = 0;
      problem.MaximumFunctionArguments.Value = 0;
      problem.EvaluatorParameter.Value = new SymbolicClassificationSingleObjectiveMeanSquaredErrorEvaluator();
      return problem;
    }

    /// <summary>
    /// Loads the "Mammography, M. Elter, 2007" instance from the UCI instance provider, selects
    /// "Severity" as target, enables Age/Shape/Margin/Density as inputs (BI-RADS and Severity are
    /// unchecked) and sets the training partition to [0, 800) and the test partition to [800, 961).
    /// </summary>
    private static ClassificationProblemData LoadMammographyData() {
      var provider = new UCIInstanceProvider();
      var descriptor = provider.GetDataDescriptors()
        .Where(x => x.Name.Equals("Mammography, M. Elter, 2007"))
        .Single();
      var data = (ClassificationProblemData)provider.LoadData(descriptor);

      data.TargetVariableParameter.Value = data.TargetVariableParameter.ValidValues
        .First(v => v.Value == "Severity");

      SetInputVariableChecked(data, "BI-RADS", false);
      SetInputVariableChecked(data, "Age", true);
      SetInputVariableChecked(data, "Shape", true);
      SetInputVariableChecked(data, "Margin", true);
      SetInputVariableChecked(data, "Density", true);
      SetInputVariableChecked(data, "Severity", false);

      data.TrainingPartition.Start = 0;
      data.TrainingPartition.End = 800;
      data.TestPartition.Start = 800;
      data.TestPartition.End = 961;
      data.Name = "Data imported from mammographic_masses.csv";
      data.Description = "Original dataset: http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass, missing values have been replaced with median values.";
      return data;
    }

    /// <summary>
    /// Sets the checked state of the single input variable whose value equals
    /// <paramref name="variableName"/>.
    /// </summary>
    private static void SetInputVariableChecked(ClassificationProblemData data, string variableName, bool isChecked) {
      data.InputVariables.SetItemCheckedState(
        data.InputVariables.Single(x => x.Value == variableName), isChecked);
    }

    /// <summary>
    /// Creates the default classification grammar with the variable-condition symbol disabled
    /// and explicit weight/manipulator settings for the variable and constant symbols.
    /// </summary>
    private static TypeCoherentExpressionGrammar CreateGrammar() {
      // configure grammar
      var grammar = new TypeCoherentExpressionGrammar();
      grammar.ConfigureAsDefaultClassificationGrammar();
      grammar.Symbols.OfType<VariableCondition>().Single().Enabled = false;
      foreach (var variableSymbol in grammar.Symbols.OfType<VariableBase>()) {
        variableSymbol.VariableChangeProbability = 1.0; // for backwards compatibility
      }

      var variable = grammar.Symbols.OfType<Variable>().Single();
      variable.WeightMu = 1.0;
      variable.WeightSigma = 1.0;
      variable.WeightManipulatorMu = 0.0;
      variable.WeightManipulatorSigma = 0.05;
      variable.MultiplicativeWeightManipulatorSigma = 0.03;

      var constant = grammar.Symbols.OfType<Constant>().Single();
      constant.MaxValue = 20;
      constant.MinValue = -20;
      constant.ManipulatorMu = 0.0;
      constant.ManipulatorSigma = 1;
      constant.MultiplicativeManipulatorSigma = 0.03;
      return grammar;
    }
    #endregion

    #region Algorithm Configuration
    /// <summary>
    /// Attaches the problem to the algorithm, applies the shared genetic algorithm settings via
    /// <c>SamplesUtils</c>, tunes the two shaker mutation operators and unchecks the overfitting
    /// and allele frequency analyzers.
    /// </summary>
    private static void ConfigureAlgorithm(GeneticAlgorithm algorithm, SymbolicClassificationSingleObjectiveProblem problem) {
      algorithm.Problem = problem;
      algorithm.Name = "Genetic Programming - Symbolic Classification";
      algorithm.Description = "A standard genetic programming algorithm to solve a classification problem (Mammographic+Mass dataset)";

      // NOTE(review): the meaning of the numeric arguments is defined by SamplesUtils, which is
      // not visible here - presumably population size, elites, generations, mutation rate and
      // tournament group size; confirm against SamplesUtils.
      SamplesUtils.ConfigureGeneticAlgorithmParameters<TournamentSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(
        algorithm, 1000, 1, 100, 0.15, 5);

      var manipulator = (MultiSymbolicExpressionTreeManipulator)algorithm.Mutator;
      manipulator.Operators.OfType<FullTreeShaker>().Single().ShakingFactor = 0.1;
      manipulator.Operators.OfType<OnePointShaker>().Single().ShakingFactor = 1.0;

      var overfittingAnalyzer = algorithm.Analyzer.Operators
        .OfType<SymbolicClassificationSingleObjectiveOverfittingAnalyzer>()
        .Single();
      algorithm.Analyzer.Operators.SetItemCheckedState(overfittingAnalyzer, false);

      var alleleFrequencyAnalyzer = algorithm.Analyzer.Operators
        .OfType<SymbolicDataAnalysisAlleleFrequencyAnalyzer>()
        .First();
      algorithm.Analyzer.Operators.SetItemCheckedState(alleleFrequencyAnalyzer, false);
    }
    #endregion

  }
}
Note: See TracBrowser for help on using the repository browser.