Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2947_ConfigurableIndexedDataTable/HeuristicLab.Tests/HeuristicLab-3.3/Samples/GPSymbolicClassificationSampleTest.cs @ 16520

Last change on this file since 16520 was 15583, checked in by swagner, 7 years ago

#2640: Updated year of copyrights in license headers

File size: 8.0 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System.IO;
23using System.Linq;
24using HeuristicLab.Algorithms.GeneticAlgorithm;
25using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
26using HeuristicLab.Persistence.Default.Xml;
27using HeuristicLab.Problems.DataAnalysis;
28using HeuristicLab.Problems.DataAnalysis.Symbolic;
29using HeuristicLab.Problems.DataAnalysis.Symbolic.Classification;
30using HeuristicLab.Problems.Instances.DataAnalysis;
31using HeuristicLab.Selection;
32using Microsoft.VisualStudio.TestTools.UnitTesting;
33
namespace HeuristicLab.Tests {
  /// <summary>
  /// Tests for the "Genetic Programming - Symbolic Classification" sample: one test
  /// builds the configured genetic algorithm and serializes it to the samples
  /// directory, the other runs it with a non-random seed and compares the results
  /// against recorded reference values.
  /// </summary>
  [TestClass]
  public class GPSymbolicClassificationSampleTest {
    private const string SampleFileName = "SGP_SymbClass";

    /// <summary>
    /// Builds the sample and writes it to
    /// <c>SamplesUtils.SamplesDirectory</c> using the sample file name and extension.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Create")]
    [TestProperty("Time", "medium")]
    public void CreateGpSymbolicClassificationSampleTest() {
      var ga = CreateGpSymbolicClassificationSample();
      string path = Path.Combine(SamplesUtils.SamplesDirectory, SampleFileName + SamplesUtils.SampleFileExtension);
      XmlGenerator.Serialize(ga, path);
    }

    /// <summary>
    /// Runs the sample with <c>SetSeedRandomly</c> switched off and checks the
    /// resulting qualities, evaluation count and solution accuracies against
    /// recorded reference values (tolerance 1E-8 for floating-point results).
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Execute")]
    [TestProperty("Time", "long")]
    public void RunGpSymbolicClassificationSampleTest() {
      var ga = CreateGpSymbolicClassificationSample();
      // A non-random seed is required for the reference values below to be reproducible.
      ga.SetSeedRandomly.Value = false;
      SamplesUtils.RunAlgorithm(ga);

      Assert.AreEqual(0.141880203907627, SamplesUtils.GetDoubleResult(ga, "BestQuality"), 1E-8);
      Assert.AreEqual(4.3246992327753295, SamplesUtils.GetDoubleResult(ga, "CurrentAverageQuality"), 1E-8);
      Assert.AreEqual(100.62175156249987, SamplesUtils.GetDoubleResult(ga, "CurrentWorstQuality"), 1E-8);
      Assert.AreEqual(100900, SamplesUtils.GetIntResult(ga, "EvaluatedSolutions"));

      var bestTrainingSolution = (IClassificationSolution)ga.Results["Best training solution"].Value;
      Assert.AreEqual(0.80875, bestTrainingSolution.TrainingAccuracy, 1E-8);
      Assert.AreEqual(0.795031055900621, bestTrainingSolution.TestAccuracy, 1E-8);

      var bestValidationSolution = (IClassificationSolution)ga.Results["Best validation solution"].Value;
      Assert.AreEqual(0.81375, bestValidationSolution.TrainingAccuracy, 1E-8);
      Assert.AreEqual(0.788819875776398, bestValidationSolution.TestAccuracy, 1E-8);
    }

    /// <summary>
    /// Creates the genetic algorithm configured for the mammography symbolic
    /// classification problem.
    /// </summary>
    /// <returns>The fully configured, not yet executed algorithm.</returns>
    private GeneticAlgorithm CreateGpSymbolicClassificationSample() {
      // The algorithm is instantiated before the problem, as in the original sample setup.
      GeneticAlgorithm ga = new GeneticAlgorithm();
      SymbolicClassificationSingleObjectiveProblem symbClassProblem = CreateMammographyProblem();
      ConfigureAlgorithm(ga, symbClassProblem);
      return ga;
    }

    /// <summary>
    /// Creates the classification problem: names it, attaches the mammography
    /// data, then the grammar, then the remaining problem parameters (in that order).
    /// </summary>
    private static SymbolicClassificationSingleObjectiveProblem CreateMammographyProblem() {
      #region Problem Configuration
      SymbolicClassificationSingleObjectiveProblem symbClassProblem = new SymbolicClassificationSingleObjectiveProblem();
      symbClassProblem.Name = "Mammography Classification Problem";
      symbClassProblem.Description = "Mammography dataset imported from the UCI machine learning repository (http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass)";
      symbClassProblem.ProblemData = LoadMammographyData();

      // configure grammar
      symbClassProblem.SymbolicExpressionTreeGrammar = CreateGrammar();

      // configure remaining problem parameters
      symbClassProblem.BestKnownQuality.Value = 0.0;
      symbClassProblem.FitnessCalculationPartition.Start = 0;
      symbClassProblem.FitnessCalculationPartition.End = 400;
      symbClassProblem.ValidationPartition.Start = 400;
      symbClassProblem.ValidationPartition.End = 800;
      symbClassProblem.RelativeNumberOfEvaluatedSamples.Value = 1;
      symbClassProblem.MaximumSymbolicExpressionTreeLength.Value = 100;
      symbClassProblem.MaximumSymbolicExpressionTreeDepth.Value = 10;
      symbClassProblem.MaximumFunctionDefinitions.Value = 0;
      symbClassProblem.MaximumFunctionArguments.Value = 0;
      symbClassProblem.EvaluatorParameter.Value = new SymbolicClassificationSingleObjectiveMeanSquaredErrorEvaluator();
      #endregion
      return symbClassProblem;
    }

    /// <summary>
    /// Loads the "Mammography, M. Elter, 2007" instance from the UCI instance
    /// provider and configures target variable, input variables and partitions.
    /// </summary>
    /// <returns>The configured problem data.</returns>
    private static ClassificationProblemData LoadMammographyData() {
      UCIInstanceProvider provider = new UCIInstanceProvider();
      // Single(predicate) throws if the descriptor is missing or ambiguous.
      var instance = provider.GetDataDescriptors().Single(x => x.Name.Equals("Mammography, M. Elter, 2007"));
      var mammoData = (ClassificationProblemData)provider.LoadData(instance);

      mammoData.TargetVariableParameter.Value = mammoData.TargetVariableParameter.ValidValues
        .First(v => v.Value == "Severity");

      // "BI-RADS" and the target "Severity" are unchecked; the other four inputs are checked.
      SetInputVariableChecked(mammoData, "BI-RADS", false);
      SetInputVariableChecked(mammoData, "Age", true);
      SetInputVariableChecked(mammoData, "Shape", true);
      SetInputVariableChecked(mammoData, "Margin", true);
      SetInputVariableChecked(mammoData, "Density", true);
      SetInputVariableChecked(mammoData, "Severity", false);

      mammoData.TrainingPartition.Start = 0;
      mammoData.TrainingPartition.End = 800;
      mammoData.TestPartition.Start = 800;
      mammoData.TestPartition.End = 961;
      mammoData.Name = "Data imported from mammographic_masses.csv";
      mammoData.Description = "Original dataset: http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass, missing values have been replaced with median values.";
      return mammoData;
    }

    /// <summary>
    /// Sets the checked state of the single input variable whose value equals
    /// <paramref name="variableName"/>.
    /// </summary>
    /// <param name="problemData">Problem data whose input variable list is modified.</param>
    /// <param name="variableName">Name of the input variable; must match exactly one entry.</param>
    /// <param name="isChecked">The checked state to assign.</param>
    private static void SetInputVariableChecked(ClassificationProblemData problemData, string variableName, bool isChecked) {
      problemData.InputVariables.SetItemCheckedState(
        problemData.InputVariables.Single(x => x.Value == variableName), isChecked);
    }

    /// <summary>
    /// Creates the expression grammar: the default classification grammar with
    /// the variable-condition symbol disabled and explicit variable/constant
    /// symbol parameters.
    /// </summary>
    private static TypeCoherentExpressionGrammar CreateGrammar() {
      var grammar = new TypeCoherentExpressionGrammar();
      grammar.ConfigureAsDefaultClassificationGrammar();
      grammar.Symbols.OfType<VariableCondition>().Single().Enabled = false;
      foreach (var varSy in grammar.Symbols.OfType<VariableBase>()) {
        varSy.VariableChangeProbability = 1.0; // for backwards compatibility
      }

      var varSymbol = grammar.Symbols.OfType<Variable>().Single();
      varSymbol.WeightMu = 1.0;
      varSymbol.WeightSigma = 1.0;
      varSymbol.WeightManipulatorMu = 0.0;
      varSymbol.WeightManipulatorSigma = 0.05;
      varSymbol.MultiplicativeWeightManipulatorSigma = 0.03;

      var constSymbol = grammar.Symbols.OfType<Constant>().Single();
      constSymbol.MaxValue = 20;
      constSymbol.MinValue = -20;
      constSymbol.ManipulatorMu = 0.0;
      constSymbol.ManipulatorSigma = 1;
      constSymbol.MultiplicativeManipulatorSigma = 0.03;
      return grammar;
    }

    /// <summary>
    /// Attaches <paramref name="symbClassProblem"/> to <paramref name="ga"/> and
    /// configures operators, mutator shaking factors and analyzers.
    /// </summary>
    /// <param name="ga">The algorithm to configure.</param>
    /// <param name="symbClassProblem">The problem the algorithm should solve.</param>
    private static void ConfigureAlgorithm(GeneticAlgorithm ga, SymbolicClassificationSingleObjectiveProblem symbClassProblem) {
      #region Algorithm Configuration
      ga.Problem = symbClassProblem;
      ga.Name = "Genetic Programming - Symbolic Classification";
      ga.Description = "A standard genetic programming algorithm to solve a classification problem (Mammographic+Mass dataset)";
      // NOTE(review): the meaning of the numeric arguments is defined by SamplesUtils
      // (not visible here) - confirm against that helper before changing them.
      SamplesUtils.ConfigureGeneticAlgorithmParameters<TournamentSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(
        ga, 1000, 1, 100, 0.15, 5
        );

      var mutator = (MultiSymbolicExpressionTreeManipulator)ga.Mutator;
      mutator.Operators.OfType<FullTreeShaker>().Single().ShakingFactor = 0.1;
      mutator.Operators.OfType<OnePointShaker>().Single().ShakingFactor = 1.0;

      // Uncheck the overfitting analyzer and the first allele frequency analyzer.
      ga.Analyzer.Operators.SetItemCheckedState(
        ga.Analyzer.Operators
        .OfType<SymbolicClassificationSingleObjectiveOverfittingAnalyzer>()
        .Single(), false);
      ga.Analyzer.Operators.SetItemCheckedState(
        ga.Analyzer.Operators
        .OfType<SymbolicDataAnalysisAlleleFrequencyAnalyzer>()
        .First(), false);
      #endregion
    }
  }
}
Note: See TracBrowser for help on using the repository browser.