Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/sources/HeuristicLab.Tests/HeuristicLab-3.3/Samples/GPSymbolicClassificationSampleTest.cs @ 11450

Last change on this file since 11450 was 11450, checked in by bburlacu, 10 years ago

#2211: Separated samples class into separate test classes. Added scripts unit tests (grid search classification/regression).

File size: 8.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.IO;
23using System.Linq;
24using HeuristicLab.Algorithms.GeneticAlgorithm;
25using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
26using HeuristicLab.Persistence.Default.Xml;
27using HeuristicLab.Problems.DataAnalysis;
28using HeuristicLab.Problems.DataAnalysis.Symbolic;
29using HeuristicLab.Problems.DataAnalysis.Symbolic.Classification;
30using HeuristicLab.Problems.Instances.DataAnalysis;
31using HeuristicLab.Selection;
32using Microsoft.VisualStudio.TestTools.UnitTesting;
33
namespace HeuristicLab.Tests {
  /// <summary>
  /// Creates, serializes and executes the "Genetic Programming - Symbolic Classification" sample:
  /// a <see cref="GeneticAlgorithm"/> that solves a symbolic classification problem on the
  /// UCI mammography data set.
  /// </summary>
  [TestClass]
  public class GPSymbolicClassificationSampleTest {
    private const string samplesDirectory = SamplesUtils.Directory;
    // File name of the serialized sample inside samplesDirectory.
    private const string sampleFileName = "SGP_SymbClass.hl";

    /// <summary>
    /// Creates the samples directory if it does not exist yet.
    /// </summary>
    /// <param name="testContext">Test context supplied by the test framework; not used.</param>
    [ClassInitialize]
    public static void MyClassInitialize(TestContext testContext) {
      if (!Directory.Exists(samplesDirectory))
        Directory.CreateDirectory(samplesDirectory);
    }

    /// <summary>
    /// Builds the sample algorithm and serializes it into the samples directory.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Create")]
    [TestProperty("Time", "medium")]
    public void CreateGpSymbolicClassificationSampleTest() {
      var ga = CreateGpSymbolicClassificationSample();
      // Derive the target path from samplesDirectory (the directory MyClassInitialize creates)
      // so that the created directory and the output location cannot drift apart.
      XmlGenerator.Serialize(ga, Path.Combine(samplesDirectory, sampleFileName));
    }

    /// <summary>
    /// Builds the sample algorithm, runs it and compares the produced results with
    /// previously recorded reference values.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Execute")]
    [TestProperty("Time", "long")]
    public void RunGpSymbolicClassificationSampleTest() {
      var ga = CreateGpSymbolicClassificationSample();
      // NOTE(review): the exact-value assertions below presumably depend on the seed not being randomized.
      ga.SetSeedRandomly.Value = false;
      SamplesUtils.RunAlgorithm(ga);

      Assert.AreEqual(0.141880203907627, SamplesUtils.GetDoubleResult(ga, "BestQuality"), 1E-8);
      Assert.AreEqual(4.3246992327753295, SamplesUtils.GetDoubleResult(ga, "CurrentAverageQuality"), 1E-8);
      Assert.AreEqual(100.62175156249987, SamplesUtils.GetDoubleResult(ga, "CurrentWorstQuality"), 1E-8);
      Assert.AreEqual(100900, SamplesUtils.GetIntResult(ga, "EvaluatedSolutions"));

      var bestTrainingSolution = (IClassificationSolution)ga.Results["Best training solution"].Value;
      Assert.AreEqual(0.80875, bestTrainingSolution.TrainingAccuracy, 1E-8);
      Assert.AreEqual(0.795031055900621, bestTrainingSolution.TestAccuracy, 1E-8);

      var bestValidationSolution = (IClassificationSolution)ga.Results["Best validation solution"].Value;
      Assert.AreEqual(0.81375, bestValidationSolution.TrainingAccuracy, 1E-8);
      Assert.AreEqual(0.788819875776398, bestValidationSolution.TestAccuracy, 1E-8);
    }

    /// <summary>
    /// Creates the fully configured sample: a genetic algorithm whose problem is the
    /// mammography symbolic classification problem.
    /// </summary>
    /// <returns>The configured, not yet executed algorithm.</returns>
    private GeneticAlgorithm CreateGpSymbolicClassificationSample() {
      GeneticAlgorithm ga = new GeneticAlgorithm();
      ga.Problem = CreateProblem();
      ConfigureAlgorithm(ga);
      return ga;
    }

    /// <summary>
    /// Creates the symbolic classification problem: problem data first, then the grammar,
    /// then the remaining problem parameters.
    /// </summary>
    private static SymbolicClassificationSingleObjectiveProblem CreateProblem() {
      SymbolicClassificationSingleObjectiveProblem symbClassProblem = new SymbolicClassificationSingleObjectiveProblem();
      symbClassProblem.Name = "Mammography Classification Problem";
      symbClassProblem.Description = "Mammography dataset imported from the UCI machine learning repository (http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass)";
      symbClassProblem.ProblemData = LoadMammographyData();

      // configure grammar
      symbClassProblem.SymbolicExpressionTreeGrammar = CreateGrammar();

      // configure remaining problem parameters
      symbClassProblem.BestKnownQuality.Value = 0.0;
      // Rows [0, 400) are used for fitness calculation, rows [400, 800) for validation;
      // together they span the training partition set on the problem data.
      symbClassProblem.FitnessCalculationPartition.Start = 0;
      symbClassProblem.FitnessCalculationPartition.End = 400;
      symbClassProblem.ValidationPartition.Start = 400;
      symbClassProblem.ValidationPartition.End = 800;
      symbClassProblem.RelativeNumberOfEvaluatedSamples.Value = 1;
      symbClassProblem.MaximumSymbolicExpressionTreeLength.Value = 100;
      symbClassProblem.MaximumSymbolicExpressionTreeDepth.Value = 10;
      symbClassProblem.MaximumFunctionDefinitions.Value = 0;
      symbClassProblem.MaximumFunctionArguments.Value = 0;
      symbClassProblem.EvaluatorParameter.Value = new SymbolicClassificationSingleObjectiveMeanSquaredErrorEvaluator();
      return symbClassProblem;
    }

    /// <summary>
    /// Loads the mammography instance from the UCI instance provider and selects the target
    /// variable, the input variables and the training/test partitions.
    /// </summary>
    private static ClassificationProblemData LoadMammographyData() {
      UCIInstanceProvider provider = new UCIInstanceProvider();
      var instance = provider.GetDataDescriptors().Single(x => x.Name.Equals("Mammography, M. Elter, 2007"));
      var mammoData = (ClassificationProblemData)provider.LoadData(instance);

      mammoData.TargetVariableParameter.Value = mammoData.TargetVariableParameter.ValidValues
        .First(v => v.Value == "Severity");

      // "BI-RADS" and the target "Severity" are unchecked; all other listed variables are checked.
      SetInputVariableChecked(mammoData, "BI-RADS", false);
      SetInputVariableChecked(mammoData, "Age", true);
      SetInputVariableChecked(mammoData, "Shape", true);
      SetInputVariableChecked(mammoData, "Margin", true);
      SetInputVariableChecked(mammoData, "Density", true);
      SetInputVariableChecked(mammoData, "Severity", false);

      mammoData.TrainingPartition.Start = 0;
      mammoData.TrainingPartition.End = 800;
      mammoData.TestPartition.Start = 800;
      mammoData.TestPartition.End = 961;
      mammoData.Name = "Data imported from mammographic_masses.csv";
      mammoData.Description = "Original dataset: http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass, missing values have been replaced with median values.";
      return mammoData;
    }

    /// <summary>
    /// Sets the checked state of the single input variable whose value equals <paramref name="variableName"/>.
    /// </summary>
    private static void SetInputVariableChecked(ClassificationProblemData problemData, string variableName, bool isChecked) {
      problemData.InputVariables.SetItemCheckedState(
        problemData.InputVariables.Single(x => x.Value == variableName), isChecked);
    }

    /// <summary>
    /// Creates the default classification grammar with the variable-condition symbol disabled
    /// and the variable and constant symbol parameters set for this sample.
    /// </summary>
    private static TypeCoherentExpressionGrammar CreateGrammar() {
      var grammar = new TypeCoherentExpressionGrammar();
      grammar.ConfigureAsDefaultClassificationGrammar();
      grammar.Symbols.OfType<VariableCondition>().Single().Enabled = false;

      // LaggedVariable instances also match OfType<Variable>(), so they are excluded explicitly.
      var varSymbol = grammar.Symbols.OfType<Variable>().Single(x => !(x is LaggedVariable));
      varSymbol.WeightMu = 1.0;
      varSymbol.WeightSigma = 1.0;
      varSymbol.WeightManipulatorMu = 0.0;
      varSymbol.WeightManipulatorSigma = 0.05;
      varSymbol.MultiplicativeWeightManipulatorSigma = 0.03;

      var constSymbol = grammar.Symbols.OfType<Constant>().Single();
      constSymbol.MaxValue = 20;
      constSymbol.MinValue = -20;
      constSymbol.ManipulatorMu = 0.0;
      constSymbol.ManipulatorSigma = 1;
      constSymbol.MultiplicativeManipulatorSigma = 0.03;
      return grammar;
    }

    /// <summary>
    /// Names the algorithm, applies the genetic algorithm parameters, adjusts the shaking
    /// factors of the mutation operators and unchecks two analyzers.
    /// Expects <paramref name="ga"/> to already have its problem assigned.
    /// </summary>
    private static void ConfigureAlgorithm(GeneticAlgorithm ga) {
      ga.Name = "Genetic Programming - Symbolic Classification";
      ga.Description = "A standard genetic programming algorithm to solve a classification problem (Mammographic+Mass dataset)";
      SamplesUtils.ConfigureGeneticAlgorithmParameters<TournamentSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(
        ga, 1000, 1, 100, 0.15, 5
        );

      var mutator = (MultiSymbolicExpressionTreeManipulator)ga.Mutator;
      mutator.Operators.OfType<FullTreeShaker>().Single().ShakingFactor = 0.1;
      mutator.Operators.OfType<OnePointShaker>().Single().ShakingFactor = 1.0;

      ga.Analyzer.Operators.SetItemCheckedState(
        ga.Analyzer.Operators
        .OfType<SymbolicClassificationSingleObjectiveOverfittingAnalyzer>()
        .Single(), false);
      ga.Analyzer.Operators.SetItemCheckedState(
        ga.Analyzer.Operators
        .OfType<SymbolicDataAnalysisAlleleFrequencyAnalyzer>()
        .First(), false);
    }

  }
}
Note: See TracBrowser for help on using the repository browser.