[11450] | 1 | #region License Information
|
---|
| 2 | /* HeuristicLab
|
---|
[17180] | 3 | * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
[11450] | 4 | *
|
---|
| 5 | * This file is part of HeuristicLab.
|
---|
| 6 | *
|
---|
| 7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
| 8 | * it under the terms of the GNU General Public License as published by
|
---|
| 9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 10 | * (at your option) any later version.
|
---|
| 11 | *
|
---|
| 12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 15 | * GNU General Public License for more details.
|
---|
| 16 | *
|
---|
| 17 | * You should have received a copy of the GNU General Public License
|
---|
| 18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | */
|
---|
| 20 | #endregion
|
---|
| 21 |
|
---|
| 22 | using System.IO;
|
---|
| 23 | using System.Linq;
|
---|
[17021] | 24 | using HEAL.Attic;
|
---|
[11450] | 25 | using HeuristicLab.Algorithms.GeneticAlgorithm;
|
---|
| 26 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
| 27 | using HeuristicLab.Problems.DataAnalysis;
|
---|
| 28 | using HeuristicLab.Problems.DataAnalysis.Symbolic;
|
---|
| 29 | using HeuristicLab.Problems.DataAnalysis.Symbolic.Classification;
|
---|
| 30 | using HeuristicLab.Problems.Instances.DataAnalysis;
|
---|
| 31 | using HeuristicLab.Selection;
|
---|
| 32 | using Microsoft.VisualStudio.TestTools.UnitTesting;
|
---|
| 33 |
|
---|
namespace HeuristicLab.Tests {
  /// <summary>
  /// Tests for the "SGP_SymbClass" sample: a genetic algorithm that evolves symbolic
  /// expression trees for the mammography classification problem instance.
  /// One test serializes the configured sample, the other runs it and compares the
  /// results against fixed expected values.
  /// </summary>
  [TestClass]
  public class GPSymbolicClassificationSampleTest {
    private const string SampleFileName = "SGP_SymbClass";

    private static readonly ProtoBufSerializer serializer = new ProtoBufSerializer();

    /// <summary>
    /// Builds the sample and serializes it into the samples directory.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Create")]
    [TestProperty("Time", "medium")]
    public void CreateGpSymbolicClassificationSampleTest() {
      GeneticAlgorithm sample = CreateGpSymbolicClassificationSample();
      string directory = SamplesUtils.SamplesDirectory;
      string fileName = SampleFileName + SamplesUtils.SampleFileExtension;
      string targetPath = Path.Combine(directory, fileName);
      serializer.Serialize(sample, targetPath);
    }

    /// <summary>
    /// Builds the sample, runs it with random seeding switched off and checks the
    /// reported qualities, the number of evaluated solutions and the accuracies of
    /// the best training and best validation solutions.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Execute")]
    [TestProperty("Time", "long")]
    public void RunGpSymbolicClassificationSampleTest() {
      const double tolerance = 1E-8;

      GeneticAlgorithm algorithm = CreateGpSymbolicClassificationSample();
      // The expectations below are fixed numbers, so the seed must not be re-drawn.
      algorithm.SetSeedRandomly.Value = false;
      SamplesUtils.RunAlgorithm(algorithm);

      Assert.AreEqual(0.141880203907627, SamplesUtils.GetDoubleResult(algorithm, "BestQuality"), tolerance);
      Assert.AreEqual(4.3246992327753295, SamplesUtils.GetDoubleResult(algorithm, "CurrentAverageQuality"), tolerance);
      Assert.AreEqual(100.62175156249987, SamplesUtils.GetDoubleResult(algorithm, "CurrentWorstQuality"), tolerance);
      Assert.AreEqual(100900, SamplesUtils.GetIntResult(algorithm, "EvaluatedSolutions"));

      AssertSolutionAccuracies(algorithm, "Best training solution", 0.80875, 0.795031055900621, tolerance);
      AssertSolutionAccuracies(algorithm, "Best validation solution", 0.81375, 0.788819875776398, tolerance);
    }

    /// <summary>
    /// Reads the classification solution stored under <paramref name="resultName"/> in the
    /// algorithm results and checks its training and test accuracy.
    /// </summary>
    private static void AssertSolutionAccuracies(GeneticAlgorithm algorithm, string resultName,
      double expectedTrainingAccuracy, double expectedTestAccuracy, double tolerance) {
      var solution = (IClassificationSolution)algorithm.Results[resultName].Value;
      Assert.AreEqual(expectedTrainingAccuracy, solution.TrainingAccuracy, tolerance);
      Assert.AreEqual(expectedTestAccuracy, solution.TestAccuracy, tolerance);
    }

    /// <summary>
    /// Creates the fully configured genetic algorithm: first the symbolic classification
    /// problem (data, grammar, remaining parameters), then the algorithm settings.
    /// </summary>
    /// <returns>The configured, not yet executed algorithm.</returns>
    private GeneticAlgorithm CreateGpSymbolicClassificationSample() {
      var algorithm = new GeneticAlgorithm();

      #region Problem Configuration
      var problem = new SymbolicClassificationSingleObjectiveProblem();
      problem.Name = "Mammography Classification Problem";
      problem.Description = "Mammography dataset imported from the UCI machine learning repository (http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass)";
      problem.ProblemData = LoadMammographyData();
      problem.SymbolicExpressionTreeGrammar = CreateGrammar();
      ConfigureRemainingProblemParameters(problem);
      #endregion

      #region Algorithm Configuration
      ConfigureAlgorithm(algorithm, problem);
      #endregion

      return algorithm;
    }

    /// <summary>
    /// Loads the mammography instance from the UCI instance provider and sets the target
    /// variable, the checked input variables, the partitions and the descriptive texts.
    /// </summary>
    private static ClassificationProblemData LoadMammographyData() {
      var uciProvider = new UCIInstanceProvider();
      var descriptor = uciProvider.GetDataDescriptors()
        .Where(d => d.Name.Equals("Mammography, M. Elter, 2007"))
        .Single();
      var data = (ClassificationProblemData)uciProvider.LoadData(descriptor);

      var severityTarget = data.TargetVariableParameter.ValidValues.First(v => v.Value == "Severity");
      data.TargetVariableParameter.Value = severityTarget;

      // "BI-RADS" and the target "Severity" are unchecked; the other four inputs are checked.
      SetInputVariableChecked(data, "BI-RADS", false);
      SetInputVariableChecked(data, "Age", true);
      SetInputVariableChecked(data, "Shape", true);
      SetInputVariableChecked(data, "Margin", true);
      SetInputVariableChecked(data, "Density", true);
      SetInputVariableChecked(data, "Severity", false);

      data.TrainingPartition.Start = 0;
      data.TrainingPartition.End = 800;
      data.TestPartition.Start = 800;
      data.TestPartition.End = 961;

      data.Name = "Data imported from mammographic_masses.csv";
      data.Description = "Original dataset: http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass, missing values have been replaced with median values.";
      return data;
    }

    /// <summary>
    /// Sets the checked state of the single input variable whose value equals
    /// <paramref name="variableName"/>.
    /// </summary>
    private static void SetInputVariableChecked(ClassificationProblemData data, string variableName, bool isChecked) {
      var inputVariable = data.InputVariables.Single(x => x.Value == variableName);
      data.InputVariables.SetItemCheckedState(inputVariable, isChecked);
    }

    /// <summary>
    /// Creates the default classification grammar, disables the variable-condition symbol
    /// and sets the parameters of the variable and constant symbols.
    /// </summary>
    private static TypeCoherentExpressionGrammar CreateGrammar() {
      var grammar = new TypeCoherentExpressionGrammar();
      grammar.ConfigureAsDefaultClassificationGrammar();
      grammar.Symbols.OfType<VariableCondition>().Single().Enabled = false;

      // for backwards compatibility
      foreach (var variableBase in grammar.Symbols.OfType<VariableBase>()) {
        variableBase.VariableChangeProbability = 1.0;
      }

      var variableSymbol = grammar.Symbols.OfType<Variable>().Single();
      variableSymbol.WeightMu = 1.0;
      variableSymbol.WeightSigma = 1.0;
      variableSymbol.WeightManipulatorMu = 0.0;
      variableSymbol.WeightManipulatorSigma = 0.05;
      variableSymbol.MultiplicativeWeightManipulatorSigma = 0.03;

      var constantSymbol = grammar.Symbols.OfType<Constant>().Single();
      constantSymbol.MaxValue = 20;
      constantSymbol.MinValue = -20;
      constantSymbol.ManipulatorMu = 0.0;
      constantSymbol.ManipulatorSigma = 1;
      constantSymbol.MultiplicativeManipulatorSigma = 0.03;

      return grammar;
    }

    /// <summary>
    /// Sets the problem parameters that are configured after data and grammar: best known
    /// quality, fitness and validation partitions, tree size limits and the evaluator.
    /// </summary>
    private static void ConfigureRemainingProblemParameters(SymbolicClassificationSingleObjectiveProblem problem) {
      problem.BestKnownQuality.Value = 0.0;
      problem.FitnessCalculationPartition.Start = 0;
      problem.FitnessCalculationPartition.End = 400;
      problem.ValidationPartition.Start = 400;
      problem.ValidationPartition.End = 800;
      problem.RelativeNumberOfEvaluatedSamples.Value = 1;
      problem.MaximumSymbolicExpressionTreeLength.Value = 100;
      problem.MaximumSymbolicExpressionTreeDepth.Value = 10;
      problem.MaximumFunctionDefinitions.Value = 0;
      problem.MaximumFunctionArguments.Value = 0;
      problem.EvaluatorParameter.Value = new SymbolicClassificationSingleObjectiveMeanSquaredErrorEvaluator();
    }

    /// <summary>
    /// Attaches the problem to the algorithm, names it, applies the genetic algorithm
    /// parameters, sets the shaking factors of the two shaker mutators and unchecks the
    /// overfitting and allele frequency analyzers.
    /// </summary>
    private static void ConfigureAlgorithm(GeneticAlgorithm algorithm, SymbolicClassificationSingleObjectiveProblem problem) {
      algorithm.Problem = problem;
      algorithm.Name = "Genetic Programming - Symbolic Classification";
      algorithm.Description = "A standard genetic programming algorithm to solve a classification problem (Mammographic+Mass dataset)";
      SamplesUtils.ConfigureGeneticAlgorithmParameters<TournamentSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(
        algorithm, 1000, 1, 100, 0.15, 5);

      var manipulator = (MultiSymbolicExpressionTreeManipulator)algorithm.Mutator;
      manipulator.Operators.OfType<FullTreeShaker>().Single().ShakingFactor = 0.1;
      manipulator.Operators.OfType<OnePointShaker>().Single().ShakingFactor = 1.0;

      var overfittingAnalyzer = algorithm.Analyzer.Operators
        .OfType<SymbolicClassificationSingleObjectiveOverfittingAnalyzer>()
        .Single();
      algorithm.Analyzer.Operators.SetItemCheckedState(overfittingAnalyzer, false);

      var alleleFrequencyAnalyzer = algorithm.Analyzer.Operators
        .OfType<SymbolicDataAnalysisAlleleFrequencyAnalyzer>()
        .First();
      algorithm.Analyzer.Operators.SetItemCheckedState(alleleFrequencyAnalyzer, false);
    }
  }
}
|
---|