#region License Information
/* HeuristicLab
 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System.IO;
using System.Linq;
using HEAL.Attic;
using HeuristicLab.Algorithms.GeneticAlgorithm;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.DataAnalysis.Symbolic;
using HeuristicLab.Problems.DataAnalysis.Symbolic.Classification;
using HeuristicLab.Problems.Instances.DataAnalysis;
using HeuristicLab.Selection;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace HeuristicLab.Tests {
  /// <summary>
  /// Builds, serializes and runs the "Genetic Programming - Symbolic Classification" sample
  /// (a genetic algorithm configured for the Mammographic Mass classification data).
  /// </summary>
  [TestClass]
  public class GPSymbolicClassificationSampleTest {
    // Base name (without extension) of the serialized sample file.
    private const string SampleFileName = "SGP_SymbClass";

    private static readonly ProtoBufSerializer serializer = new ProtoBufSerializer();

    /// <summary>
    /// Creates the sample and serializes it to
    /// <c>SamplesUtils.SamplesDirectory/SGP_SymbClass + SamplesUtils.SampleFileExtension</c>.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Create")]
    [TestProperty("Time", "medium")]
    public void CreateGpSymbolicClassificationSampleTest() {
      var ga = CreateGpSymbolicClassificationSample();
      string path = Path.Combine(SamplesUtils.SamplesDirectory, SampleFileName + SamplesUtils.SampleFileExtension);
      serializer.Serialize(ga, path);
    }

    /// <summary>
    /// Creates the sample, disables random seeding, runs the algorithm and compares the
    /// reported qualities, evaluation count and solution accuracies with pinned values.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Execute")]
    [TestProperty("Time", "long")]
    public void RunGpSymbolicClassificationSampleTest() {
      var ga = CreateGpSymbolicClassificationSample();
      // The assertions below pin exact values, so the run must not pick a random seed.
      ga.SetSeedRandomly.Value = false;
      SamplesUtils.RunAlgorithm(ga);

      Assert.AreEqual(0.141880203907627, SamplesUtils.GetDoubleResult(ga, "BestQuality"), 1E-8);
      Assert.AreEqual(4.3246992327753295, SamplesUtils.GetDoubleResult(ga, "CurrentAverageQuality"), 1E-8);
      Assert.AreEqual(100.62175156249987, SamplesUtils.GetDoubleResult(ga, "CurrentWorstQuality"), 1E-8);
      Assert.AreEqual(100900, SamplesUtils.GetIntResult(ga, "EvaluatedSolutions"));

      var bestTrainingSolution = (IClassificationSolution)ga.Results["Best training solution"].Value;
      Assert.AreEqual(0.80875, bestTrainingSolution.TrainingAccuracy, 1E-8);
      Assert.AreEqual(0.795031055900621, bestTrainingSolution.TestAccuracy, 1E-8);

      var bestValidationSolution = (IClassificationSolution)ga.Results["Best validation solution"].Value;
      Assert.AreEqual(0.81375, bestValidationSolution.TrainingAccuracy, 1E-8);
      Assert.AreEqual(0.788819875776398, bestValidationSolution.TestAccuracy, 1E-8);
    }

    /// <summary>
    /// Builds the genetic algorithm used by both tests: a single-objective symbolic
    /// classification problem on the UCI Mammographic Mass data, a customized grammar,
    /// and the GA operator/analyzer configuration.
    /// </summary>
    /// <returns>The fully configured (not yet executed) algorithm.</returns>
    private GeneticAlgorithm CreateGpSymbolicClassificationSample() {
      GeneticAlgorithm ga = new GeneticAlgorithm();

      #region Problem Configuration
      SymbolicClassificationSingleObjectiveProblem symbClassProblem = new SymbolicClassificationSingleObjectiveProblem();
      symbClassProblem.Name = "Mammography Classification Problem";
      symbClassProblem.Description = "Mammography dataset imported from the UCI machine learning repository (http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass)";

      UCIInstanceProvider provider = new UCIInstanceProvider();
      // Single() throws if the descriptor is missing or ambiguous, which fails the test early.
      var instance = provider.GetDataDescriptors().Single(x => x.Name.Equals("Mammography, M. Elter, 2007"));
      var mammoData = (ClassificationProblemData)provider.LoadData(instance);
      mammoData.TargetVariableParameter.Value = mammoData.TargetVariableParameter.ValidValues
        .First(v => v.Value == "Severity");

      // Inputs: Age, Shape, Margin and Density are checked; BI-RADS and the target (Severity) are unchecked.
      mammoData.InputVariables.SetItemCheckedState(
        mammoData.InputVariables.Single(x => x.Value == "BI-RADS"), false);
      mammoData.InputVariables.SetItemCheckedState(
        mammoData.InputVariables.Single(x => x.Value == "Age"), true);
      mammoData.InputVariables.SetItemCheckedState(
        mammoData.InputVariables.Single(x => x.Value == "Shape"), true);
      mammoData.InputVariables.SetItemCheckedState(
        mammoData.InputVariables.Single(x => x.Value == "Margin"), true);
      mammoData.InputVariables.SetItemCheckedState(
        mammoData.InputVariables.Single(x => x.Value == "Density"), true);
      mammoData.InputVariables.SetItemCheckedState(
        mammoData.InputVariables.Single(x => x.Value == "Severity"), false);

      mammoData.TrainingPartition.Start = 0;
      mammoData.TrainingPartition.End = 800;
      mammoData.TestPartition.Start = 800;
      mammoData.TestPartition.End = 961;
      mammoData.Name = "Data imported from mammographic_masses.csv";
      mammoData.Description = "Original dataset: http://archive.ics.uci.edu/ml/datasets/Mammographic+Mass, missing values have been replaced with median values.";
      symbClassProblem.ProblemData = mammoData;

      // configure grammar
      // NOTE(review): the generic type arguments of the OfType<> calls below were missing in the
      // reviewed text and have been restored from the members accessed on each result and from
      // the file's using directives - confirm against version control.
      var grammar = new TypeCoherentExpressionGrammar();
      grammar.ConfigureAsDefaultClassificationGrammar();
      grammar.Symbols.OfType<VariableCondition>().Single().Enabled = false;
      foreach (var varSy in grammar.Symbols.OfType<VariableBase>()) {
        varSy.VariableChangeProbability = 1.0; // for backwards compatibility
      }

      var varSymbol = grammar.Symbols.OfType<Variable>().Single();
      varSymbol.WeightMu = 1.0;
      varSymbol.WeightSigma = 1.0;
      varSymbol.WeightManipulatorMu = 0.0;
      varSymbol.WeightManipulatorSigma = 0.05;
      varSymbol.MultiplicativeWeightManipulatorSigma = 0.03;

      // NOTE(review): Number is inferred from the local's name and the Min/Max/Manipulator
      // members; older HeuristicLab versions exposed these on Constant - confirm.
      var numSymbol = grammar.Symbols.OfType<Number>().Single();
      numSymbol.MaxValue = 20;
      numSymbol.MinValue = -20;
      numSymbol.ManipulatorMu = 0.0;
      numSymbol.ManipulatorSigma = 1;
      numSymbol.MultiplicativeManipulatorSigma = 0.03;
      symbClassProblem.SymbolicExpressionTreeGrammar = grammar;

      // configure remaining problem parameters
      symbClassProblem.BestKnownQuality.Value = 0.0;
      // Rows [0, 400) are used for fitness calculation and [400, 800) for validation.
      symbClassProblem.FitnessCalculationPartition.Start = 0;
      symbClassProblem.FitnessCalculationPartition.End = 400;
      symbClassProblem.ValidationPartition.Start = 400;
      symbClassProblem.ValidationPartition.End = 800;
      symbClassProblem.RelativeNumberOfEvaluatedSamples.Value = 1;
      symbClassProblem.MaximumSymbolicExpressionTreeLength.Value = 100;
      symbClassProblem.MaximumSymbolicExpressionTreeDepth.Value = 10;
      symbClassProblem.MaximumFunctionDefinitions.Value = 0;
      symbClassProblem.MaximumFunctionArguments.Value = 0;
      symbClassProblem.EvaluatorParameter.Value = new SymbolicClassificationSingleObjectiveMeanSquaredErrorEvaluator();
      #endregion

      #region Algorithm Configuration
      ga.Problem = symbClassProblem;
      ga.Name = "Genetic Programming - Symbolic Classification";
      ga.Description = "A standard genetic programming algorithm to solve a classification problem (Mammographic+Mass dataset)";
      // NOTE(review): selector/crossover/manipulator type arguments were missing in the reviewed
      // text; restored from the HeuristicLab.Selection using directive and the Mutator cast
      // below - confirm against SamplesUtils.ConfigureGeneticAlgorithmParameters.
      SamplesUtils.ConfigureGeneticAlgorithmParameters<TournamentSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(
        ga, 1000, 1, 100, 0.15, 5
      );

      var mutator = (MultiSymbolicExpressionTreeManipulator)ga.Mutator;
      mutator.Operators.OfType<FullTreeShaker>().Single().ShakingFactor = 0.1;
      mutator.Operators.OfType<OnePointShaker>().Single().ShakingFactor = 1.0;

      // Uncheck two analyzers in the algorithm's analyzer list.
      ga.Analyzer.Operators.SetItemCheckedState(
        ga.Analyzer.Operators
          .OfType<SymbolicClassificationSingleObjectiveOverfittingAnalyzer>()
          .Single(), false);
      ga.Analyzer.Operators.SetItemCheckedState(
        ga.Analyzer.Operators
          .OfType<SymbolicDataAnalysisAlleleFrequencyAnalyzer>()
          .First(), false);
      #endregion

      return ga;
    }
  }
}