[11450] | 1 | #region License Information
|
---|
| 2 | /* HeuristicLab
|
---|
[12031] | 3 | * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
[11450] | 4 | *
|
---|
| 5 | * This file is part of HeuristicLab.
|
---|
| 6 | *
|
---|
| 7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
| 8 | * it under the terms of the GNU General Public License as published by
|
---|
| 9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 10 | * (at your option) any later version.
|
---|
| 11 | *
|
---|
| 12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 15 | * GNU General Public License for more details.
|
---|
| 16 | *
|
---|
| 17 | * You should have received a copy of the GNU General Public License
|
---|
| 18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | */
|
---|
| 20 | #endregion
|
---|
| 21 |
|
---|
| 22 | using System.IO;
|
---|
| 23 | using System.Linq;
|
---|
| 24 | using HeuristicLab.Algorithms.GeneticAlgorithm;
|
---|
| 25 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
| 26 | using HeuristicLab.Persistence.Default.Xml;
|
---|
| 27 | using HeuristicLab.Problems.DataAnalysis;
|
---|
| 28 | using HeuristicLab.Problems.DataAnalysis.Symbolic;
|
---|
| 29 | using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
|
---|
| 30 | using HeuristicLab.Problems.Instances.DataAnalysis;
|
---|
| 31 | using HeuristicLab.Selection;
|
---|
| 32 | using Microsoft.VisualStudio.TestTools.UnitTesting;
|
---|
| 33 |
|
---|
namespace HeuristicLab.Tests {
  /// <summary>
  /// Tests that create, serialize and execute the "Genetic Programming - Symbolic Regression"
  /// sample: a genetic algorithm configured for a single-objective symbolic regression problem
  /// on the "Tower" problem instance.
  /// </summary>
  [TestClass]
  public class GPSymbolicRegressionSampleTest {
    private const string SampleFileName = "SGP_SymbReg";

    /// <summary>
    /// Builds the sample and serializes it to a file named after <see cref="SampleFileName"/>
    /// (plus the sample file extension) inside the samples directory.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Create")]
    [TestProperty("Time", "medium")]
    public void CreateGpSymbolicRegressionSampleTest() {
      GeneticAlgorithm sample = CreateGpSymbolicRegressionSample();
      string fileName = SampleFileName + SamplesUtils.SampleFileExtension;
      string targetPath = Path.Combine(SamplesUtils.SamplesDirectory, fileName);
      XmlGenerator.Serialize(sample, targetPath);
    }

    /// <summary>
    /// Runs the sample with <c>SetSeedRandomly</c> switched off and compares the produced
    /// results against stored reference values (tolerance 1E-8 for floating point results).
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Execute")]
    [TestProperty("Time", "long")]
    public void RunGpSymbolicRegressionSampleTest() {
      const double tolerance = 1E-8;

      GeneticAlgorithm algorithm = CreateGpSymbolicRegressionSample();
      algorithm.SetSeedRandomly.Value = false;
      SamplesUtils.RunAlgorithm(algorithm);

      // quality results reported by the algorithm
      Assert.AreEqual(0.858344291534625, SamplesUtils.GetDoubleResult(algorithm, "BestQuality"), tolerance);
      Assert.AreEqual(0.56758466520692641, SamplesUtils.GetDoubleResult(algorithm, "CurrentAverageQuality"), tolerance);
      Assert.AreEqual(0, SamplesUtils.GetDoubleResult(algorithm, "CurrentWorstQuality"), tolerance);
      Assert.AreEqual(50950, SamplesUtils.GetIntResult(algorithm, "EvaluatedSolutions"));

      // R² of the best solution on the training partition
      var trainingBest = (IRegressionSolution)algorithm.Results["Best training solution"].Value;
      Assert.AreEqual(0.85504801557844745, trainingBest.TrainingRSquared, tolerance);
      Assert.AreEqual(0.86259381948647817, trainingBest.TestRSquared, tolerance);

      // R² of the best solution on the validation partition
      var validationBest = (IRegressionSolution)algorithm.Results["Best validation solution"].Value;
      Assert.AreEqual(0.84854338315539746, validationBest.TrainingRSquared, tolerance);
      Assert.AreEqual(0.8662813452656678, validationBest.TestRSquared, tolerance);
    }

    /// <summary>
    /// Assembles the sample: a symbolic regression problem on the Tower data with a customized
    /// grammar, solved by a genetic algorithm using tournament selection, subtree crossover
    /// and the multi symbolic expression tree manipulator.
    /// </summary>
    /// <returns>The fully configured, not yet executed genetic algorithm.</returns>
    private GeneticAlgorithm CreateGpSymbolicRegressionSample() {
      var algorithm = new GeneticAlgorithm();

      #region Problem Configuration
      var problem = new SymbolicRegressionSingleObjectiveProblem {
        Name = "Tower Symbolic Regression Problem",
        Description = "Tower Dataset (downloaded from: http://www.symbolicregression.com/?q=towerProblem)"
      };
      problem.ProblemData = LoadTowerProblemData();
      problem.SymbolicExpressionTreeGrammar = CreateRegressionGrammar();

      // configure remaining problem parameters
      problem.BestKnownQuality.Value = 0.97;
      problem.FitnessCalculationPartition.Start = 0;
      problem.FitnessCalculationPartition.End = 2300;
      problem.ValidationPartition.Start = 2300;
      problem.ValidationPartition.End = 3136;
      problem.RelativeNumberOfEvaluatedSamples.Value = 1;
      problem.MaximumSymbolicExpressionTreeLength.Value = 150;
      problem.MaximumSymbolicExpressionTreeDepth.Value = 12;
      problem.MaximumFunctionDefinitions.Value = 0;
      problem.MaximumFunctionArguments.Value = 0;

      problem.EvaluatorParameter.Value = new SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator();
      #endregion

      #region Algorithm Configuration
      algorithm.Problem = problem;
      algorithm.Name = "Genetic Programming - Symbolic Regression";
      algorithm.Description = "A standard genetic programming algorithm to solve a symbolic regression problem (tower dataset)";
      SamplesUtils.ConfigureGeneticAlgorithmParameters<TournamentSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(
        algorithm, 1000, 1, 50, 0.15, 5);

      var manipulator = (MultiSymbolicExpressionTreeManipulator)algorithm.Mutator;
      manipulator.Operators.OfType<FullTreeShaker>().Single().ShakingFactor = 0.1;
      manipulator.Operators.OfType<OnePointShaker>().Single().ShakingFactor = 1.0;

      // uncheck the overfitting analyzer and the allele frequency analyzer
      var analyzers = algorithm.Analyzer.Operators;
      var overfittingAnalyzer = analyzers.OfType<SymbolicRegressionSingleObjectiveOverfittingAnalyzer>().Single();
      analyzers.SetItemCheckedState(overfittingAnalyzer, false);
      var alleleFrequencyAnalyzer = analyzers.OfType<SymbolicDataAnalysisAlleleFrequencyAnalyzer>().First();
      analyzers.SetItemCheckedState(alleleFrequencyAnalyzer, false);
      #endregion

      return algorithm;
    }

    /// <summary>
    /// Loads the "Tower" instance from the real-world regression instance provider, selects
    /// "towerResponse" as target, adjusts the checked input variables and sets the
    /// training and test partitions.
    /// </summary>
    /// <returns>The configured problem data.</returns>
    private static RegressionProblemData LoadTowerProblemData() {
      var instanceProvider = new RegressionRealWorldInstanceProvider();
      var towerDescriptor = instanceProvider.GetDataDescriptors().Single(d => d.Name.Equals("Tower"));
      var data = (RegressionProblemData)instanceProvider.LoadData(towerDescriptor);

      data.TargetVariableParameter.Value = data.TargetVariableParameter.ValidValues
        .First(v => v.Value == "towerResponse");

      // x1 is checked explicitly, the listed variables (including the target) are unchecked
      SetInputVariableChecked(data, "x1", true);
      foreach (string variableName in new[] { "x7", "x11", "x16", "x21", "x25", "towerResponse" }) {
        SetInputVariableChecked(data, variableName, false);
      }

      data.TrainingPartition.Start = 0;
      data.TrainingPartition.End = 3136;
      data.TestPartition.Start = 3136;
      data.TestPartition.End = 4999;
      data.Name = "Data imported from towerData.txt";
      data.Description = "Chemical concentration at top of distillation tower, dataset downloaded from: http://vanillamodeling.com/realproblems.html, best R² achieved with nu-SVR = 0.97";
      return data;
    }

    /// <summary>
    /// Sets the checked state of the single input variable whose value equals
    /// <paramref name="variableName"/>.
    /// </summary>
    /// <param name="data">Problem data whose input variable list is modified.</param>
    /// <param name="variableName">Name of the input variable; exactly one entry must match.</param>
    /// <param name="isChecked">The checked state to assign.</param>
    private static void SetInputVariableChecked(RegressionProblemData data, string variableName, bool isChecked) {
      var inputVariable = data.InputVariables.Single(v => v.Value == variableName);
      data.InputVariables.SetItemCheckedState(inputVariable, isChecked);
    }

    /// <summary>
    /// Creates a type coherent grammar configured as default regression grammar, sets the
    /// initial frequency of the variable condition symbol to zero and adjusts the weight and
    /// manipulation settings of the variable and constant symbols.
    /// </summary>
    /// <returns>The configured grammar.</returns>
    private static TypeCoherentExpressionGrammar CreateRegressionGrammar() {
      // configure grammar
      var grammar = new TypeCoherentExpressionGrammar();
      grammar.ConfigureAsDefaultRegressionGrammar();
      grammar.Symbols.OfType<VariableCondition>().Single().InitialFrequency = 0.0;

      // the plain variable symbol, i.e. the one that is not a lagged variable
      var variableSymbol = grammar.Symbols.OfType<Variable>().Single(s => !(s is LaggedVariable));
      variableSymbol.WeightMu = 1.0;
      variableSymbol.WeightSigma = 1.0;
      variableSymbol.WeightManipulatorMu = 0.0;
      variableSymbol.WeightManipulatorSigma = 0.05;
      variableSymbol.MultiplicativeWeightManipulatorSigma = 0.03;

      var constantSymbol = grammar.Symbols.OfType<Constant>().Single();
      constantSymbol.MaxValue = 20;
      constantSymbol.MinValue = -20;
      constantSymbol.ManipulatorMu = 0.0;
      constantSymbol.ManipulatorSigma = 1;
      constantSymbol.MultiplicativeManipulatorSigma = 0.03;

      return grammar;
    }
  }
}
|
---|