#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21 |
|
---|
22 | using System.IO;
|
---|
23 | using System.Linq;
|
---|
24 | using HeuristicLab.Algorithms.GeneticAlgorithm;
|
---|
25 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
26 | using HeuristicLab.Persistence.Default.Xml;
|
---|
27 | using HeuristicLab.Problems.DataAnalysis;
|
---|
28 | using HeuristicLab.Problems.DataAnalysis.Symbolic;
|
---|
29 | using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
|
---|
30 | using HeuristicLab.Problems.Instances.DataAnalysis;
|
---|
31 | using HeuristicLab.Selection;
|
---|
32 | using Microsoft.VisualStudio.TestTools.UnitTesting;
|
---|
33 |
|
---|
namespace HeuristicLab.Tests {
  /// <summary>
  /// Creates, serializes and executes the "Genetic Programming - Symbolic Regression" sample:
  /// a <see cref="GeneticAlgorithm"/> configured with a symbolic regression problem on the
  /// "Tower" instance of <see cref="RegressionRealWorldInstanceProvider"/>.
  /// </summary>
  [TestClass]
  public class GPSymbolicRegressionSampleTest {
    private const string SampleFileName = "SGP_SymbReg";

    /// <summary>
    /// Builds the sample algorithm and serializes it to a file named after
    /// <see cref="SampleFileName"/> inside the samples directory.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Create")]
    [TestProperty("Time", "medium")]
    public void CreateGpSymbolicRegressionSampleTest() {
      GeneticAlgorithm algorithm = CreateGpSymbolicRegressionSample();
      string fileName = SampleFileName + SamplesUtils.SampleFileExtension;
      string targetPath = Path.Combine(SamplesUtils.SamplesDirectory, fileName);
      XmlGenerator.Serialize(algorithm, targetPath);
    }

    /// <summary>
    /// Runs the sample algorithm without random reseeding and compares the reported
    /// qualities, the evaluation count and the R² values of the best training and
    /// validation solutions against stored reference values.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Execute")]
    [TestProperty("Time", "long")]
    public void RunGpSymbolicRegressionSampleTest() {
      const double tolerance = 1E-8;

      GeneticAlgorithm algorithm = CreateGpSymbolicRegressionSample();
      // Do not reseed randomly; the assertions below expect exact values.
      algorithm.SetSeedRandomly.Value = false;
      SamplesUtils.RunAlgorithm(algorithm);

      // Algorithm-level results.
      Assert.AreEqual(0.858344291534625, SamplesUtils.GetDoubleResult(algorithm, "BestQuality"), tolerance);
      Assert.AreEqual(0.56758466520692641, SamplesUtils.GetDoubleResult(algorithm, "CurrentAverageQuality"), tolerance);
      Assert.AreEqual(0, SamplesUtils.GetDoubleResult(algorithm, "CurrentWorstQuality"), tolerance);
      Assert.AreEqual(50950, SamplesUtils.GetIntResult(algorithm, "EvaluatedSolutions"));

      // Best solution with respect to the training data.
      IRegressionSolution trainingBest = (IRegressionSolution)algorithm.Results["Best training solution"].Value;
      Assert.AreEqual(0.85504801557844745, trainingBest.TrainingRSquared, tolerance);
      Assert.AreEqual(0.86259381948647817, trainingBest.TestRSquared, tolerance);

      // Best solution with respect to the validation data.
      IRegressionSolution validationBest = (IRegressionSolution)algorithm.Results["Best validation solution"].Value;
      Assert.AreEqual(0.84854338315539746, validationBest.TrainingRSquared, tolerance);
      Assert.AreEqual(0.8662813452656678, validationBest.TestRSquared, tolerance);
    }

    /// <summary>
    /// Assembles the sample: a symbolic regression problem on the Tower data with a
    /// customized grammar, solved by a genetic algorithm using tournament selection,
    /// subtree crossover and a multi-manipulator for mutation.
    /// </summary>
    /// <returns>The fully configured, not yet executed algorithm.</returns>
    private GeneticAlgorithm CreateGpSymbolicRegressionSample() {
      var algorithm = new GeneticAlgorithm();

      #region Problem Configuration
      var problem = new SymbolicRegressionSingleObjectiveProblem {
        Name = "Tower Symbolic Regression Problem",
        Description = "Tower Dataset (downloaded from: http://www.symbolicregression.com/?q=towerProblem)"
      };

      // The problem data is assigned before the grammar, as in the original setup sequence.
      problem.ProblemData = CreateTowerProblemData();
      problem.SymbolicExpressionTreeGrammar = CreateRegressionGrammar();

      // configure remaining problem parameters
      problem.BestKnownQuality.Value = 0.97;
      problem.FitnessCalculationPartition.Start = 0;
      problem.FitnessCalculationPartition.End = 2300;
      problem.ValidationPartition.Start = 2300;
      problem.ValidationPartition.End = 3136;
      problem.RelativeNumberOfEvaluatedSamples.Value = 1;
      problem.MaximumSymbolicExpressionTreeLength.Value = 150;
      problem.MaximumSymbolicExpressionTreeDepth.Value = 12;
      problem.MaximumFunctionDefinitions.Value = 0;
      problem.MaximumFunctionArguments.Value = 0;

      problem.EvaluatorParameter.Value = new SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator();
      #endregion

      #region Algorithm Configuration
      algorithm.Problem = problem;
      algorithm.Name = "Genetic Programming - Symbolic Regression";
      algorithm.Description = "A standard genetic programming algorithm to solve a symbolic regression problem (tower dataset)";

      // NOTE(review): the positional arguments are presumably population size, elites,
      // maximum generations, mutation rate and tournament group size - confirm against SamplesUtils.
      SamplesUtils.ConfigureGeneticAlgorithmParameters<TournamentSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(
        algorithm, 1000, 1, 50, 0.15, 5);

      var manipulator = (MultiSymbolicExpressionTreeManipulator)algorithm.Mutator;
      manipulator.Operators.OfType<FullTreeShaker>().Single().ShakingFactor = 0.1;
      manipulator.Operators.OfType<OnePointShaker>().Single().ShakingFactor = 1.0;

      // Uncheck the overfitting analyzer and the first allele frequency analyzer.
      var overfittingAnalyzer = algorithm.Analyzer.Operators
        .OfType<SymbolicRegressionSingleObjectiveOverfittingAnalyzer>()
        .Single();
      algorithm.Analyzer.Operators.SetItemCheckedState(overfittingAnalyzer, false);

      var alleleFrequencyAnalyzer = algorithm.Analyzer.Operators
        .OfType<SymbolicDataAnalysisAlleleFrequencyAnalyzer>()
        .First();
      algorithm.Analyzer.Operators.SetItemCheckedState(alleleFrequencyAnalyzer, false);
      #endregion

      return algorithm;
    }

    /// <summary>
    /// Loads the "Tower" instance and sets its target variable, the checked state of
    /// selected input variables, the training/test partitions, and its name and description.
    /// </summary>
    /// <returns>The configured regression problem data.</returns>
    private static RegressionProblemData CreateTowerProblemData() {
      var instanceProvider = new RegressionRealWorldInstanceProvider();
      var towerDescriptor = instanceProvider.GetDataDescriptors()
        .Where(descriptor => descriptor.Name.Equals("Tower"))
        .Single();
      var problemData = (RegressionProblemData)instanceProvider.LoadData(towerDescriptor);

      problemData.TargetVariableParameter.Value = problemData.TargetVariableParameter.ValidValues
        .First(candidate => candidate.Value == "towerResponse");

      // "x1" is explicitly checked; the remaining listed variables are unchecked.
      SetInputVariableChecked(problemData, "x1", true);
      foreach (string variableName in new[] { "x7", "x11", "x16", "x21", "x25", "towerResponse" }) {
        SetInputVariableChecked(problemData, variableName, false);
      }

      problemData.TrainingPartition.Start = 0;
      problemData.TrainingPartition.End = 3136;
      problemData.TestPartition.Start = 3136;
      problemData.TestPartition.End = 4999;
      problemData.Name = "Data imported from towerData.txt";
      problemData.Description = "Chemical concentration at top of distillation tower, dataset downloaded from: http://vanillamodeling.com/realproblems.html, best R² achieved with nu-SVR = 0.97";

      return problemData;
    }

    /// <summary>
    /// Sets the checked state of the single input variable whose value equals
    /// <paramref name="variableName"/>.
    /// </summary>
    /// <param name="problemData">Problem data whose input variable list is modified.</param>
    /// <param name="variableName">Name of the input variable to look up.</param>
    /// <param name="isChecked">Checked state to apply.</param>
    private static void SetInputVariableChecked(RegressionProblemData problemData, string variableName, bool isChecked) {
      problemData.InputVariables.SetItemCheckedState(
        problemData.InputVariables.Single(variable => variable.Value == variableName), isChecked);
    }

    /// <summary>
    /// Creates the default regression grammar and adjusts the variable-condition,
    /// variable and constant symbols used by the sample.
    /// </summary>
    /// <returns>The configured grammar.</returns>
    private static TypeCoherentExpressionGrammar CreateRegressionGrammar() {
      // configure grammar
      TypeCoherentExpressionGrammar regressionGrammar = new TypeCoherentExpressionGrammar();
      regressionGrammar.ConfigureAsDefaultRegressionGrammar();
      regressionGrammar.Symbols.OfType<VariableCondition>().Single().InitialFrequency = 0.0;

      // for backwards compatibility
      foreach (var variableBase in regressionGrammar.Symbols.OfType<VariableBase>()) {
        variableBase.VariableChangeProbability = 1.0;
      }

      var variableSymbol = regressionGrammar.Symbols.OfType<Variable>().Single();
      variableSymbol.WeightMu = 1.0;
      variableSymbol.WeightSigma = 1.0;
      variableSymbol.WeightManipulatorMu = 0.0;
      variableSymbol.WeightManipulatorSigma = 0.05;
      variableSymbol.MultiplicativeWeightManipulatorSigma = 0.03;

      var constantSymbol = regressionGrammar.Symbols.OfType<Constant>().Single();
      constantSymbol.MaxValue = 20;
      constantSymbol.MinValue = -20;
      constantSymbol.ManipulatorMu = 0.0;
      constantSymbol.ManipulatorSigma = 1;
      constantSymbol.MultiplicativeManipulatorSigma = 0.03;

      return regressionGrammar;
    }
  }
}