Free cookie consent management tool by TermsFeed Policy Generator

source: stable/HeuristicLab.Tests/HeuristicLab-3.3/Samples/GPSymbolicRegressionSampleTest.cs @ 17105

Last change on this file since 17105 was 17105, checked in by mkommend, 5 years ago

#2520: Merged 16584, 16585, 16594, 16595, 16625, 16658, 16659, 16672, 16707, 16729, 16792, 16796, 16797, 16799, 16819, 16906, 16907, 16908, 16933, 16945, 16992, 16994, 16995, 16996, 16997, 17014, 17015, 17017, 17020, 17021, 17022, 17023, 17024, 17029, 17086, 17087, 17088, 17089 into stable.

File size: 8.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.IO;
23using System.Linq;
24using HEAL.Attic;
25using HeuristicLab.Algorithms.GeneticAlgorithm;
26using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
27using HeuristicLab.Problems.DataAnalysis;
28using HeuristicLab.Problems.DataAnalysis.Symbolic;
29using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
30using HeuristicLab.Problems.Instances.DataAnalysis;
31using HeuristicLab.Selection;
32using Microsoft.VisualStudio.TestTools.UnitTesting;
33
namespace HeuristicLab.Tests {
  /// <summary>
  /// Builds, serializes and executes the "Genetic Programming - Symbolic Regression" sample:
  /// a <see cref="GeneticAlgorithm"/> applied to a
  /// <see cref="SymbolicRegressionSingleObjectiveProblem"/> on the "Tower" data set.
  /// </summary>
  [TestClass]
  public class GPSymbolicRegressionSampleTest {
    private const string SampleFileName = "SGP_SymbReg";

    // Absolute tolerance used for every floating-point comparison in the run test.
    private const double Tolerance = 1E-8;

    // Input variables that get unchecked in the problem data, in the order they are processed.
    private static readonly string[] UncheckedInputVariables = {
      "x7", "x11", "x16", "x21", "x25", "towerResponse"
    };

    private static readonly ProtoBufSerializer serializer = new ProtoBufSerializer();

    /// <summary>
    /// Creates the sample and serializes it to the samples directory under
    /// <c>SampleFileName + SamplesUtils.SampleFileExtension</c>.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Create")]
    [TestProperty("Time", "medium")]
    public void CreateGpSymbolicRegressionSampleTest() {
      var ga = CreateGpSymbolicRegressionSample();
      string path = Path.Combine(SamplesUtils.SamplesDirectory, SampleFileName + SamplesUtils.SampleFileExtension);
      serializer.Serialize(ga, path);
    }

    /// <summary>
    /// Runs the sample with random seeding switched off and compares the reported results
    /// against the recorded reference values.
    /// </summary>
    [TestMethod]
    [TestCategory("Samples.Execute")]
    [TestProperty("Time", "long")]
    public void RunGpSymbolicRegressionSampleTest() {
      var ga = CreateGpSymbolicRegressionSample();
      // Random seeding is disabled; presumably this makes the run reproducible so that the
      // hard-coded reference values below hold.
      ga.SetSeedRandomly.Value = false;
      SamplesUtils.RunAlgorithm(ga);

      Assert.AreEqual(0.858344291534625, SamplesUtils.GetDoubleResult(ga, "BestQuality"), Tolerance);
      Assert.AreEqual(0.56758466520692641, SamplesUtils.GetDoubleResult(ga, "CurrentAverageQuality"), Tolerance);
      Assert.AreEqual(0.0, SamplesUtils.GetDoubleResult(ga, "CurrentWorstQuality"), Tolerance);
      Assert.AreEqual(50950, SamplesUtils.GetIntResult(ga, "EvaluatedSolutions"));

      var bestTrainingSolution = (IRegressionSolution)ga.Results["Best training solution"].Value;
      Assert.AreEqual(0.85504801557844745, bestTrainingSolution.TrainingRSquared, Tolerance);
      Assert.AreEqual(0.86259381948647817, bestTrainingSolution.TestRSquared, Tolerance);

      var bestValidationSolution = (IRegressionSolution)ga.Results["Best validation solution"].Value;
      Assert.AreEqual(0.84854338315539746, bestValidationSolution.TrainingRSquared, Tolerance);
      Assert.AreEqual(0.8662813452656678, bestValidationSolution.TestRSquared, Tolerance);
    }

    /// <summary>
    /// Assembles the complete sample: the algorithm instance, the configured Tower
    /// symbolic regression problem and the algorithm settings.
    /// </summary>
    /// <returns>The fully configured genetic algorithm.</returns>
    private GeneticAlgorithm CreateGpSymbolicRegressionSample() {
      GeneticAlgorithm ga = new GeneticAlgorithm();
      // The problem is assigned before any algorithm setting is touched (same order as before
      // the decomposition), because operators are selected from the assigned problem.
      ga.Problem = CreateTowerProblem();
      ConfigureAlgorithm(ga);
      return ga;
    }

    /// <summary>
    /// Creates the symbolic regression problem: problem data first, then the grammar,
    /// then the remaining problem parameters and the evaluator.
    /// </summary>
    private static SymbolicRegressionSingleObjectiveProblem CreateTowerProblem() {
      var symbRegProblem = new SymbolicRegressionSingleObjectiveProblem();
      symbRegProblem.Name = "Tower Symbolic Regression Problem";
      symbRegProblem.Description = "Tower Dataset (downloaded from: http://www.symbolicregression.com/?q=towerProblem)";

      // Statement order is kept exactly as in the original single-method version:
      // the problem data is assigned before the grammar is created and assigned.
      symbRegProblem.ProblemData = LoadTowerProblemData();
      symbRegProblem.SymbolicExpressionTreeGrammar = CreateGrammar();

      // configure remaining problem parameters
      symbRegProblem.BestKnownQuality.Value = 0.97;
      symbRegProblem.FitnessCalculationPartition.Start = 0;
      symbRegProblem.FitnessCalculationPartition.End = 2300;
      symbRegProblem.ValidationPartition.Start = 2300;
      symbRegProblem.ValidationPartition.End = 3136;
      symbRegProblem.RelativeNumberOfEvaluatedSamples.Value = 1;
      symbRegProblem.MaximumSymbolicExpressionTreeLength.Value = 150;
      symbRegProblem.MaximumSymbolicExpressionTreeDepth.Value = 12;
      symbRegProblem.MaximumFunctionDefinitions.Value = 0;
      symbRegProblem.MaximumFunctionArguments.Value = 0;

      symbRegProblem.EvaluatorParameter.Value = new SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator();
      return symbRegProblem;
    }

    /// <summary>
    /// Loads the "Tower" instance from the real-world regression instance provider and
    /// sets the target variable, the checked input variables and the partitions.
    /// </summary>
    private static RegressionProblemData LoadTowerProblemData() {
      RegressionRealWorldInstanceProvider provider = new RegressionRealWorldInstanceProvider();
      var instance = provider.GetDataDescriptors().Single(x => x.Name.Equals("Tower"));
      var towerProblemData = (RegressionProblemData)provider.LoadData(instance);

      towerProblemData.TargetVariableParameter.Value = towerProblemData.TargetVariableParameter.ValidValues
        .First(v => v.Value == "towerResponse");

      // "x1" is explicitly checked; the variables listed in UncheckedInputVariables are unchecked.
      towerProblemData.InputVariables.SetItemCheckedState(
        towerProblemData.InputVariables.Single(x => x.Value == "x1"), true);
      foreach (string variableName in UncheckedInputVariables) {
        towerProblemData.InputVariables.SetItemCheckedState(
          towerProblemData.InputVariables.Single(x => x.Value == variableName), false);
      }

      towerProblemData.TrainingPartition.Start = 0;
      towerProblemData.TrainingPartition.End = 3136;
      towerProblemData.TestPartition.Start = 3136;
      towerProblemData.TestPartition.End = 4999;
      towerProblemData.Name = "Data imported from towerData.txt";
      towerProblemData.Description = "Chemical concentration at top of distillation tower, dataset downloaded from: http://vanillamodeling.com/realproblems.html, best R² achieved with nu-SVR = 0.97";
      return towerProblemData;
    }

    /// <summary>
    /// Creates the default regression grammar and adjusts the variable-condition,
    /// variable and constant symbols.
    /// </summary>
    private static TypeCoherentExpressionGrammar CreateGrammar() {
      var grammar = new TypeCoherentExpressionGrammar();
      grammar.ConfigureAsDefaultRegressionGrammar();
      grammar.Symbols.OfType<VariableCondition>().Single().InitialFrequency = 0.0;
      foreach (var varSy in grammar.Symbols.OfType<VariableBase>()) {
        varSy.VariableChangeProbability = 1.0; // for backwards compatibility
      }

      var varSymbol = grammar.Symbols.OfType<Variable>().Single();
      varSymbol.WeightMu = 1.0;
      varSymbol.WeightSigma = 1.0;
      varSymbol.WeightManipulatorMu = 0.0;
      varSymbol.WeightManipulatorSigma = 0.05;
      varSymbol.MultiplicativeWeightManipulatorSigma = 0.03;

      var constSymbol = grammar.Symbols.OfType<Constant>().Single();
      constSymbol.MaxValue = 20;
      constSymbol.MinValue = -20;
      constSymbol.ManipulatorMu = 0.0;
      constSymbol.ManipulatorSigma = 1;
      constSymbol.MultiplicativeManipulatorSigma = 0.03;
      return grammar;
    }

    /// <summary>
    /// Names the algorithm, applies the genetic algorithm parameters, tunes the mutation
    /// operators and unchecks two analyzers. Expects <paramref name="ga"/> to already
    /// have its problem assigned.
    /// </summary>
    /// <param name="ga">The algorithm to configure.</param>
    private static void ConfigureAlgorithm(GeneticAlgorithm ga) {
      ga.Name = "Genetic Programming - Symbolic Regression";
      ga.Description = "A standard genetic programming algorithm to solve a symbolic regression problem (tower dataset)";
      // NOTE(review): the numeric arguments are presumably population size, elites, maximum
      // generations, mutation rate and tournament group size - confirm against SamplesUtils.
      SamplesUtils.ConfigureGeneticAlgorithmParameters<TournamentSelector, SubtreeCrossover, MultiSymbolicExpressionTreeManipulator>(
        ga, 1000, 1, 50, 0.15, 5);

      var mutator = (MultiSymbolicExpressionTreeManipulator)ga.Mutator;
      mutator.Operators.OfType<FullTreeShaker>().Single().ShakingFactor = 0.1;
      mutator.Operators.OfType<OnePointShaker>().Single().ShakingFactor = 1.0;

      ga.Analyzer.Operators.SetItemCheckedState(
        ga.Analyzer.Operators
        .OfType<SymbolicRegressionSingleObjectiveOverfittingAnalyzer>()
        .Single(), false);
      // First() rather than Single(): kept as in the original, which tolerates more than
      // one allele frequency analyzer being present.
      ga.Analyzer.Operators.SetItemCheckedState(
        ga.Analyzer.Operators
        .OfType<SymbolicDataAnalysisAlleleFrequencyAnalyzer>()
        .First(), false);
    }
  }
}
Note: See TracBrowser for help on using the repository browser.