
source: trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessCovarianceOptimizationProblem.cs @ 13160

#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.Instances;


namespace HeuristicLab.Algorithms.DataAnalysis {
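  // Searches over the structure of covariance functions for Gaussian process
  // regression: candidate solutions are symbolic expression trees whose inner nodes
  // are the binary symbols "Sum" and "Product" and whose leaves are base kernels
  // (see the grammar built in the ctor). The fitness of a tree is the best log
  // likelihood found while optimizing the hyperparameters of the corresponding
  // composite covariance function.
  //
  // Minimal usage sketch (hypothetical driver code; problemData is assumed to be an
  // existing IRegressionProblemData, and any algorithm that handles a
  // SymbolicExpressionTreeEncoding can be applied to the problem):
  //   var problem = new GaussianProcessCovarianceOptimizationProblem();
  //   problem.Load(problemData);
  //   problem.Restarts = 10;              // random restarts per tree evaluation
  //   problem.ConstantOptIterations = 50; // CG iterations per restart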
  [Item("Gaussian Process Covariance Optimization Problem", "")]
  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 300)]
  [StorableClass]
  public sealed class GaussianProcessCovarianceOptimizationProblem : SymbolicExpressionTreeProblem, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData>, IProblemInstanceExporter<IRegressionProblemData> {
    #region static variables and ctor
    private static readonly CovarianceMaternIso maternIso1;
    private static readonly CovarianceMaternIso maternIso3;
    private static readonly CovarianceMaternIso maternIso5;
    private static readonly CovariancePiecewisePolynomial piecewisePoly0;
    private static readonly CovariancePiecewisePolynomial piecewisePoly1;
    private static readonly CovariancePiecewisePolynomial piecewisePoly2;
    private static readonly CovariancePiecewisePolynomial piecewisePoly3;
    private static readonly CovariancePolynomial poly2;
    private static readonly CovariancePolynomial poly3;
    private static readonly CovarianceSpectralMixture spectralMixture1;
    private static readonly CovarianceSpectralMixture spectralMixture3;
    private static readonly CovarianceSpectralMixture spectralMixture5;
    private static readonly CovarianceLinear linear;
    private static readonly CovarianceLinearArd linearArd;
    private static readonly CovarianceNeuralNetwork neuralNetwork;
    private static readonly CovariancePeriodic periodic;
    private static readonly CovarianceRationalQuadraticIso ratQuadraticIso;
    private static readonly CovarianceRationalQuadraticArd ratQuadraticArd;
    private static readonly CovarianceSquaredExponentialArd sqrExpArd;
    private static readonly CovarianceSquaredExponentialIso sqrExpIso;

    static GaussianProcessCovarianceOptimizationProblem() {
      // cumbersome initialization because of ConstrainedValueParameters
      maternIso1 = new CovarianceMaternIso(); SetConstrainedValueParameter(maternIso1.DParameter, 1);
      maternIso3 = new CovarianceMaternIso(); SetConstrainedValueParameter(maternIso3.DParameter, 3);
      maternIso5 = new CovarianceMaternIso(); SetConstrainedValueParameter(maternIso5.DParameter, 5);

      piecewisePoly0 = new CovariancePiecewisePolynomial(); SetConstrainedValueParameter(piecewisePoly0.VParameter, 0);
      piecewisePoly1 = new CovariancePiecewisePolynomial(); SetConstrainedValueParameter(piecewisePoly1.VParameter, 1);
      piecewisePoly2 = new CovariancePiecewisePolynomial(); SetConstrainedValueParameter(piecewisePoly2.VParameter, 2);
      piecewisePoly3 = new CovariancePiecewisePolynomial(); SetConstrainedValueParameter(piecewisePoly3.VParameter, 3);

      poly2 = new CovariancePolynomial(); poly2.DegreeParameter.Value.Value = 2;
      poly3 = new CovariancePolynomial(); poly3.DegreeParameter.Value.Value = 3;

      spectralMixture1 = new CovarianceSpectralMixture(); spectralMixture1.QParameter.Value.Value = 1;
      spectralMixture3 = new CovarianceSpectralMixture(); spectralMixture3.QParameter.Value.Value = 3;
      spectralMixture5 = new CovarianceSpectralMixture(); spectralMixture5.QParameter.Value.Value = 5;

      linear = new CovarianceLinear();
      linearArd = new CovarianceLinearArd();
      neuralNetwork = new CovarianceNeuralNetwork();
      periodic = new CovariancePeriodic();
      ratQuadraticArd = new CovarianceRationalQuadraticArd();
      ratQuadraticIso = new CovarianceRationalQuadraticIso();
      sqrExpArd = new CovarianceSquaredExponentialArd();
      sqrExpIso = new CovarianceSquaredExponentialIso();
    }

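    // A constrained value parameter only accepts one of the preconstructed IntValue
    // instances in its ValidValues collection, so we select the instance with the
    // requested value instead of assigning a fresh IntValue.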
    private static void SetConstrainedValueParameter(IConstrainedValueParameter<IntValue> param, int val) {
      param.Value = param.ValidValues.Single(v => v.Value == val);
    }

    #endregion

    #region parameter names

    private const string ProblemDataParameterName = "ProblemData";
    private const string ConstantOptIterationsParameterName = "Constant optimization steps";
    private const string RestartsParameterName = "Restarts";

    #endregion

    #region Parameter Properties
    IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }

    public IValueParameter<IRegressionProblemData> ProblemDataParameter {
      get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IFixedValueParameter<IntValue> ConstantOptIterationsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[ConstantOptIterationsParameterName]; }
    }
    public IFixedValueParameter<IntValue> RestartsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[RestartsParameterName]; }
    }
    #endregion

    #region Properties

    public IRegressionProblemData ProblemData {
      get { return ProblemDataParameter.Value; }
      set { ProblemDataParameter.Value = value; }
    }
    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }

    public int ConstantOptIterations {
      get { return ConstantOptIterationsParameter.Value.Value; }
      set { ConstantOptIterationsParameter.Value.Value = value; }
    }

    public int Restarts {
      get { return RestartsParameter.Value.Value; }
      set { RestartsParameter.Value.Value = value; }
    }
    #endregion

    public override bool Maximization {
      get { return true; } // return the log likelihood (instead of the negative log likelihood as in GPR)
    }

    public GaussianProcessCovarianceOptimizationProblem()
      : base() {
      Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, "The data for the regression problem", new RegressionProblemData()));
      Parameters.Add(new FixedValueParameter<IntValue>(ConstantOptIterationsParameterName, "Number of optimization steps for hyperparameter values", new IntValue(50)));
      Parameters.Add(new FixedValueParameter<IntValue>(RestartsParameterName, "The number of random restarts for constant optimization.", new IntValue(10)));
      Parameters[RestartsParameterName].Hidden = true;
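      // grammar: inner nodes are the binary symbols "Sum" and "Product", leaves are
      // the terminal kernels initialized in the static ctor; the encoding created
      // below presumably limits trees to a maximum length of 10 and a depth of 5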
      var g = new SimpleSymbolicExpressionGrammar();
      g.AddSymbols(new string[] { "Sum", "Product" }, 2, 2);
      g.AddTerminalSymbols(new string[]
      {
        "Linear",
        "LinearArd",
        "MaternIso1",
        "MaternIso3",
        "MaternIso5",
        "NeuralNetwork",
        "Periodic",
        "PiecewisePolynomial0",
        "PiecewisePolynomial1",
        "PiecewisePolynomial2",
        "PiecewisePolynomial3",
        "Polynomial2",
        "Polynomial3",
        "RationalQuadraticArd",
        "RationalQuadraticIso",
        "SpectralMixture1",
        "SpectralMixture3",
        "SpectralMixture5",
        "SquaredExponentialArd",
        "SquaredExponentialIso"
      });
      base.Encoding = new SymbolicExpressionTreeEncoding(g, 10, 5);
    }

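    // Computes the fitness of a tree: the tree is translated into a composite
    // covariance function; then, for each restart, the hyperparameters are randomly
    // initialized and optimized with ALGLIB's conjugate gradient method. The best
    // log likelihood observed across all objective function calls is returned.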
    public override double Evaluate(ISymbolicExpressionTree tree, IRandom random) {

      var meanFunction = new MeanConst();
      var problemData = ProblemData;
      var ds = problemData.Dataset;
      var targetVariable = problemData.TargetVariable;
      var allowedInputVariables = problemData.AllowedInputVariables.ToArray();
      var nVars = allowedInputVariables.Length;
      var trainingRows = problemData.TrainingIndices.ToArray();

      // use the same covariance function for each restart
      var covarianceFunction = TreeToCovarianceFunction(tree);

      // allocate hyperparameters
      var hyperParameters = new double[meanFunction.GetNumberOfParameters(nVars) + covarianceFunction.GetNumberOfParameters(nVars) + 1]; // mean + cov + noise
      double[] bestHyperParameters = new double[hyperParameters.Length];
      var bestObjValue = new double[1] { double.MinValue };

      // data that is necessary for the objective function
      var data = Tuple.Create(ds, targetVariable, allowedInputVariables, trainingRows, (IMeanFunction)meanFunction, covarianceFunction, bestObjValue);

      for (int t = 0; t < Restarts; t++) {
        var prevBest = bestObjValue[0];
        var prevBestHyperParameters = new double[hyperParameters.Length];
        Array.Copy(bestHyperParameters, prevBestHyperParameters, bestHyperParameters.Length);

        // initialize hyperparameters
        hyperParameters[0] = ds.GetDoubleValues(targetVariable).Average(); // mean const

        for (int i = 0; i < covarianceFunction.GetNumberOfParameters(nVars); i++) {
          hyperParameters[1 + i] = random.NextDouble() * 2.0 - 1.0;
        }
        hyperParameters[hyperParameters.Length - 1] = 1.0; // s² = exp(2), TODO: other inits better?
        // use alglib's mincg (conjugate gradient) for hyper-parameter optimization ...
        double epsg = 0;
        double epsf = 0.00001;
        double epsx = 0;
        double stpmax = 1;
        int maxits = ConstantOptIterations;
        alglib.mincgstate state;
        alglib.mincgreport rep;

        alglib.mincgcreate(hyperParameters, out state);
        alglib.mincgsetcond(state, epsg, epsf, epsx, maxits);
        alglib.mincgsetstpmax(state, stpmax);
        alglib.mincgoptimize(state, ObjectiveFunction, null, data);

        alglib.mincgresults(state, out bestHyperParameters, out rep);

        if (rep.terminationtype < 0) {
          // error -> restore previous best quality
          bestObjValue[0] = prevBest;
          Array.Copy(prevBestHyperParameters, bestHyperParameters, prevBestHyperParameters.Length);
        }
      }

      return bestObjValue[0];
    }

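    // Records the best tree, its quality, and the corresponding regression solution
    // in the result collection whenever a strictly better quality appears.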
    public override void Analyze(ISymbolicExpressionTree[] trees, double[] qualities, ResultCollection results, IRandom random) {
      if (!results.ContainsKey("Best Solution Quality")) {
        results.Add(new Result("Best Solution Quality", typeof(DoubleValue)));
      }
      if (!results.ContainsKey("Best Tree")) {
        results.Add(new Result("Best Tree", typeof(ISymbolicExpressionTree)));
      }
      if (!results.ContainsKey("Best Solution")) {
        results.Add(new Result("Best Solution", typeof(GaussianProcessRegressionSolution)));
      }

      var bestQuality = qualities.Max();

      if (results["Best Solution Quality"].Value == null || bestQuality > ((DoubleValue)results["Best Solution Quality"].Value).Value) {
        var bestIdx = Array.IndexOf(qualities, bestQuality);
        var bestClone = (ISymbolicExpressionTree)trees[bestIdx].Clone();
        results["Best Tree"].Value = bestClone;
        results["Best Solution Quality"].Value = new DoubleValue(bestQuality);
        results["Best Solution"].Value = CreateSolution(bestClone, random);
      }
    }

    private IItem CreateSolution(ISymbolicExpressionTree tree, IRandom random) {
      // tune the hyper-parameters again.
      // this is suboptimal because 1) it duplicates effort and 2) we cannot be sure to find the same local optimum
      var meanFunction = new MeanConst();
      var problemData = ProblemData;
      var ds = problemData.Dataset;
      var targetVariable = problemData.TargetVariable;
      var allowedInputVariables = problemData.AllowedInputVariables.ToArray();
      var nVars = allowedInputVariables.Length;
      var trainingRows = problemData.TrainingIndices.ToArray();
      var bestObjValue = new double[1] { double.MinValue };

      // translate the tree into a composite covariance function (as in Evaluate)
      var covarianceFunction = TreeToCovarianceFunction(tree);
      // data that is necessary for the objective function
      var data = Tuple.Create(ds, targetVariable, allowedInputVariables, trainingRows, (IMeanFunction)meanFunction, covarianceFunction, bestObjValue);

      // allocate hyperparameters
      var hyperParameters = new double[meanFunction.GetNumberOfParameters(nVars) + covarianceFunction.GetNumberOfParameters(nVars) + 1]; // mean + cov + noise

      // initialize hyperparameters
      hyperParameters[0] = ds.GetDoubleValues(targetVariable).Average(); // mean const

      for (int i = 0; i < covarianceFunction.GetNumberOfParameters(nVars); i++) {
        hyperParameters[1 + i] = random.NextDouble() * 2.0 - 1.0;
      }
      hyperParameters[hyperParameters.Length - 1] = 1.0; // s² = exp(2), TODO: other inits better?

      // use alglib's mincg (conjugate gradient) for hyper-parameter optimization ...
      double epsg = 0;
      double epsf = 0.00001;
      double epsx = 0;
      double stpmax = 1;
      int maxits = ConstantOptIterations;
      alglib.mincgstate state;
      alglib.mincgreport rep;

      alglib.mincgcreate(hyperParameters, out state);
      alglib.mincgsetcond(state, epsg, epsf, epsx, maxits);
      alglib.mincgsetstpmax(state, stpmax);
      alglib.mincgoptimize(state, ObjectiveFunction, null, data);

      alglib.mincgresults(state, out hyperParameters, out rep);

      if (rep.terminationtype >= 0) {
        var model = new GaussianProcessModel(ds, targetVariable, allowedInputVariables, trainingRows, hyperParameters, meanFunction, covarianceFunction);
        return model.CreateRegressionSolution(ProblemData);
      } else return null;
    }

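    // Gradient-style callback for alglib.mincgoptimize: x holds the current
    // hyperparameter vector, func receives the objective value, grad the gradient,
    // and obj carries the data tuple packed in Evaluate/CreateSolution.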
    private void ObjectiveFunction(double[] x, ref double func, double[] grad, object obj) {
      // we want to optimize the model likelihood by changing the hyperparameters and also return the gradient for each hyperparameter
      var data = (Tuple<IDataset, string, string[], int[], IMeanFunction, ICovarianceFunction, double[]>)obj;
      var ds = data.Item1;
      var targetVariable = data.Item2;
      var allowedInputVariables = data.Item3;
      var trainingRows = data.Item4;
      var meanFunction = data.Item5;
      var covarianceFunction = data.Item6;
      var bestObjValue = data.Item7;
      var hyperParameters = x; // the decision variable vector

      try {
        var model = new GaussianProcessModel(ds, targetVariable, allowedInputVariables, trainingRows, hyperParameters, meanFunction, covarianceFunction);

324        func = model.NegativeLogLikelihood; // mincgoptimize, so we return negative likelihood
        bestObjValue[0] = Math.Max(bestObjValue[0], -func); // problem itself is a maximization problem
        var gradients = model.HyperparameterGradients;
        Array.Copy(gradients, grad, gradients.Length);
      } catch (ArgumentException) {
        // building the GaussianProcessModel might fail, in this case we return the worst possible objective value
        func = 1.0E+300;
        Array.Clear(grad, 0, grad.Length);
      }
    }

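    // Recursively translates an expression tree into a composite covariance function,
    // e.g. the tree Sum(Product(SquaredExponentialIso, Linear), Periodic) becomes
    // CovarianceSum(CovarianceProduct(sqrExpIso, linear), periodic).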
    private ICovarianceFunction TreeToCovarianceFunction(ISymbolicExpressionTree tree) {
      return TreeToCovarianceFunction(tree.Root.GetSubtree(0).GetSubtree(0)); // skip programroot and startsymbol
    }

    private ICovarianceFunction TreeToCovarianceFunction(ISymbolicExpressionTreeNode node) {
      switch (node.Symbol.Name) {
        case "Sum": {
            var sum = new CovarianceSum();
            sum.Terms.Add(TreeToCovarianceFunction(node.GetSubtree(0)));
            sum.Terms.Add(TreeToCovarianceFunction(node.GetSubtree(1)));
            return sum;
          }
        case "Product": {
            var prod = new CovarianceProduct();
            prod.Factors.Add(TreeToCovarianceFunction(node.GetSubtree(0)));
            prod.Factors.Add(TreeToCovarianceFunction(node.GetSubtree(1)));
            return prod;
          }
        // covFunction is cloned by the model so we can reuse instances of terminal covariance functions
        case "Linear": return linear;
        case "LinearArd": return linearArd;
        case "MaternIso1": return maternIso1;
        case "MaternIso3": return maternIso3;
        case "MaternIso5": return maternIso5;
        case "NeuralNetwork": return neuralNetwork;
        case "Periodic": return periodic;
        case "PiecewisePolynomial0": return piecewisePoly0;
        case "PiecewisePolynomial1": return piecewisePoly1;
        case "PiecewisePolynomial2": return piecewisePoly2;
        case "PiecewisePolynomial3": return piecewisePoly3;
        case "Polynomial2": return poly2;
        case "Polynomial3": return poly3;
        case "RationalQuadraticArd": return ratQuadraticArd;
        case "RationalQuadraticIso": return ratQuadraticIso;
        case "SpectralMixture1": return spectralMixture1;
        case "SpectralMixture3": return spectralMixture3;
        case "SpectralMixture5": return spectralMixture5;
        case "SquaredExponentialArd": return sqrExpArd;
        case "SquaredExponentialIso": return sqrExpIso;
        default: throw new InvalidProgramException(string.Format("Found invalid symbol {0}", node.Symbol.Name));
      }
    }


    // persistence
    [StorableConstructor]
    private GaussianProcessCovarianceOptimizationProblem(bool deserializing) : base(deserializing) { }
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
    }

    // cloning
    private GaussianProcessCovarianceOptimizationProblem(GaussianProcessCovarianceOptimizationProblem original, Cloner cloner)
      : base(original, cloner) {
    }
    public override IDeepCloneable Clone(Cloner cloner) {
      return new GaussianProcessCovarianceOptimizationProblem(this, cloner);
    }

    public void Load(IRegressionProblemData data) {
      this.ProblemData = data;
      OnProblemDataChanged();
    }

    public IRegressionProblemData Export() {
      return ProblemData;
    }

    #region events
    public event EventHandler ProblemDataChanged;


    private void OnProblemDataChanged() {
      var handler = ProblemDataChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    #endregion

  }
}