
source: trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessCovarianceOptimizationProblem.cs @ 13209

Last change on this file since 13209 was 13209, checked in by mkommend, 8 years ago

#1967: Implemented IStatefulItem in GaussianProcessCovarianceOptimizationProblem to clear the temporary parameters correctly.

File size: 18.6 KB
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion

using System;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.Instances;

namespace HeuristicLab.Algorithms.DataAnalysis {
  [Item("Gaussian Process Covariance Optimization Problem", "")]
  [Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 300)]
  [StorableClass]
  public sealed class GaussianProcessCovarianceOptimizationProblem : SymbolicExpressionTreeProblem, IStatefulItem, IRegressionProblem, IProblemInstanceConsumer<IRegressionProblemData>, IProblemInstanceExporter<IRegressionProblemData> {
    #region static variables and ctor
    private static readonly CovarianceMaternIso maternIso1;
    private static readonly CovarianceMaternIso maternIso3;
    private static readonly CovarianceMaternIso maternIso5;
    private static readonly CovariancePiecewisePolynomial piecewisePoly0;
    private static readonly CovariancePiecewisePolynomial piecewisePoly1;
    private static readonly CovariancePiecewisePolynomial piecewisePoly2;
    private static readonly CovariancePiecewisePolynomial piecewisePoly3;
    private static readonly CovariancePolynomial poly2;
    private static readonly CovariancePolynomial poly3;
    private static readonly CovarianceSpectralMixture spectralMixture1;
    private static readonly CovarianceSpectralMixture spectralMixture3;
    private static readonly CovarianceSpectralMixture spectralMixture5;
    private static readonly CovarianceLinear linear;
    private static readonly CovarianceLinearArd linearArd;
    private static readonly CovarianceNeuralNetwork neuralNetwork;
    private static readonly CovariancePeriodic periodic;
    private static readonly CovarianceRationalQuadraticIso ratQuadraticIso;
    private static readonly CovarianceRationalQuadraticArd ratQuadraticArd;
    private static readonly CovarianceSquaredExponentialArd sqrExpArd;
    private static readonly CovarianceSquaredExponentialIso sqrExpIso;

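    // The terminal covariance functions above are shared, pre-configured instances.
    // Sharing is safe because the model clones the covariance function it is given
    // (see the note in TreeToCovarianceFunction), so one instance may occur in many trees.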
    static GaussianProcessCovarianceOptimizationProblem() {
      // initialization is cumbersome because ConstrainedValueParameters only accept values
      // from their ValidValues collection (see the helper below)
      maternIso1 = new CovarianceMaternIso(); SetConstrainedValueParameter(maternIso1.DParameter, 1);
      maternIso3 = new CovarianceMaternIso(); SetConstrainedValueParameter(maternIso3.DParameter, 3);
      maternIso5 = new CovarianceMaternIso(); SetConstrainedValueParameter(maternIso5.DParameter, 5);

      piecewisePoly0 = new CovariancePiecewisePolynomial(); SetConstrainedValueParameter(piecewisePoly0.VParameter, 0);
      piecewisePoly1 = new CovariancePiecewisePolynomial(); SetConstrainedValueParameter(piecewisePoly1.VParameter, 1);
      piecewisePoly2 = new CovariancePiecewisePolynomial(); SetConstrainedValueParameter(piecewisePoly2.VParameter, 2);
      piecewisePoly3 = new CovariancePiecewisePolynomial(); SetConstrainedValueParameter(piecewisePoly3.VParameter, 3);

      poly2 = new CovariancePolynomial(); poly2.DegreeParameter.Value.Value = 2;
      poly3 = new CovariancePolynomial(); poly3.DegreeParameter.Value.Value = 3;

      spectralMixture1 = new CovarianceSpectralMixture(); spectralMixture1.QParameter.Value.Value = 1;
      spectralMixture3 = new CovarianceSpectralMixture(); spectralMixture3.QParameter.Value.Value = 3;
      spectralMixture5 = new CovarianceSpectralMixture(); spectralMixture5.QParameter.Value.Value = 5;

      linear = new CovarianceLinear();
      linearArd = new CovarianceLinearArd();
      neuralNetwork = new CovarianceNeuralNetwork();
      periodic = new CovariancePeriodic();
      ratQuadraticArd = new CovarianceRationalQuadraticArd();
      ratQuadraticIso = new CovarianceRationalQuadraticIso();
      sqrExpArd = new CovarianceSquaredExponentialArd();
      sqrExpIso = new CovarianceSquaredExponentialIso();
    }

    private static void SetConstrainedValueParameter(IConstrainedValueParameter<IntValue> param, int val) {
      param.Value = param.ValidValues.Single(v => v.Value == val);
    }

    #endregion

    #region parameter names

    private const string ProblemDataParameterName = "ProblemData";
    private const string ConstantOptIterationsParameterName = "Constant optimization steps";
    private const string RestartsParameterName = "Restarts";

    #endregion

    #region Parameter Properties
    IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }

    public IValueParameter<IRegressionProblemData> ProblemDataParameter {
      get { return (IValueParameter<IRegressionProblemData>)Parameters[ProblemDataParameterName]; }
    }
    public IFixedValueParameter<IntValue> ConstantOptIterationsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[ConstantOptIterationsParameterName]; }
    }
    public IFixedValueParameter<IntValue> RestartsParameter {
      get { return (IFixedValueParameter<IntValue>)Parameters[RestartsParameterName]; }
    }
    #endregion

    #region Properties

    public IRegressionProblemData ProblemData {
      get { return ProblemDataParameter.Value; }
      set { ProblemDataParameter.Value = value; }
    }
    IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }

    public int ConstantOptIterations {
      get { return ConstantOptIterationsParameter.Value.Value; }
      set { ConstantOptIterationsParameter.Value.Value = value; }
    }

    public int Restarts {
      get { return RestartsParameter.Value.Value; }
      set { RestartsParameter.Value.Value = value; }
    }
    #endregion

    public override bool Maximization {
      get { return true; } // we return the log likelihood (instead of the negative log likelihood as in GPR)
    }

    // the problem stores a few variables for information exchange from Evaluate() to Analyze()
    private readonly object problemStateLocker = new object();
    [Storable]
    private double bestQ;
    [Storable]
    private double[] bestHyperParameters;
    [Storable]
    private IMeanFunction meanFunc;
    [Storable]
    private ICovarianceFunction covFunc;

    public GaussianProcessCovarianceOptimizationProblem()
      : base() {
      Parameters.Add(new ValueParameter<IRegressionProblemData>(ProblemDataParameterName, "The data for the regression problem", new RegressionProblemData()));
      Parameters.Add(new FixedValueParameter<IntValue>(ConstantOptIterationsParameterName, "Number of optimization steps for hyperparameter values", new IntValue(50)));
      Parameters.Add(new FixedValueParameter<IntValue>(RestartsParameterName, "The number of random restarts for constant optimization.", new IntValue(10)));
      Parameters[RestartsParameterName].Hidden = true;
      var g = new SimpleSymbolicExpressionGrammar();
      g.AddSymbols(new string[] { "Sum", "Product" }, 2, 2);
      g.AddTerminalSymbols(new string[]
      {
        "Linear",
        "LinearArd",
        "MaternIso1",
        "MaternIso3",
        "MaternIso5",
        "NeuralNetwork",
        "Periodic",
        "PiecewisePolynomial0",
        "PiecewisePolynomial1",
        "PiecewisePolynomial2",
        "PiecewisePolynomial3",
        "Polynomial2",
        "Polynomial3",
        "RationalQuadraticArd",
        "RationalQuadraticIso",
        "SpectralMixture1",
        "SpectralMixture3",
        "SpectralMixture5",
        "SquaredExponentialArd",
        "SquaredExponentialIso"
      });
      base.Encoding = new SymbolicExpressionTreeEncoding(g, 10, 5);
    }

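    // IStatefulItem implementation: the temporary fields above are cleared before and
    // after each run so that state does not leak between runs (see #1967)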
    public void InitializeState() { ClearState(); }
    public void ClearState() {
      meanFunc = null;
      covFunc = null;
      bestQ = double.NegativeInfinity;
      bestHyperParameters = null;
    }

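    // Evaluates a tree by translating it into a covariance function and then optimizing
    // the hyperparameters (mean + covariance + noise) with several randomly initialized
    // conjugate gradient runs; returns the best log likelihood found over all restarts.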
    public override double Evaluate(ISymbolicExpressionTree tree, IRandom random) {
      var meanFunction = new MeanConst();
      var problemData = ProblemData;
      var ds = problemData.Dataset;
      var targetVariable = problemData.TargetVariable;
      var allowedInputVariables = problemData.AllowedInputVariables.ToArray();
      var nVars = allowedInputVariables.Length;
      var trainingRows = problemData.TrainingIndices.ToArray();

      // use the same covariance function for each restart
      var covarianceFunction = TreeToCovarianceFunction(tree);

      // allocate hyperparameters
      var hyperParameters = new double[meanFunction.GetNumberOfParameters(nVars) + covarianceFunction.GetNumberOfParameters(nVars) + 1]; // mean + cov + noise
      double[] bestHyperParameters = new double[hyperParameters.Length];
      var bestObjValue = new double[1] { double.MinValue };

      // data that is necessary for the objective function
      var data = Tuple.Create(ds, targetVariable, allowedInputVariables, trainingRows, (IMeanFunction)meanFunction, covarianceFunction, bestObjValue);

      for (int t = 0; t < Restarts; t++) {
        var prevBest = bestObjValue[0];
        var prevBestHyperParameters = new double[hyperParameters.Length];
        Array.Copy(bestHyperParameters, prevBestHyperParameters, bestHyperParameters.Length);

        // initialize hyperparameters
        hyperParameters[0] = ds.GetDoubleValues(targetVariable).Average(); // mean const

        for (int i = 0; i < covarianceFunction.GetNumberOfParameters(nVars); i++) {
          hyperParameters[1 + i] = random.NextDouble() * 2.0 - 1.0;
        }
        hyperParameters[hyperParameters.Length - 1] = 1.0; // s² = exp(2), TODO: other inits better?

        // use the alglib conjugate gradient optimizer (mincg) for hyper-parameter optimization
        double epsg = 0;
        double epsf = 0.00001;
        double epsx = 0;
        double stpmax = 1;
        int maxits = ConstantOptIterations;
        alglib.mincgstate state;
        alglib.mincgreport rep;

        alglib.mincgcreate(hyperParameters, out state);
        alglib.mincgsetcond(state, epsg, epsf, epsx, maxits);
        alglib.mincgsetstpmax(state, stpmax);
        alglib.mincgoptimize(state, ObjectiveFunction, null, data);

        alglib.mincgresults(state, out bestHyperParameters, out rep);

        if (rep.terminationtype < 0) {
          // error -> restore the previous best quality
          bestObjValue[0] = prevBest;
          Array.Copy(prevBestHyperParameters, bestHyperParameters, prevBestHyperParameters.Length);
        }
      }

      UpdateBestSoFar(bestObjValue[0], bestHyperParameters, meanFunction, covarianceFunction);

      return bestObjValue[0];
    }

    // updates the overall best quality and overall best model for Analyze()
    private void UpdateBestSoFar(double bestQ, double[] bestHyperParameters, IMeanFunction meanFunc, ICovarianceFunction covFunc) {
      lock (problemStateLocker) {
        if (bestQ > this.bestQ) {
          this.bestQ = bestQ;
          this.bestHyperParameters = new double[bestHyperParameters.Length];
          Array.Copy(bestHyperParameters, this.bestHyperParameters, this.bestHyperParameters.Length);
          this.meanFunc = meanFunc;
          this.covFunc = covFunc;
        }
      }
    }

    public override void Analyze(ISymbolicExpressionTree[] trees, double[] qualities, ResultCollection results, IRandom random) {
      if (!results.ContainsKey("Best Solution Quality")) {
        results.Add(new Result("Best Solution Quality", typeof(DoubleValue)));
      }
      if (!results.ContainsKey("Best Tree")) {
        results.Add(new Result("Best Tree", typeof(ISymbolicExpressionTree)));
      }
      if (!results.ContainsKey("Best Solution")) {
        results.Add(new Result("Best Solution", typeof(GaussianProcessRegressionSolution)));
      }

      var bestQuality = qualities.Max();

      if (results["Best Solution Quality"].Value == null || bestQuality > ((DoubleValue)results["Best Solution Quality"].Value).Value) {
        var bestIdx = Array.IndexOf(qualities, bestQuality);
        var bestClone = (ISymbolicExpressionTree)trees[bestIdx].Clone();
        results["Best Tree"].Value = bestClone;
        results["Best Solution Quality"].Value = new DoubleValue(bestQuality);
        results["Best Solution"].Value = CreateSolution();
      }
    }

    private IItem CreateSolution() {
      var problemData = ProblemData;
      var ds = problemData.Dataset;
      var targetVariable = problemData.TargetVariable;
      var allowedInputVariables = problemData.AllowedInputVariables.ToArray();
      var trainingRows = problemData.TrainingIndices.ToArray();

      lock (problemStateLocker) {
        var model = new GaussianProcessModel(ds, targetVariable, allowedInputVariables, trainingRows, bestHyperParameters, (IMeanFunction)meanFunc.Clone(), (ICovarianceFunction)covFunc.Clone());
        model.FixParameters();
        return model.CreateRegressionSolution((IRegressionProblemData)ProblemData.Clone());
      }
    }

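    // Callback for alglib.mincgoptimize: x holds the current hyperparameter vector,
    // func receives the objective value, grad the gradient for each hyperparameter,
    // and obj carries the fixed data packed into a tuple in Evaluate()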
    private void ObjectiveFunction(double[] x, ref double func, double[] grad, object obj) {
      // we want to optimize the model likelihood by changing the hyperparameters and also return the gradient for each hyperparameter
      var data = (Tuple<IDataset, string, string[], int[], IMeanFunction, ICovarianceFunction, double[]>)obj;
      var ds = data.Item1;
      var targetVariable = data.Item2;
      var allowedInputVariables = data.Item3;
      var trainingRows = data.Item4;
      var meanFunction = data.Item5;
      var covarianceFunction = data.Item6;
      var bestObjValue = data.Item7;
      var hyperParameters = x; // the decision variable vector

      try {
        var model = new GaussianProcessModel(ds, targetVariable, allowedInputVariables, trainingRows, hyperParameters, meanFunction, covarianceFunction);

        func = model.NegativeLogLikelihood; // mincg minimizes, so we return the negative log likelihood
        bestObjValue[0] = Math.Max(bestObjValue[0], -func); // the problem itself is a maximization problem
        var gradients = model.HyperparameterGradients;
        Array.Copy(gradients, grad, gradients.Length);
      }
      catch (ArgumentException) {
        // building the GaussianProcessModel might fail, in this case we return the worst possible objective value
        func = 1.0E+300;
        Array.Clear(grad, 0, grad.Length);
      }
    }

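    // Translates a symbolic expression tree into a covariance function: Sum and Product
    // nodes become CovarianceSum/CovarianceProduct terms, terminal symbols map to the
    // shared covariance instances defined in the static constructor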
    private ICovarianceFunction TreeToCovarianceFunction(ISymbolicExpressionTree tree) {
      return TreeToCovarianceFunction(tree.Root.GetSubtree(0).GetSubtree(0)); // skip programroot and startsymbol
    }

    private ICovarianceFunction TreeToCovarianceFunction(ISymbolicExpressionTreeNode node) {
      switch (node.Symbol.Name) {
        case "Sum": {
            var sum = new CovarianceSum();
            sum.Terms.Add(TreeToCovarianceFunction(node.GetSubtree(0)));
            sum.Terms.Add(TreeToCovarianceFunction(node.GetSubtree(1)));
            return sum;
          }
        case "Product": {
            var prod = new CovarianceProduct();
            prod.Factors.Add(TreeToCovarianceFunction(node.GetSubtree(0)));
            prod.Factors.Add(TreeToCovarianceFunction(node.GetSubtree(1)));
            return prod;
          }
        // covFunction is cloned by the model so we can reuse instances of terminal covariance functions
        case "Linear": return linear;
        case "LinearArd": return linearArd;
        case "MaternIso1": return maternIso1;
        case "MaternIso3": return maternIso3;
        case "MaternIso5": return maternIso5;
        case "NeuralNetwork": return neuralNetwork;
        case "Periodic": return periodic;
        case "PiecewisePolynomial0": return piecewisePoly0;
        case "PiecewisePolynomial1": return piecewisePoly1;
        case "PiecewisePolynomial2": return piecewisePoly2;
        case "PiecewisePolynomial3": return piecewisePoly3;
        case "Polynomial2": return poly2;
        case "Polynomial3": return poly3;
        case "RationalQuadraticArd": return ratQuadraticArd;
        case "RationalQuadraticIso": return ratQuadraticIso;
        case "SpectralMixture1": return spectralMixture1;
        case "SpectralMixture3": return spectralMixture3;
        case "SpectralMixture5": return spectralMixture5;
        case "SquaredExponentialArd": return sqrExpArd;
        case "SquaredExponentialIso": return sqrExpIso;
        default: throw new InvalidProgramException(string.Format("Found invalid symbol {0}", node.Symbol.Name));
      }
    }

    // persistence
    [StorableConstructor]
    private GaussianProcessCovarianceOptimizationProblem(bool deserializing) : base(deserializing) { }
    [StorableHook(HookType.AfterDeserialization)]
    private void AfterDeserialization() {
    }

    // cloning
    private GaussianProcessCovarianceOptimizationProblem(GaussianProcessCovarianceOptimizationProblem original, Cloner cloner)
      : base(original, cloner) {
      bestQ = original.bestQ;
      meanFunc = cloner.Clone(original.meanFunc);
      covFunc = cloner.Clone(original.covFunc);
      if (original.bestHyperParameters != null) { // check the original's field; this.bestHyperParameters is still null here
        bestHyperParameters = new double[original.bestHyperParameters.Length];
        Array.Copy(original.bestHyperParameters, bestHyperParameters, bestHyperParameters.Length);
      }
    }
    public override IDeepCloneable Clone(Cloner cloner) {
      return new GaussianProcessCovarianceOptimizationProblem(this, cloner);
    }

    public void Load(IRegressionProblemData data) {
      this.ProblemData = data;
      OnProblemDataChanged();
    }

    public IRegressionProblemData Export() {
      return ProblemData;
    }

    #region events
    public event EventHandler ProblemDataChanged;

    private void OnProblemDataChanged() {
      var handler = ProblemDataChanged;
      if (handler != null)
        handler(this, EventArgs.Empty);
    }
    #endregion

  }
}