#region License Information
/* HeuristicLab
* Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Data;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HeuristicLab.Optimization;
using HeuristicLab.Parameters;
using HEAL.Attic;
using HeuristicLab.Problems.DataAnalysis;
using HeuristicLab.Problems.Instances;
namespace HeuristicLab.Algorithms.DataAnalysis {
[Item("Gaussian Process Covariance Optimization Problem", "")]
[Creatable(CreatableAttribute.Categories.GeneticProgrammingProblems, Priority = 300)]
[StorableType("A3EA7CE7-78FA-48FF-9DD5-FBE5AB770A99")]
public sealed class GaussianProcessCovarianceOptimizationProblem : SymbolicExpressionTreeProblem, IStatefulItem, IRegressionProblem, IProblemInstanceConsumer, IProblemInstanceExporter {
#region static variables and ctor
private static readonly CovarianceMaternIso maternIso1;
private static readonly CovarianceMaternIso maternIso3;
private static readonly CovarianceMaternIso maternIso5;
private static readonly CovariancePiecewisePolynomial piecewisePoly0;
private static readonly CovariancePiecewisePolynomial piecewisePoly1;
private static readonly CovariancePiecewisePolynomial piecewisePoly2;
private static readonly CovariancePiecewisePolynomial piecewisePoly3;
private static readonly CovariancePolynomial poly2;
private static readonly CovariancePolynomial poly3;
private static readonly CovarianceSpectralMixture spectralMixture1;
private static readonly CovarianceSpectralMixture spectralMixture3;
private static readonly CovarianceSpectralMixture spectralMixture5;
private static readonly CovarianceLinear linear;
private static readonly CovarianceLinearArd linearArd;
private static readonly CovarianceNeuralNetwork neuralNetwork;
private static readonly CovariancePeriodic periodic;
private static readonly CovarianceRationalQuadraticIso ratQuadraticIso;
private static readonly CovarianceRationalQuadraticArd ratQuadraticArd;
private static readonly CovarianceSquaredExponentialArd sqrExpArd;
private static readonly CovarianceSquaredExponentialIso sqrExpIso;
static GaussianProcessCovarianceOptimizationProblem() {
// cumbersome initialization because of ConstrainedValueParameters
maternIso1 = new CovarianceMaternIso(); SetConstrainedValueParameter(maternIso1.DParameter, 1);
maternIso3 = new CovarianceMaternIso(); SetConstrainedValueParameter(maternIso3.DParameter, 3);
maternIso5 = new CovarianceMaternIso(); SetConstrainedValueParameter(maternIso5.DParameter, 5);
piecewisePoly0 = new CovariancePiecewisePolynomial(); SetConstrainedValueParameter(piecewisePoly0.VParameter, 0);
piecewisePoly1 = new CovariancePiecewisePolynomial(); SetConstrainedValueParameter(piecewisePoly1.VParameter, 1);
piecewisePoly2 = new CovariancePiecewisePolynomial(); SetConstrainedValueParameter(piecewisePoly2.VParameter, 2);
piecewisePoly3 = new CovariancePiecewisePolynomial(); SetConstrainedValueParameter(piecewisePoly3.VParameter, 3);
poly2 = new CovariancePolynomial(); poly2.DegreeParameter.Value.Value = 2;
poly3 = new CovariancePolynomial(); poly3.DegreeParameter.Value.Value = 3;
spectralMixture1 = new CovarianceSpectralMixture(); spectralMixture1.QParameter.Value.Value = 1;
spectralMixture3 = new CovarianceSpectralMixture(); spectralMixture3.QParameter.Value.Value = 3;
spectralMixture5 = new CovarianceSpectralMixture(); spectralMixture5.QParameter.Value.Value = 5;
linear = new CovarianceLinear();
linearArd = new CovarianceLinearArd();
neuralNetwork = new CovarianceNeuralNetwork();
periodic = new CovariancePeriodic();
ratQuadraticArd = new CovarianceRationalQuadraticArd();
ratQuadraticIso = new CovarianceRationalQuadraticIso();
sqrExpArd = new CovarianceSquaredExponentialArd();
sqrExpIso = new CovarianceSquaredExponentialIso();
}
private static void SetConstrainedValueParameter(IConstrainedValueParameter param, int val) {
param.Value = param.ValidValues.Single(v => v.Value == val);
}
#endregion
#region parameter names
private const string ProblemDataParameterName = "ProblemData";
private const string ConstantOptIterationsParameterName = "Constant optimization steps";
private const string RestartsParameterName = "Restarts";
#endregion
#region Parameter Properties
IParameter IDataAnalysisProblem.ProblemDataParameter { get { return ProblemDataParameter; } }
public IValueParameter ProblemDataParameter {
get { return (IValueParameter)Parameters[ProblemDataParameterName]; }
}
public IFixedValueParameter ConstantOptIterationsParameter {
get { return (IFixedValueParameter)Parameters[ConstantOptIterationsParameterName]; }
}
public IFixedValueParameter RestartsParameter {
get { return (IFixedValueParameter)Parameters[RestartsParameterName]; }
}
#endregion
#region Properties
public IRegressionProblemData ProblemData {
get { return ProblemDataParameter.Value; }
set { ProblemDataParameter.Value = value; }
}
IDataAnalysisProblemData IDataAnalysisProblem.ProblemData { get { return ProblemData; } }
public int ConstantOptIterations {
get { return ConstantOptIterationsParameter.Value.Value; }
set { ConstantOptIterationsParameter.Value.Value = value; }
}
public int Restarts {
get { return RestartsParameter.Value.Value; }
set { RestartsParameter.Value.Value = value; }
}
#endregion
public override bool Maximization {
get { return true; } // return log likelihood (instead of negative log likelihood as in GPR
}
// problem stores a few variables for information exchange from Evaluate() to Analyze()
private readonly object problemStateLocker = new object();
[Storable]
private double bestQ;
[Storable]
private double[] bestHyperParameters;
[Storable]
private IMeanFunction meanFunc;
[Storable]
private ICovarianceFunction covFunc;
public GaussianProcessCovarianceOptimizationProblem()
: base() {
Parameters.Add(new ValueParameter(ProblemDataParameterName, "The data for the regression problem", new RegressionProblemData()));
Parameters.Add(new FixedValueParameter(ConstantOptIterationsParameterName, "Number of optimization steps for hyperparameter values", new IntValue(50)));
Parameters.Add(new FixedValueParameter(RestartsParameterName, "The number of random restarts for constant optimization.", new IntValue(10)));
Parameters["Restarts"].Hidden = true;
var g = new SimpleSymbolicExpressionGrammar();
g.AddSymbols(new string[] { "Sum", "Product" }, 2, 2);
g.AddTerminalSymbols(new string[]
{
"Linear",
"LinearArd",
"MaternIso1",
"MaternIso3",
"MaternIso5",
"NeuralNetwork",
"Periodic",
"PiecewisePolynomial0",
"PiecewisePolynomial1",
"PiecewisePolynomial2",
"PiecewisePolynomial3",
"Polynomial2",
"Polynomial3",
"RationalQuadraticArd",
"RationalQuadraticIso",
"SpectralMixture1",
"SpectralMixture3",
"SpectralMixture5",
"SquaredExponentialArd",
"SquaredExponentialIso"
});
base.Encoding = new SymbolicExpressionTreeEncoding(g, 10, 5);
}
public void InitializeState() { ClearState(); }
public void ClearState() {
meanFunc = null;
covFunc = null;
bestQ = double.NegativeInfinity;
bestHyperParameters = null;
}
private readonly object syncRoot = new object();
// Does not produce the same result for the same seed when using parallel engine (see below)!
public override double Evaluate(ISymbolicExpressionTree tree, IRandom random) {
var meanFunction = new MeanConst();
var problemData = ProblemData;
var ds = problemData.Dataset;
var targetVariable = problemData.TargetVariable;
var allowedInputVariables = problemData.AllowedInputVariables.ToArray();
var nVars = allowedInputVariables.Length;
var trainingRows = problemData.TrainingIndices.ToArray();
// use the same covariance function for each restart
var covarianceFunction = TreeToCovarianceFunction(tree);
// allocate hyperparameters
var hyperParameters = new double[meanFunction.GetNumberOfParameters(nVars) + covarianceFunction.GetNumberOfParameters(nVars) + 1]; // mean + cov + noise
double[] bestHyperParameters = new double[hyperParameters.Length];
var bestObjValue = new double[1] { double.MinValue };
// data that is necessary for the objective function
var data = Tuple.Create(ds, targetVariable, allowedInputVariables, trainingRows, (IMeanFunction)meanFunction, covarianceFunction, bestObjValue);
for (int t = 0; t < Restarts; t++) {
var prevBest = bestObjValue[0];
var prevBestHyperParameters = new double[hyperParameters.Length];
Array.Copy(bestHyperParameters, prevBestHyperParameters, bestHyperParameters.Length);
// initialize hyperparameters
hyperParameters[0] = ds.GetDoubleValues(targetVariable).Average(); // mean const
// Evaluate might be called concurrently therefore access to random has to be synchronized.
// However, results of multiple runs with the same seed will be different when using the parallel engine.
lock (syncRoot) {
for (int i = 0; i < covarianceFunction.GetNumberOfParameters(nVars); i++) {
hyperParameters[1 + i] = random.NextDouble() * 2.0 - 1.0;
}
}
hyperParameters[hyperParameters.Length - 1] = 1.0; // sē = exp(2), TODO: other inits better?
// use alglib.bfgs for hyper-parameter optimization ...
double epsg = 0;
double epsf = 0.00001;
double epsx = 0;
double stpmax = 1;
int maxits = ConstantOptIterations;
alglib.mincgstate state;
alglib.mincgreport rep;
alglib.mincgcreate(hyperParameters, out state);
alglib.mincgsetcond(state, epsg, epsf, epsx, maxits);
alglib.mincgsetstpmax(state, stpmax);
alglib.mincgoptimize(state, ObjectiveFunction, null, data);
alglib.mincgresults(state, out bestHyperParameters, out rep);
if (rep.terminationtype < 0) {
// error -> restore previous best quality
bestObjValue[0] = prevBest;
Array.Copy(prevBestHyperParameters, bestHyperParameters, prevBestHyperParameters.Length);
}
}
UpdateBestSoFar(bestObjValue[0], bestHyperParameters, meanFunction, covarianceFunction);
return bestObjValue[0];
}
// updates the overall best quality and overall best model for Analyze()
private void UpdateBestSoFar(double bestQ, double[] bestHyperParameters, IMeanFunction meanFunc, ICovarianceFunction covFunc) {
lock (problemStateLocker) {
if (bestQ > this.bestQ) {
this.bestQ = bestQ;
this.bestHyperParameters = new double[bestHyperParameters.Length];
Array.Copy(bestHyperParameters, this.bestHyperParameters, this.bestHyperParameters.Length);
this.meanFunc = meanFunc;
this.covFunc = covFunc;
}
}
}
public override void Analyze(ISymbolicExpressionTree[] trees, double[] qualities, ResultCollection results, IRandom random) {
if (!results.ContainsKey("Best Solution Quality")) {
results.Add(new Result("Best Solution Quality", typeof(DoubleValue)));
}
if (!results.ContainsKey("Best Tree")) {
results.Add(new Result("Best Tree", typeof(ISymbolicExpressionTree)));
}
if (!results.ContainsKey("Best Solution")) {
results.Add(new Result("Best Solution", typeof(GaussianProcessRegressionSolution)));
}
var bestQuality = qualities.Max();
if (results["Best Solution Quality"].Value == null || bestQuality > ((DoubleValue)results["Best Solution Quality"].Value).Value) {
var bestIdx = Array.IndexOf(qualities, bestQuality);
var bestClone = (ISymbolicExpressionTree)trees[bestIdx].Clone();
results["Best Tree"].Value = bestClone;
results["Best Solution Quality"].Value = new DoubleValue(bestQuality);
results["Best Solution"].Value = CreateSolution();
}
}
private IItem CreateSolution() {
var problemData = ProblemData;
var ds = problemData.Dataset;
var targetVariable = problemData.TargetVariable;
var allowedInputVariables = problemData.AllowedInputVariables.ToArray();
var trainingRows = problemData.TrainingIndices.ToArray();
lock (problemStateLocker) {
var model = new GaussianProcessModel(ds, targetVariable, allowedInputVariables, trainingRows, bestHyperParameters, (IMeanFunction)meanFunc.Clone(), (ICovarianceFunction)covFunc.Clone());
model.FixParameters();
return model.CreateRegressionSolution((IRegressionProblemData)ProblemData.Clone());
}
}
private void ObjectiveFunction(double[] x, ref double func, double[] grad, object obj) {
// we want to optimize the model likelihood by changing the hyperparameters and also return the gradient for each hyperparameter
var data = (Tuple)obj;
var ds = data.Item1;
var targetVariable = data.Item2;
var allowedInputVariables = data.Item3;
var trainingRows = data.Item4;
var meanFunction = data.Item5;
var covarianceFunction = data.Item6;
var bestObjValue = data.Item7;
var hyperParameters = x; // the decision variable vector
try {
var model = new GaussianProcessModel(ds, targetVariable, allowedInputVariables, trainingRows, hyperParameters, meanFunction, covarianceFunction);
func = model.NegativeLogLikelihood; // mincgoptimize, so we return negative likelihood
bestObjValue[0] = Math.Max(bestObjValue[0], -func); // problem itself is a maximization problem
var gradients = model.HyperparameterGradients;
Array.Copy(gradients, grad, gradients.Length);
}
catch (ArgumentException) {
// building the GaussianProcessModel might fail, in this case we return the worst possible objective value
func = 1.0E+300;
Array.Clear(grad, 0, grad.Length);
}
}
private ICovarianceFunction TreeToCovarianceFunction(ISymbolicExpressionTree tree) {
return TreeToCovarianceFunction(tree.Root.GetSubtree(0).GetSubtree(0)); // skip programroot and startsymbol
}
private ICovarianceFunction TreeToCovarianceFunction(ISymbolicExpressionTreeNode node) {
switch (node.Symbol.Name) {
case "Sum": {
var sum = new CovarianceSum();
sum.Terms.Add(TreeToCovarianceFunction(node.GetSubtree(0)));
sum.Terms.Add(TreeToCovarianceFunction(node.GetSubtree(1)));
return sum;
}
case "Product": {
var prod = new CovarianceProduct();
prod.Factors.Add(TreeToCovarianceFunction(node.GetSubtree(0)));
prod.Factors.Add(TreeToCovarianceFunction(node.GetSubtree(1)));
return prod;
}
// covFunction is cloned by the model so we can reuse instances of terminal covariance functions
case "Linear": return linear;
case "LinearArd": return linearArd;
case "MaternIso1": return maternIso1;
case "MaternIso3": return maternIso3;
case "MaternIso5": return maternIso5;
case "NeuralNetwork": return neuralNetwork;
case "Periodic": return periodic;
case "PiecewisePolynomial0": return piecewisePoly0;
case "PiecewisePolynomial1": return piecewisePoly1;
case "PiecewisePolynomial2": return piecewisePoly2;
case "PiecewisePolynomial3": return piecewisePoly3;
case "Polynomial2": return poly2;
case "Polynomial3": return poly3;
case "RationalQuadraticArd": return ratQuadraticArd;
case "RationalQuadraticIso": return ratQuadraticIso;
case "SpectralMixture1": return spectralMixture1;
case "SpectralMixture3": return spectralMixture3;
case "SpectralMixture5": return spectralMixture5;
case "SquaredExponentialArd": return sqrExpArd;
case "SquaredExponentialIso": return sqrExpIso;
default: throw new InvalidProgramException(string.Format("Found invalid symbol {0}", node.Symbol.Name));
}
}
// persistence
[StorableConstructor]
private GaussianProcessCovarianceOptimizationProblem(StorableConstructorFlag _) : base(_) { }
[StorableHook(HookType.AfterDeserialization)]
private void AfterDeserialization() {
}
// cloning
private GaussianProcessCovarianceOptimizationProblem(GaussianProcessCovarianceOptimizationProblem original, Cloner cloner)
: base(original, cloner) {
bestQ = original.bestQ;
meanFunc = cloner.Clone(original.meanFunc);
covFunc = cloner.Clone(original.covFunc);
if (bestHyperParameters != null) {
bestHyperParameters = new double[original.bestHyperParameters.Length];
Array.Copy(original.bestHyperParameters, bestHyperParameters, bestHyperParameters.Length);
}
}
public override IDeepCloneable Clone(Cloner cloner) {
return new GaussianProcessCovarianceOptimizationProblem(this, cloner);
}
public void Load(IRegressionProblemData data) {
this.ProblemData = data;
OnProblemDataChanged();
}
public IRegressionProblemData Export() {
return ProblemData;
}
#region events
public event EventHandler ProblemDataChanged;
private void OnProblemDataChanged() {
var handler = ProblemDataChanged;
if (handler != null)
handler(this, EventArgs.Empty);
}
#endregion
}
}