#region License Information
/* HeuristicLab
* Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
*/
#endregion
using System;
using System.Collections.Generic;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
using HEAL.Attic;
using HeuristicLab.Data;
using System.Linq;
namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
/// <summary>
/// Represents a symbolic regression model
/// </summary>
[StorableType("2739C33E-4DDB-4285-9DFB-C056D900B2F2")]
[Item(Name = "Symbolic Regression Model", Description = "Represents a symbolic regression model.")]
public class SymbolicRegressionModel : SymbolicDataAnalysisModel, ISymbolicRegressionModel {
  [Storable]
  private string targetVariable;

  /// <summary>
  /// Name of the variable predicted by this model. Assigning null, an empty string or the
  /// current value is ignored; any other assignment raises <see cref="TargetVariableChanged"/>.
  /// </summary>
  public string TargetVariable {
    get { return targetVariable; }
    set {
      if (string.IsNullOrEmpty(value) || targetVariable == value) return;
      targetVariable = value;
      OnTargetVariableChanged(this, EventArgs.Empty);
    }
  }

  // Matrix used to perturb the model parameters when sampling prediction variances (may be null).
  [Storable]
  private readonly double[,] parameterCovariance;

  // Its square is added to every sampled variance in GetEstimatedVariances.
  [Storable]
  private readonly double sigma;

  [StorableConstructor]
  protected SymbolicRegressionModel(StorableConstructorFlag _) : base(_) {
    targetVariable = string.Empty;
  }

  /// <summary>
  /// Cloning constructor. The covariance matrix is shared with the original because it is never modified.
  /// </summary>
  protected SymbolicRegressionModel(SymbolicRegressionModel original, Cloner cloner)
    : base(original, cloner) {
    this.targetVariable = original.targetVariable;
    this.parameterCovariance = original.parameterCovariance; // immutable
    this.sigma = original.sigma;
  }

  /// <summary>
  /// Creates a model for <paramref name="targetVariable"/> from an expression tree and the interpreter used to evaluate it.
  /// </summary>
  /// <param name="targetVariable">Name of the predicted variable.</param>
  /// <param name="tree">Expression tree of the model.</param>
  /// <param name="interpreter">Interpreter used to evaluate the tree.</param>
  /// <param name="lowerEstimationLimit">Lower bound applied to estimated values.</param>
  /// <param name="upperEstimationLimit">Upper bound applied to estimated values.</param>
  /// <param name="parameterCovariance">Optional parameter covariance; a private copy is stored.</param>
  /// <param name="sigma">Value whose square is added to the sampled variances.</param>
  public SymbolicRegressionModel(string targetVariable, ISymbolicExpressionTree tree,
    ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
    double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue, double[,] parameterCovariance = null, double sigma = 0.0)
    : base(tree, interpreter, lowerEstimationLimit, upperEstimationLimit) {
    this.targetVariable = targetVariable;
    // copy the matrix so later changes by the caller cannot affect this model
    if (parameterCovariance != null)
      this.parameterCovariance = (double[,])parameterCovariance.Clone();
    this.sigma = sigma;
  }

  public override IDeepCloneable Clone(Cloner cloner) {
    return new SymbolicRegressionModel(this, cloner);
  }

  /// <summary>
  /// Evaluates the model for the given rows and limits the results to the estimation limits.
  /// </summary>
  public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    return Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, dataset, rows)
      .LimitToRange(LowerEstimationLimit, UpperEstimationLimit);
  }

  /// <summary>
  /// Estimates the prediction variance for each row by evaluating the model with randomly perturbed parameters.
  /// </summary>
  /// <returns>
  /// One value per row: the variance of the sampled predictions plus sigma squared,
  /// or 0.0 for every row when no parameter covariance is available.
  /// </returns>
  /// <exception cref="NotImplementedException">The tree does not have the expected linear scaling structure.</exception>
  /// <exception cref="InvalidProgramException">The covariance matrix dimension does not match the number of parameters found in the tree.</exception>
  public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
    var cov = parameterCovariance;
    // nothing to sample from; checked first so that no tree copy or generator state is created needlessly
    if (cov == null || cov.Length == 0) return rows.Select(_ => 0.0);

    // number of perturbed parameter vectors evaluated per row
    const int sampleCount = 30;

    // rows is needed once for sizing and once per sample; materialize it so lazy sequences are enumerated only once
    var rowArr = rows.ToArray();
    var samples = rowArr.Select(_ => new double[sampleCount]).ToArray();

    // must work with a copy because we change tree nodes
    var treeCopy = (ISymbolicExpressionTree)SymbolicExpressionTree.Clone();

    // HACK: skip linear scaling parameters because the analyzer doesn't use them (and they are likely correlated with the remaining parameters)
    // only works with linear scaling
    var scaledExpr = treeCopy.Root.GetSubtree(0).GetSubtree(0);
    if (!(scaledExpr.Symbol is Addition) ||
        !(scaledExpr.GetSubtree(0).Symbol is Multiplication))
      throw new NotImplementedException("prediction intervals are implemented only for linear scaling");

    // NOTE(review): the iteration starts at the scaling node itself, so constants directly below it are
    // collected as well; confirm that this matches the layout of the covariance matrix.
    var paramNodes = new List<ISymbolicExpressionTreeNode>();
    var coeffList = new List<double>();
    foreach (var node in scaledExpr.IterateNodesPostfix()) {
      if (node is ConstantTreeNode constNode) {
        paramNodes.Add(constNode);
        coeffList.Add(constNode.Value);
      } else if (node is VariableTreeNode varNode) {
        paramNodes.Add(varNode);
        coeffList.Add(varNode.Weight);
      }
    }
    var coeff = coeffList.ToArray();
    var numParams = coeff.Length;
    if (cov.GetLength(0) != numParams)
      throw new InvalidProgramException("The dimension of the parameter covariance matrix does not match the number of model parameters.");

    // TODO: probably we do not need to sample but can instead use a first-order or second-order approximation of f
    // see http://sia.webpopix.org/nonlinearRegression.html
    // also see https://rmazing.wordpress.com/2013/08/26/predictnls-part-2-taylor-approximation-confidence-intervals-for-nls-models/
    // https://www.rdocumentation.org/packages/propagate/versions/1.0-4/topics/predictNLS

    // uses sampling to produce prediction intervals; the fixed seed makes repeated calls reproducible
    alglib.hqrndseed(31415, 926535, out var state);
    double[] p = new double[numParams];
    for (int i = 0; i < sampleCount; i++) {
      // draw a standard normal vector and transform it into a parameter offset p
      // NOTE(review): the stored matrix is applied to delta directly; presumably it already holds a
      // factor of the covariance (otherwise the offsets are scaled by the covariance itself) - confirm.
      alglib.hqrndnormalv(state, numParams, out var delta);
      alglib.rmatrixmv(numParams, numParams, cov, 0, 0, 0, delta, 0, ref p, 0);
      for (int j = 0; j < numParams; j++) {
        if (paramNodes[j] is ConstantTreeNode constNode) constNode.Value = coeff[j] + p[j];
        else if (paramNodes[j] is VariableTreeNode varNode) varNode.Weight = coeff[j] + p[j];
      }

      // evaluate eagerly while the perturbed parameters are in place
      var r = 0;
      var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(treeCopy, dataset, rowArr).LimitToRange(LowerEstimationLimit, UpperEstimationLimit);
      foreach (var pred in estimatedValues) {
        samples[r++][i] = pred;
      }
    }
    // the perturbed tree copy is local and discarded here, so the original parameters need not be restored

    var sigma2 = sigma * sigma;
    return samples.Select(rowSamples => rowSamples.Variance() + sigma2).ToArray();
  }

  /// <summary>
  /// Creates a regression solution for this model using a copy of the given problem data.
  /// </summary>
  public ISymbolicRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
    return new SymbolicRegressionSolution(this, new RegressionProblemData(problemData));
  }

  IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
    return CreateRegressionSolution(problemData);
  }

  /// <summary>
  /// Scales the model using the target variable of the given problem data.
  /// </summary>
  public void Scale(IRegressionProblemData problemData) {
    Scale(problemData, problemData.TargetVariable);
  }

  public virtual bool IsProblemDataCompatible(IRegressionProblemData problemData, out string errorMessage) {
    return RegressionModel.IsProblemDataCompatible(this, problemData, out errorMessage);
  }

  /// <summary>
  /// Checks compatibility with arbitrary problem data; only regression problem data is accepted.
  /// </summary>
  /// <exception cref="ArgumentNullException"><paramref name="problemData"/> is null.</exception>
  /// <exception cref="ArgumentException"><paramref name="problemData"/> is not regression problem data.</exception>
  public override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
    if (problemData == null) throw new ArgumentNullException(nameof(problemData), "The provided problemData is null.");
    if (!(problemData is IRegressionProblemData regressionProblemData))
      throw new ArgumentException("The problem data is not compatible with this symbolic regression model. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", nameof(problemData));
    return IsProblemDataCompatible(regressionProblemData, out errorMessage);
  }

  #region events
  /// <summary>
  /// Raised after <see cref="TargetVariable"/> has been assigned a new, non-empty value.
  /// </summary>
  public event EventHandler TargetVariableChanged;
  private void OnTargetVariableChanged(object sender, EventArgs args) {
    TargetVariableChanged?.Invoke(sender, args);
  }
  #endregion
}
}