1 | #region License Information
|
---|
2 | /* HeuristicLab
|
---|
3 | * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
4 | *
|
---|
5 | * This file is part of HeuristicLab.
|
---|
6 | *
|
---|
7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
8 | * it under the terms of the GNU General Public License as published by
|
---|
9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
10 | * (at your option) any later version.
|
---|
11 | *
|
---|
12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
15 | * GNU General Public License for more details.
|
---|
16 | *
|
---|
17 | * You should have received a copy of the GNU General Public License
|
---|
18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
19 | */
|
---|
20 | #endregion
|
---|
21 |
|
---|
22 | using System;
|
---|
23 | using System.Collections.Generic;
|
---|
24 | using HeuristicLab.Common;
|
---|
25 | using HeuristicLab.Core;
|
---|
26 | using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
|
---|
27 | using HEAL.Attic;
|
---|
28 | using HeuristicLab.Data;
|
---|
29 | using System.Linq;
|
---|
30 |
|
---|
31 | namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
|
---|
32 | /// <summary>
|
---|
33 | /// Represents a symbolic regression model
|
---|
34 | /// </summary>
|
---|
35 | [StorableType("2739C33E-4DDB-4285-9DFB-C056D900B2F2")]
|
---|
36 | [Item(Name = "Symbolic Regression Model", Description = "Represents a symbolic regression model.")]
|
---|
37 | public class SymbolicRegressionModel : SymbolicDataAnalysisModel, ISymbolicRegressionModel {
|
---|
38 | [Storable]
|
---|
39 | private string targetVariable;
|
---|
40 | public string TargetVariable {
|
---|
41 | get { return targetVariable; }
|
---|
42 | set {
|
---|
43 | if (string.IsNullOrEmpty(value) || targetVariable == value) return;
|
---|
44 | targetVariable = value;
|
---|
45 | OnTargetVariableChanged(this, EventArgs.Empty);
|
---|
46 | }
|
---|
47 | }
|
---|
48 |
|
---|
49 | [Storable]
|
---|
50 | private readonly double[,] parameterCovariance;
|
---|
51 | [Storable]
|
---|
52 | private readonly double sigma;
|
---|
53 |
|
---|
54 | [StorableConstructor]
|
---|
55 | protected SymbolicRegressionModel(StorableConstructorFlag _) : base(_) {
|
---|
56 | targetVariable = string.Empty;
|
---|
57 | }
|
---|
58 |
|
---|
59 | protected SymbolicRegressionModel(SymbolicRegressionModel original, Cloner cloner)
|
---|
60 | : base(original, cloner) {
|
---|
61 | this.targetVariable = original.targetVariable;
|
---|
62 | this.parameterCovariance = original.parameterCovariance; // immutable
|
---|
63 | this.sigma = original.sigma;
|
---|
64 | }
|
---|
65 |
|
---|
66 | public SymbolicRegressionModel(string targetVariable, ISymbolicExpressionTree tree,
|
---|
67 | ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
|
---|
68 | double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue, double[,] parameterCovariance = null, double sigma = 0.0)
|
---|
69 | : base(tree, interpreter, lowerEstimationLimit, upperEstimationLimit) {
|
---|
70 | this.targetVariable = targetVariable;
|
---|
71 | if (parameterCovariance != null)
|
---|
72 | this.parameterCovariance = (double[,])parameterCovariance.Clone();
|
---|
73 | this.sigma = sigma;
|
---|
74 | }
|
---|
75 |
|
---|
76 | public override IDeepCloneable Clone(Cloner cloner) {
|
---|
77 | return new SymbolicRegressionModel(this, cloner);
|
---|
78 | }
|
---|
79 |
|
---|
80 | public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
|
---|
81 | return Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, dataset, rows)
|
---|
82 | .LimitToRange(LowerEstimationLimit, UpperEstimationLimit);
|
---|
83 | }
|
---|
84 |
|
---|
85 | public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
|
---|
86 | // must work with a copy because we change tree nodes
|
---|
87 | var treeCopy = (ISymbolicExpressionTree)SymbolicExpressionTree.Clone();
|
---|
88 | // uses sampling to produce prediction intervals
|
---|
89 | alglib.hqrndseed(31415, 926535, out var state);
|
---|
90 | var cov = parameterCovariance;
|
---|
91 | if (cov == null || cov.Length == 0) return rows.Select(_ => 0.0);
|
---|
92 | var n = 30;
|
---|
93 | var M = rows.Select(_ => new double[n]).ToArray();
|
---|
94 | var paramNodes = new List<ISymbolicExpressionTreeNode>();
|
---|
95 | var coeffList = new List<double>();
|
---|
96 | // HACK: skip linear scaling parameters because the analyzer doesn't use them (and they are likely correlated with the remaining parameters)
|
---|
97 | // only works with linear scaling
|
---|
98 | if (!(treeCopy.Root.GetSubtree(0).GetSubtree(0).Symbol is Addition) ||
|
---|
99 | !(treeCopy.Root.GetSubtree(0).GetSubtree(0).GetSubtree(0).Symbol is Multiplication))
|
---|
100 | throw new NotImplementedException("prediction intervals are implemented only for linear scaling");
|
---|
101 |
|
---|
102 | foreach (var node in treeCopy.Root.GetSubtree(0).GetSubtree(0).IterateNodesPostfix()) {
|
---|
103 | if (node is ConstantTreeNode constNode) {
|
---|
104 | paramNodes.Add(constNode);
|
---|
105 | coeffList.Add(constNode.Value);
|
---|
106 | } else if (node is VariableTreeNode varNode) {
|
---|
107 | paramNodes.Add(varNode);
|
---|
108 | coeffList.Add(varNode.Weight);
|
---|
109 | }
|
---|
110 | }
|
---|
111 | var coeff = coeffList.ToArray();
|
---|
112 | var numParams = coeff.Length;
|
---|
113 | if (cov.GetLength(0) != numParams) throw new InvalidProgramException();
|
---|
114 |
|
---|
115 | // TODO: probably we do not need to sample but can instead use a first-order or second-order approximation of f
|
---|
116 | // see http://sia.webpopix.org/nonlinearRegression.html
|
---|
117 | // also see https://rmazing.wordpress.com/2013/08/26/predictnls-part-2-taylor-approximation-confidence-intervals-for-nls-models/
|
---|
118 | // https://www.rdocumentation.org/packages/propagate/versions/1.0-4/topics/predictNLS
|
---|
119 | double[] p = new double[numParams];
|
---|
120 | for (int i = 0; i < 30; i++) {
|
---|
121 | // sample and update parameter vector delta is
|
---|
122 | alglib.hqrndnormalv(state, numParams, out var delta);
|
---|
123 | alglib.rmatrixmv(numParams, numParams, cov, 0, 0, 0, delta, 0, ref p, 0);
|
---|
124 | for (int j = 0; j < numParams; j++) {
|
---|
125 | if (paramNodes[j] is ConstantTreeNode constNode) constNode.Value = coeff[j] + p[j];
|
---|
126 | else if (paramNodes[j] is VariableTreeNode varNode) varNode.Weight = coeff[j] + p[j];
|
---|
127 | }
|
---|
128 | var r = 0;
|
---|
129 | var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(treeCopy, dataset, rows).LimitToRange(LowerEstimationLimit, UpperEstimationLimit);
|
---|
130 |
|
---|
131 | foreach (var pred in estimatedValues) {
|
---|
132 | M[r++][i] = pred;
|
---|
133 | }
|
---|
134 | }
|
---|
135 |
|
---|
136 | // reset parameters
|
---|
137 | for (int j = 0; j < numParams; j++) {
|
---|
138 | if (paramNodes[j] is ConstantTreeNode constNode) constNode.Value = coeff[j];
|
---|
139 | else if (paramNodes[j] is VariableTreeNode varNode) varNode.Weight = coeff[j];
|
---|
140 | }
|
---|
141 | var sigma2 = sigma * sigma;
|
---|
142 | return M.Select(M_i => M_i.Variance() + sigma2).ToArray();
|
---|
143 | }
|
---|
144 |
|
---|
145 | public ISymbolicRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
|
---|
146 | return new SymbolicRegressionSolution(this, new RegressionProblemData(problemData));
|
---|
147 | }
|
---|
148 | IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
|
---|
149 | return CreateRegressionSolution(problemData);
|
---|
150 | }
|
---|
151 |
|
---|
152 | public void Scale(IRegressionProblemData problemData) {
|
---|
153 | Scale(problemData, problemData.TargetVariable);
|
---|
154 | }
|
---|
155 |
|
---|
156 | public virtual bool IsProblemDataCompatible(IRegressionProblemData problemData, out string errorMessage) {
|
---|
157 | return RegressionModel.IsProblemDataCompatible(this, problemData, out errorMessage);
|
---|
158 | }
|
---|
159 |
|
---|
160 | public override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
|
---|
161 | if (problemData == null) throw new ArgumentNullException("problemData", "The provided problemData is null.");
|
---|
162 | var regressionProblemData = problemData as IRegressionProblemData;
|
---|
163 | if (regressionProblemData == null)
|
---|
164 | throw new ArgumentException("The problem data is not compatible with this symbolic regression model. Instead a " + problemData.GetType().GetPrettyName() + " was provided.", "problemData");
|
---|
165 | return IsProblemDataCompatible(regressionProblemData, out errorMessage);
|
---|
166 | }
|
---|
167 |
|
---|
168 | #region events
|
---|
169 | public event EventHandler TargetVariableChanged;
|
---|
170 | private void OnTargetVariableChanged(object sender, EventArgs args) {
|
---|
171 | var changed = TargetVariableChanged;
|
---|
172 | if (changed != null)
|
---|
173 | changed(sender, args);
|
---|
174 | }
|
---|
175 | #endregion
|
---|
176 | }
|
---|
177 | }
|
---|