1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Diagnostics;
|
---|
4 | using System.Linq;
|
---|
5 | using System.Runtime.InteropServices;
|
---|
6 | using System.Text;
|
---|
7 | using System.Threading.Tasks;
|
---|
8 | using HeuristicLab.Common;
|
---|
9 | using HeuristicLab.Core;
|
---|
10 | using HeuristicLab.Problems.DataAnalysis;
|
---|
11 |
|
---|
12 | namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
|
---|
13 | public static class SBART {
|
---|
14 | /*
|
---|
15 | # A Cubic B-spline Smoothing routine.
|
---|
16 |
|
---|
17 | #
|
---|
18 | # The algorithm minimises:
|
---|
19 | #
|
---|
20 | # (1/n) * sum ws(i)**2 * (ys(i)-sz(i))**2 + lambda* int ( sz"(xs) )**2 dxs
|
---|
21 | #
|
---|
22 | # lambda is a function of the spar which is assumed to be between
|
---|
23 | # 0 and 1
|
---|
24 |
|
---|
25 |
|
---|
26 | # Input
|
---|
27 |
|
---|
28 | # n number of data points
|
---|
29 | # ys(n) vector of length n containing the observations
|
---|
30 | # ws(n) vector containing the weights given to each data point
|
---|
31 | # xs(n) vector containing the ordinates of the observations
|
---|
32 |
|
---|
33 |
|
---|
34 | # nk number of b-spline coefficients to be estimated
|
---|
35 | # nk <= n+2
|
---|
36 | # knot(nk+4) vector of knot points defining the cubic b-spline basis.
|
---|
37 |
|
---|
38 |
|
---|
39 | # spar penalised likelihood smoothing parameter
|
---|
40 | # ispar indicator saying if spar is supplied or to be estimated
|
---|
41 | # lspar, uspar lower and upper values for spar 0.,1. are good values
|
---|
42 | # tol used in Golden Search routine
|
---|
43 |
|
---|
44 | # isetup setup indicator
|
---|
45 |
|
---|
46 | # icrit indicator saying which cross validation score
|
---|
47 | # is to be computed
|
---|
48 |
|
---|
49 | # ld4 the leading dimension of abd (ie ld4=4)
|
---|
50 | # ldnk the leading dimension of p2ip (not referenced)
|
---|
51 |
|
---|
52 |
|
---|
53 | # Output
|
---|
54 |
|
---|
55 | # coef(nk) vector of spline coefficients
|
---|
56 | # sz(n) vector of smoothed z-values
|
---|
57 | # lev(n) vector of leverages
|
---|
58 | # crit either ordinary or generalized CV score
|
---|
59 | # ier error indicator
|
---|
60 | # ier = 0 ___ everything fine
|
---|
61 | # ier = 1 ___ spar too small or too big
|
---|
62 | # problem in cholesky decomposition
|
---|
63 |
|
---|
64 |
|
---|
65 |
|
---|
66 | # Working arrays/matrix
|
---|
67 | # xwy X'Wy
|
---|
68 | # hs0,hs1,hs2,hs3 the diagonals of the X'WX matrix
|
---|
69 | # sg0,sg1,sg2,sg3 the diagonals of the Gram matrix
|
---|
70 | # abd(ld4,nk) [ X'WX+lambda*SIGMA] in diagonal form
|
---|
71 | # p1ip(ld4,nk) inner products between columns of L inverse
|
---|
72 | # p2ip(ldnk,nk) all inner products between columns of L inverse
|
---|
73 | # L'L = [X'WX+lambdaSIGMA] NOT REFERENCED
|
---|
74 |
|
---|
75 | */
|
---|
76 |
|
---|
77 | /*
|
---|
78 | * sbart(xs,ys,ws,n,knot,nk,
|
---|
79 | coef,sz,lev,
|
---|
80 | crit,icrit,spar,ispar,lspar,uspar,tol,
|
---|
81 | isetup,
|
---|
82 | xwy,
|
---|
83 | hs0,hs1,hs2,hs3,
|
---|
84 | sg0,sg1,sg2,sg3,
|
---|
85 | abd,p1ip,p2ip,ld4,ldnk,ier)
|
---|
86 |
|
---|
87 | */
|
---|
88 |
|
---|
89 |
|
---|
90 |
|
---|
91 | // To build the fortran library (x64) use:
|
---|
92 | // > ifort /dll /Qm64 sbart.f /Fesbart_x64.dll
|
---|
93 | // check dumpbin /EXPORTS sbart_x64.dll
|
---|
94 | // and dumpbin /IMPORTS sbart_x64.dll
|
---|
95 | [DllImport("sbart_x64.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "sbart")]
|
---|
96 | public static extern void sbart_x64(
|
---|
97 | float[] xs, float[] ys, float[] ws, ref int n, float[] knot, ref int nk,
|
---|
98 | float[] coeff, float[] sz, float[] lev,
|
---|
99 | ref float crit, ref int icrit, ref float spar, ref int ispar, ref float lspar, ref float uspar, ref float tol,
|
---|
100 | ref int isetup,
|
---|
101 | float[] xwy,
|
---|
102 | float[] hs0, float[] hs1, float[] hs2, float[] hs3,
|
---|
103 | float[] sg0, float[] sg1, float[] sg2, float[] sg3,
|
---|
104 | float[,] abd, float[,] p1ip, float[,] p2ip, ref int ld4, ref int ldnk, ref int ier);
|
---|
105 |
|
---|
106 | [DllImport("sbart_x86.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "sbart")]
|
---|
107 | public static extern void sbart_x86();
|
---|
108 |
|
---|
109 |
|
---|
110 | [DllImport("sbart_x64.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "sknotl")]
|
---|
111 | public static extern void sknotl_x64(float[] x, ref int n, float[] knot, ref int k);
|
---|
112 | [DllImport("sbart_x64.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "setreg")]
|
---|
113 | public static extern void setreg_x64(float[] x, float[] y, float[] w, ref int n, float[] xw, ref int nx, ref float min, ref float range, float[] knot, ref int nk);
|
---|
114 |
|
---|
115 | public class SBART_Report {
|
---|
116 | public double smoothingParameter;
|
---|
117 | public double gcv;
|
---|
118 | public double[] leverage;
|
---|
119 | }
|
---|
120 |
|
---|
121 | public static IRegressionModel CalculateSBART(double[] x, double[] y,
|
---|
122 | string targetVariable, string[] inputVars, float smoothingParameter,
|
---|
123 | out SBART_Report report) {
|
---|
124 | var w = Enumerable.Repeat(1.0, x.Length).ToArray();
|
---|
125 |
|
---|
126 | int n = x.Length;
|
---|
127 | float[] leverage = new float[n];
|
---|
128 | float[] y_smoothed = new float[n];
|
---|
129 | int ic = n - 1;
|
---|
130 | int ier = -99;
|
---|
131 | int nk = n;
|
---|
132 | float[] knots = new float[nk + 6];
|
---|
133 |
|
---|
134 | float crit = -99.0f;
|
---|
135 | int icrit = 0; // 0..don't calc CV, 1 .. GCV, 2 CV
|
---|
136 |
|
---|
137 | // float smoothingParameter = -99.0f;
|
---|
138 | int smoothingParameterIndicator = 1;
|
---|
139 | float lowerSmoothingParameter = 0f;
|
---|
140 | float upperSmoothingParameter = 1.0f;
|
---|
141 | float tol = 0.01f;
|
---|
142 | int isetup = 0; // not setup?
|
---|
143 |
|
---|
144 | float min = -99.0f;
|
---|
145 | float range = -99.0f;
|
---|
146 |
|
---|
147 | if (Environment.Is64BitProcess) {
|
---|
148 | float[] xw = new float[n];
|
---|
149 | int nx = -99;
|
---|
150 | float[] xs = x.Select(xi => (float)xi).ToArray();
|
---|
151 | float[] ys = y.Select(yi => (float)yi).ToArray();
|
---|
152 | float[] ws = w.Select(wi => (float)wi).ToArray();
|
---|
153 | SBART.setreg_x64(xs, ys, ws,
|
---|
154 | ref n, xw, ref nx, ref min, ref range, knots, ref nk);
|
---|
155 | if(nx < 4) {
|
---|
156 | report = new SBART_Report();
|
---|
157 | report.leverage = new double[0];
|
---|
158 | return new ConstantModel(ys.Take(nx).Average(), targetVariable);
|
---|
159 | }
|
---|
160 |
|
---|
161 | float[] coeff = new float[nk];
|
---|
162 |
|
---|
163 | // working arrays for sbart
|
---|
164 | float[] xwy = new float[nk];
|
---|
165 | float[] hs0 = new float[nk];
|
---|
166 | float[] hs1 = new float[nk];
|
---|
167 | float[] hs2 = new float[nk];
|
---|
168 | float[] hs3 = new float[nk];
|
---|
169 | float[] sg0 = new float[nk];
|
---|
170 | float[] sg1 = new float[nk];
|
---|
171 | float[] sg2 = new float[nk];
|
---|
172 | float[] sg3 = new float[nk];
|
---|
173 | int ld4 = 4;
|
---|
174 | float[,] adb = new float[nk, ld4];
|
---|
175 |
|
---|
176 | float[,] p1ip = new float[nk, ld4];
|
---|
177 | int ldnk = nk + 4;
|
---|
178 | float[,] p2ip = new float[nk, ldnk];
|
---|
179 |
|
---|
180 | SBART.sbart_x64(xs, ys, ws, ref nx, knots, ref nk, coeff, y_smoothed, leverage, ref crit, ref icrit,
|
---|
181 | ref smoothingParameter, ref smoothingParameterIndicator, ref lowerSmoothingParameter, ref upperSmoothingParameter,
|
---|
182 | ref tol, ref isetup,
|
---|
183 | xwy, hs0, hs1, hs2, hs3, sg0, sg1, sg2, sg3, adb, p1ip, p2ip, ref ld4, ref ldnk, ref ier);
|
---|
184 |
|
---|
185 | if (ier > 0) throw new ArgumentException(ier.ToString());
|
---|
186 |
|
---|
187 | report = new SBART_Report();
|
---|
188 | report.gcv = crit;
|
---|
189 | report.smoothingParameter = smoothingParameter;
|
---|
190 | report.leverage = leverage.Select(li => (double)li).ToArray();
|
---|
191 |
|
---|
192 | return new BartRegressionModel(xs.Take(nx).ToArray(), y_smoothed.Take(nx).ToArray(), targetVariable, inputVars, min, range);
|
---|
193 |
|
---|
194 | } else {
|
---|
195 | throw new NotSupportedException();
|
---|
196 | }
|
---|
197 |
|
---|
198 | }
|
---|
199 |
|
---|
200 | public class BartRegressionModel : NamedItem, IRegressionModel {
|
---|
201 | private float[] x;
|
---|
202 | private float[] y;
|
---|
203 | private double min;
|
---|
204 | private double range;
|
---|
205 | public string TargetVariable { get; set; }
|
---|
206 |
|
---|
207 | private string[] variablesUsedForPrediction;
|
---|
208 | public IEnumerable<string> VariablesUsedForPrediction {
|
---|
209 | get {
|
---|
210 | return variablesUsedForPrediction;
|
---|
211 | }
|
---|
212 | }
|
---|
213 |
|
---|
214 | public BartRegressionModel(BartRegressionModel orig, Cloner cloner) {
|
---|
215 | this.x = orig.x;
|
---|
216 | this.y = orig.y;
|
---|
217 | this.min = orig.min;
|
---|
218 | this.range = orig.range;
|
---|
219 | this.TargetVariable = orig.TargetVariable;
|
---|
220 | this.variablesUsedForPrediction = orig.variablesUsedForPrediction;
|
---|
221 | }
|
---|
222 | public BartRegressionModel(float[] x, float[] y_smoothed, string targetVariable, string[] inputVars, double min, double range) {
|
---|
223 | this.variablesUsedForPrediction = inputVars;
|
---|
224 | this.TargetVariable = targetVariable;
|
---|
225 | this.x = x;
|
---|
226 | this.y = y_smoothed;
|
---|
227 | this.range = range;
|
---|
228 | this.min = min;
|
---|
229 | }
|
---|
230 |
|
---|
231 | public event EventHandler TargetVariableChanged;
|
---|
232 |
|
---|
233 | public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
|
---|
234 | return new RegressionSolution(this, (IRegressionProblemData) problemData.Clone());
|
---|
235 | }
|
---|
236 |
|
---|
237 | public double GetEstimatedValue(double xx) {
|
---|
238 | xx = (xx - min) / range;
|
---|
239 | // piecewise linear approximation
|
---|
240 | int n = x.Length;
|
---|
241 | if (xx <= x[0]) {
|
---|
242 | double h = xx - x[0];
|
---|
243 | return h * (y[1] - y[0]) / (x[1] - x[0]) + y[0];
|
---|
244 | } else if (xx >= x[n-1]) {
|
---|
245 | double h = xx - x[n-1];
|
---|
246 | return h * (y[n-1] - y[n-2]) / (x[n-1] - x[n-2]) + y[n-1];
|
---|
247 | } else {
|
---|
248 | // binary search
|
---|
249 | int lower = 0;
|
---|
250 | int upper = n-1;
|
---|
251 | while (true) {
|
---|
252 | if (upper < lower) throw new InvalidProgramException();
|
---|
253 | int i = lower + (upper - lower) / 2;
|
---|
254 | if (x[i] <= xx && xx < x[i + 1]) {
|
---|
255 | double h = xx - x[i];
|
---|
256 | double k = (y[i + 1] - y[i]) / (x[i + 1] - x[i]);
|
---|
257 | return h * k + y[i];
|
---|
258 | } else if (xx < x[i]) {
|
---|
259 | upper = i - 1;
|
---|
260 | } else {
|
---|
261 | lower = i + 1;
|
---|
262 | }
|
---|
263 | }
|
---|
264 | }
|
---|
265 | return 0.0;
|
---|
266 | }
|
---|
267 |
|
---|
268 | public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
|
---|
269 | foreach(var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
|
---|
270 | yield return GetEstimatedValue(x);
|
---|
271 | }
|
---|
272 | }
|
---|
273 |
|
---|
274 | public override IDeepCloneable Clone(Cloner cloner) {
|
---|
275 | return new BartRegressionModel(this, cloner);
|
---|
276 | }
|
---|
277 | }
|
---|
278 |
|
---|
279 | }
|
---|
280 | }
|
---|