source: branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/SBART.cs @ 15468

Last change on this file since 15468 was 15468, checked in by gkronber, 3 years ago

#2789 worked on sbart spline

File size: 19.4 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Runtime.InteropServices;
6using System.Text;
7using System.Threading.Tasks;
8using HeuristicLab.Common;
9using HeuristicLab.Core;
10using HeuristicLab.Problems.DataAnalysis;
11
12namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
13  public static class SBART {
14    /*
15           # A Cubic B-spline Smoothing routine.
16
17    #
18    #          The algorithm minimises:
19    #
20    #      (1/n) * sum ws(i)**2 * (ys(i)-sz(i))**2 + lambda* int ( sz"(xs) )**2 dxs
21    #
22    #        lambda is a function of the spar which is assumed to be between
23    #        0 and 1
24
25
26            # Input
27
28    #   n               number of data points
29    #  ys(n)      vector of length n containing the observations
30    #  ws(n)            vector containing the weights given to each data point
31    #  xs(n)            vector containing the ordinates of the observations
32
33
34    #  nk               number of b-spline coefficients to be estimated
35    #                   nk <= n+2
36    #  knot(nk+4)       vector of knot points defining the cubic b-spline basis.
37
38
39    #  spar             penalised likelihood smoothing parameter
40    #  ispar            indicator saying if spar is supplied or to be estimated
41    #  lspar, uspar     lower and upper values for spar 0.,1. are good values
42    #  tol              used in Golden Search routine
43
44    #  isetup           setup indicator
45
46    #  icrit            indicator saying which cross validation score
47    #       is to be computed
48
49    #  ld4              the leading dimension of abd (ie ld4=4)
50    #  ldnk             the leading dimension of p2ip (not referenced)
51
52
53                    # Output
54
55    #   coef(nk)       vector of spline coefficients
56    #   sz(n)          vector of smoothed z-values
57    #   lev(n)         vector of leverages
58    #   crit           either ordinary of generalized CV score
59    #   ier            error indicator
60    #                  ier = 0 ___  everything fine
61    #                  ier = 1 ___  spar too small or too big
62    #                               problem in cholesky decomposition
63
64
65
66             # Working arrays/matrix
67    #   xwy         X'Wy
68    #   hs0,hs1,hs2,hs3   the diagonals of the X'WX matrix
69    #   sg0,sg1,sg2,sg3   the diagonals of the Gram matrix
70    #   abd(ld4,nk)       [ X'WX+lambda*SIGMA] in diagonal form
71    #   p1ip(ld4,nk)       inner products between columns of L inverse
72    #   p2ip(ldnk,nk)      all inner products between columns of L inverse
73    #                          L'L = [X'WX+lambdaSIGMA]  NOT REFERENCED
74
75    */
76
77    /*
78     * sbart(xs,ys,ws,n,knot,nk,
79          coef,sz,lev,
80          crit,icrit,spar,ispar,lspar,uspar,tol,
81          isetup,
82          xwy,
83          hs0,hs1,hs2,hs3,
84          sg0,sg1,sg2,sg3,
85          abd,p1ip,p2ip,ld4,ldnk,ier)
86
87    */
88
89
90
91    // To build the fortran library (x64) use:
92    // > ifort /dll /Qm64 /libs:static /winapp sbart.f interv.f bsplvb.f spbfa.f spbsl.f bvalue.f scopy.f ssort.f sdot.f saxpy.f bsplvd.f /Fesbart_x64.dll
93    // check dumpbin /EXPORTS sbart_x64.dll
94    // and   dumpbin /IMPORTS sbart_x64.dll
95    [DllImport("sbart_x64.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "sbart")]
96    public static extern void sbart_x64(
97     float[] xs, float[] ys, float[] ws, ref int n, float[] knot, ref int nk,
98     float[] coeff, float[] sz, float[] lev,
99     ref float crit, ref int icrit, ref float spar, ref int ispar, ref float lspar, ref float uspar, ref float tol,
100     ref int isetup,
101     float[] xwy,
102     float[] hs0, float[] hs1, float[] hs2, float[] hs3,
103     float[] sg0, float[] sg1, float[] sg2, float[] sg3,
104     float[,] abd, float[,] p1ip, float[,] p2ip, ref int ld4, ref int ldnk, ref int ier);
105
106    [DllImport("sbart_x86.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "sbart")]
107    public static extern void sbart_x86();
108
109
110    [DllImport("sbart_x64.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "sknotl")]
111    public static extern void sknotl_x64(float[] x, ref int n, float[] knot, ref int k);
112    [DllImport("sbart_x64.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "setreg")]
113    public static extern void setreg_x64(float[] x, float[] y, float[] w, ref int n, float[] xw, ref int nx, ref float min, ref float range, float[] knot, ref int nk);
114    /*
115     * calculates value at  x  of  jderiv-th derivative of spline from b-repr.
116    c  the spline is taken to be continuous from the right, EXCEPT at the
117    c  rightmost knot, where it is taken to be continuous from the left.
118    c
119    c******  i n p u t ******
120    c  t, bcoef, n, k......forms the b-representation of the spline  f  to
121    c        be evaluated. specifically,
122    c  t.....knot sequence, of length  n+k, assumed nondecreasing.
123    c  bcoef.....b-coefficient sequence, of length  n .
124    c  n.....length of  bcoef  and dimension of spline(k,t),
125    c        a s s u m e d  positive .
126    c  k.....order of the spline .
127    c
128    c  w a r n i n g . . .   the restriction  k .le. kmax (=20)  is imposed
129    c        arbitrarily by the dimension statement for  aj, dl, dr  below,
130    c        but is  n o w h e r e  c h e c k e d  for.
131    c
132    c  x.....the point at which to evaluate .
133    c  jderiv.....integer giving the order of the derivative to be evaluated
134    c        a s s u m e d  to be zero or positive.
135    c
136    c******  o u t p u t  ******
137    c  bvalue.....the value of the (jderiv)-th derivative of  f  at  x .
138    c
139    c******  m e t h o d  ******
140    c     The nontrivial knot interval  (t(i),t(i+1))  containing  x  is lo-
141    c  cated with the aid of  interv . The  k  b-coeffs of  f  relevant for
142    c  this interval are then obtained from  bcoef (or taken to be zero if
143    c  not explicitly available) and are then differenced  jderiv  times to
144    c  obtain the b-coeffs of  (d**jderiv)f  relevant for that interval.
145    c  Precisely, with  j = jderiv, we have from x.(12) of the text that
146    c
147    c     (d**j)f  =  sum ( bcoef(.,j)*b(.,k-j,t) )
148    c
149    c  where
150    c                   / bcoef(.),                     ,  j .eq. 0
151    c                   /
152    c    bcoef(.,j)  =  / bcoef(.,j-1) - bcoef(.-1,j-1)
153    c                   / ----------------------------- ,  j .gt. 0
154    c                   /    (t(.+k-j) - t(.))/(k-j)
155    c
156    c     Then, we use repeatedly the fact that
157    c
158    c    sum ( a(.)*b(.,m,t)(x) )  =  sum ( a(.,x)*b(.,m-1,t)(x) )
159    c  with
160    c                 (x - t(.))*a(.) + (t(.+m-1) - x)*a(.-1)
161    c    a(.,x)  =    ---------------------------------------
162    c                 (x - t(.))      + (t(.+m-1) - x)
163    c
164    c  to write  (d**j)f(x)  eventually as a linear combination of b-splines
165    c  of order  1 , and the coefficient for  b(i,1,t)(x)  must then be the
166    c  desired number  (d**j)f(x). (see x.(17)-(19) of text).
167     */
168    [DllImport("sbart_x64.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "bvalue")]
169    public static extern float bvalue(float[] t, float[] bcoeff, ref int n, ref int k, ref float x, ref int jderiv);
170
171    public class SBART_Report {
172      public double smoothingParameter;
173      public double gcv;
174      public double[] leverage;     
175    }
176
177
178    public static IRegressionModel CalculateSBART(double[] x, double[] y, double[] w, int nKnots, string targetVariable, string[] inputVars, out SBART_Report rep) {
179      // use kMeans to find knot points
180      double[,] xy = new double[x.Length, 1];
181      for (int i = 0; i < x.Length; i++) xy[i, 0] = x[i];
182      double[,] c;
183      int[] xyc;
184      int info;
185      alglib.kmeansgenerate(xy, x.Length, 1, nKnots, 10, out info, out c, out xyc);
186      double[] knots = new double[nKnots];
187      for (int i = 0; i < knots.Length; i++) knots[i] = c[0, i];
188      return CalculateSBART(x, y, w, knots, targetVariable, inputVars, out rep);
189    }
190
191    public static IRegressionModel CalculateSBART(double[] x, double[] y, double[] w, double[] knots, string targetVariable, string[] inputVars, out SBART_Report rep) {
192      float[] xs = x.Select(xi=>(float)xi).ToArray();
193      float[] ys = y.Select(xi => (float)xi).ToArray();
194      float[] ws = w.Select(xi => (float)xi).ToArray();
195      float[] k = knots.Select(xi => (float)xi).ToArray();
196
197      int n = xs.Length;
198      if (n < 4) throw new ArgumentException("n < 4");
199      if (knots.Length > n + 2) throw new ArgumentException("more than n+2 knots");
200      float[] xw = new float[n];
201      int nx = -99;
202      float min = 0.0f;
203      float range = 0.0f;
204      int nk = -99;
205      float[] regKnots = new float[n + 6];
206
207      // sort xs together with ys and ws
208      // combine rows with duplicate x values
209      // transform x to range [0 .. 1]
210      // create a set of knots (using a heuristic for the number of knots)
211      // knots are located at data points. denser regions of x contain more knots.
212      SBART.setreg_x64(xs, ys, ws,
213        ref n, xw, ref nx, ref min, ref range, regKnots, ref nk);
214
215      // in this case we want to use the knots supplied by the caller.
216      // the knot values produced by setreg are overwritten with scaled knots supplied by caller.
217      // knots must be ordered as well.
218      int i = 0;
219      // left boundary
220      regKnots[i++] = 0.0f;
221      regKnots[i++] = 0.0f;
222      regKnots[i++] = 0.0f;
223      regKnots[i++] = 0.0f;
224      foreach (var knot in knots.OrderBy(ki=>ki)) {
225        regKnots[i++] = ((float)knot - min) / range;
226      }
227      // right boundary
228      regKnots[i++] = 1.0f;
229      regKnots[i++] = 1.0f;
230      regKnots[i++] = 1.0f;
231      regKnots[i++] = 1.0f;
232      nk = knots.Length + 4;
233
234      float criterion = -99.0f; // GCV
235      int icrit = 1; // calculate GCV
236      float smoothingParameter = -99.0f;
237      int smoothingParameterIndicator = 0;
238      float lowerSmoothingParameter = 0.5f;
239      float upperSmoothingParameter = 1.0f;
240      float tol = 0.01f;
241      int isetup = 0; // not setup?
242
243      // results
244      float[] coeff = new float[nk];
245      float[] leverage = new float[nx];
246      float[] y_smoothed = new float[nx];
247      int ier = -99;
248
249
250      // working arrays for sbart
251      float[] xwy = new float[nk];
252      float[] hs0 = new float[nk];
253      float[] hs1 = new float[nk];
254      float[] hs2 = new float[nk];
255      float[] hs3 = new float[nk];
256      float[] sg0 = new float[nk];
257      float[] sg1 = new float[nk];
258      float[] sg2 = new float[nk];
259      float[] sg3 = new float[nk];
260      int ld4 = 4;
261      float[,] adb = new float[ld4, nk];
262
263      float[,] p1ip = new float[nk, ld4];
264      int ldnk = nk + 4;
265      float[,] p2ip = new float[nk, nx];
266
267      SBART.sbart_x64(xs.Take(nx).ToArray(), ys.Take(nx).ToArray(), ws.Take(nx).ToArray(), ref nx,
268        regKnots, ref nk,
269        coeff, y_smoothed, leverage,
270        ref criterion, ref icrit,
271        ref smoothingParameter, ref smoothingParameterIndicator, ref lowerSmoothingParameter, ref upperSmoothingParameter,
272        ref tol, ref isetup,
273        xwy, hs0, hs1, hs2, hs3, sg0, sg1, sg2, sg3, adb, p1ip, p2ip, ref ld4, ref ldnk, ref ier);
274
275      if (ier > 0) throw new ArgumentException(ier.ToString());
276
277      rep = new SBART_Report();
278      rep.gcv = criterion;
279      rep.smoothingParameter = smoothingParameter;
280      rep.leverage = leverage.Select(li => (double)li).ToArray();
281
282      return new BartRegressionModel(regKnots.Take(nk+4).ToArray(), coeff, targetVariable, inputVars, min, range);
283    }
284
285    public static IRegressionModel CalculateSBART(double[] x, double[] y,
286      string targetVariable, string[] inputVars,
287      out SBART_Report report) {
288      var w = Enumerable.Repeat(1.0, x.Length).ToArray();
289
290      int n = x.Length;
291      int ic = n - 1;
292      int ier = -99;
293      int nk = n;
294      float[] knots = new float[nk + 6];
295
296      float crit = -99.0f;
297      int icrit = 1; // 0..don't calc CV,  1 .. GCV, 2 CV
298
299      float smoothingParameter = -99.0f;
300      int smoothingParameterIndicator = 0;
301      float lowerSmoothingParameter = 0f;
302      float upperSmoothingParameter = 1.0f;
303      float tol = 0.02f;
304      int isetup = 0; // not setup?
305
306      float min = -99.0f;
307      float range = -99.0f;
308
309      if (Environment.Is64BitProcess) {
310        float[] xw = new float[n];
311        int nx = -99;
312        float[] xs = x.Select(xi => (float)xi).ToArray();
313        float[] ys = y.Select(yi => (float)yi).ToArray();
314        float[] ws = w.Select(wi => (float)wi).ToArray();
315       
316        // sort xs together with ys and ws
317        // combine rows with duplicate x values
318        // create a set of knots (using a heuristic for the number of knots)
319        // knots are located at data points. denser regions of x contain more knots.
320        SBART.setreg_x64(xs, ys, ws,
321          ref n, xw, ref nx, ref min, ref range, knots, ref nk);
322
323        /* use all points as knot points
324        nk = nx + 2;
325        knots[0] = xs[0];
326        knots[1] = xs[0];
327        knots[2] = xs[0];
328        Array.Copy(xs, 0, knots, 3, nx);
329        knots[nx + 3] = xs[nx - 1];
330        knots[nx + 4] = xs[nx - 1];
331        knots[nx + 5] = xs[nx - 1];
332        */
333
334        /*
335        // use uniform grid of knots
336        nk = 20;
337        knots = new float[nk + 4];
338        knots[0] = xs[0];
339        knots[1] = xs[0];
340        knots[2] = xs[0];
341        for(int i = 3; i<nk+1;i++) {
342          knots[i] = (i-3f) / (nk-1);
343        }
344        knots[nk] = xs[nx - 1];
345        knots[nk + 1] = xs[nx - 1];
346        knots[nk + 2] = xs[nx - 1];
347        knots[nk + 3] = xs[nx - 1];
348        */
349        if (nx < 4) {
350          report = new SBART_Report();
351          report.leverage = new double[0];
352          return new ConstantModel(ys.Take(nx).Average(), targetVariable);
353        }
354
355        float[] coeff = new float[nk];
356        float[] leverage = new float[nx];
357        float[] y_smoothed = new float[nx];
358
359
360        // working arrays for sbart
361        float[] xwy = new float[nk];
362        float[] hs0 = new float[nk];
363        float[] hs1 = new float[nk];
364        float[] hs2 = new float[nk];
365        float[] hs3 = new float[nk];
366        float[] sg0 = new float[nk];
367        float[] sg1 = new float[nk];
368        float[] sg2 = new float[nk];
369        float[] sg3 = new float[nk];
370        int ld4 = 4;
371        float[,] adb = new float[ld4, nk];
372
373        float[,] p1ip = new float[nk, ld4];
374        int ldnk = nk + 4;
375        float[,] p2ip = new float[nk, nx];
376
377        SBART.sbart_x64(xs.Take(nx).ToArray(), ys.Take(nx).ToArray(), ws.Take(nx).ToArray(), ref nx,
378          knots, ref nk,
379          coeff, y_smoothed, leverage,
380          ref crit, ref icrit,
381          ref smoothingParameter, ref smoothingParameterIndicator, ref lowerSmoothingParameter, ref upperSmoothingParameter,
382          ref tol, ref isetup,
383          xwy, hs0, hs1, hs2, hs3, sg0, sg1, sg2, sg3, adb, p1ip, p2ip, ref ld4, ref ldnk, ref ier);
384
385        if (ier > 0) throw new ArgumentException(ier.ToString());
386
387        report = new SBART_Report();
388        report.gcv = crit;
389        report.smoothingParameter = smoothingParameter;
390        report.leverage = leverage.Select(li => (double)li).ToArray();
391
392        return new BartRegressionModel(knots.Take(nk+4).ToArray(), coeff, targetVariable, inputVars, min, range);
393
394      } else {
395        throw new NotSupportedException();
396      }
397
398    }
399
400    public class BartRegressionModel : NamedItem, IRegressionModel {
401      private float[] knots;
402      private float[] bcoeff;
403      private double min;
404      private double range;
405      public string TargetVariable { get; set; }
406
407      private string[] variablesUsedForPrediction;
408      public IEnumerable<string> VariablesUsedForPrediction {
409        get {
410          return variablesUsedForPrediction;
411        }
412      }
413
414      public BartRegressionModel(BartRegressionModel orig, Cloner cloner) {
415        this.knots = orig.knots;
416        this.bcoeff = orig.bcoeff;
417        this.min = orig.min;
418        this.range = orig.range;
419        this.TargetVariable = orig.TargetVariable;
420        this.variablesUsedForPrediction = orig.variablesUsedForPrediction;
421      }
422      public BartRegressionModel(float[] knots, float[] bcoeff, string targetVariable, string[] inputVars, double min, double range) {
423        this.variablesUsedForPrediction = inputVars;
424        this.TargetVariable = targetVariable;
425        this.knots = knots;
426        this.bcoeff = bcoeff;
427        this.range = range;
428        this.min = min;
429      }
430
431      public event EventHandler TargetVariableChanged;
432
433      public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
434        return new RegressionSolution(this, (IRegressionProblemData) problemData.Clone());
435      }
436
437      public double GetEstimatedValue(double xx) {
438        float x = (float)((xx - min) / range);
439        int n = bcoeff.Length;
440        int k = 4;
441        int zero = 0;
442        int one = 1;
443        int two = 2;
444
445
446        // linear extrapolation
447        if (x < 0) {
448          float x0 = 0.0f;
449          var y0 = bvalue(knots, bcoeff, ref n, ref k, ref x0, ref zero);
450          var y0d = bvalue(knots, bcoeff, ref n, ref k, ref x0, ref one);
451          return y0 + x * y0d;
452        }
453        if (x > 1) {
454          float x1 = 1.0f;
455          var y1 = bvalue(knots, bcoeff, ref n, ref k, ref x1, ref zero);
456          var y1d = bvalue(knots, bcoeff, ref n, ref k, ref x1, ref one);
457          return y1 + (x-1) * y1d;
458        }
459
460        lock (this) {
461          return bvalue(knots, bcoeff, ref n, ref k, ref x, ref zero);
462        }
463
464        // piecewise constant approximation
465        // if (xx <= x[0]) return bcoeff[0];
466        // if (xx >= x[n - 1]) return bcoeff[n - 1];
467        // for(int i=1;i<n-2;i++) {
468        //   var h1 = xx - x[i];
469        //   var h2 = xx - x[i + 1];
470        //   if(h1 > 0 && h2 <= 0) {
471        //     if (h1 < h2) return bcoeff[i]; else return bcoeff[i + 1];
472        //   }
473        // }
474        // return 0.0;
475
476        // // piecewise linear approximation
477        // int n = x.Length;
478        // if (xx <= x[0]) {
479        //   double h = xx - x[0];
480        //   return h * (y[1] - y[0]) / (x[1] - x[0]) + y[0];
481        // } else if (xx >= x[n-1]) {
482        //   double h = xx - x[n-1];
483        //   return h * (y[n-1] - y[n-2]) / (x[n-1] - x[n-2]) + y[n-1];
484        // } else {
485        //   // binary search
486        //   int lower = 0;
487        //   int upper = n-1;
488        //   while (true) {
489        //     if (upper < lower) throw new InvalidProgramException();
490        //     int i = lower + (upper - lower) / 2;
491        //     if (x[i] <= xx && xx < x[i + 1]) {
492        //       double h = xx - x[i];
493        //       double k = (y[i + 1] - y[i]) / (x[i + 1] - x[i]);
494        //       return h * k + y[i];
495        //     } else if (xx < x[i]) {
496        //       upper = i - 1;
497        //     } else {
498        //       lower = i + 1;
499        //     }
500        //   }
501        // }
502        // return 0.0;
503      }
504
505      public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
506        foreach(var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
507          yield return GetEstimatedValue(x);
508        }
509      }
510
511      public override IDeepCloneable Clone(Cloner cloner) {
512        return new BartRegressionModel(this, cloner);
513      }
514    }
515
516  }
517}
Note: See TracBrowser for help on using the repository browser.