Free cookie consent management tool by TermsFeed Policy Generator

source: branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/SBART.cs @ 15457

Last change on this file since 15457 was 15457, checked in by gkronber, 6 years ago

#2789 added Finbarr O'Sullivan smoothing spline code

File size: 10.0 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Runtime.InteropServices;
6using System.Text;
7using System.Threading.Tasks;
8using HeuristicLab.Common;
9using HeuristicLab.Core;
10using HeuristicLab.Problems.DataAnalysis;
11
12namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
13  public static class SBART {
14    /*
15           # A Cubic B-spline Smoothing routine.
16
17    #
18    #          The algorithm minimises:
19    #
20    #      (1/n) * sum ws(i)**2 * (ys(i)-sz(i))**2 + lambda* int ( sz"(xs) )**2 dxs
21    #
22    #        lambda is a function of the spar which is assumed to be between
23    #        0 and 1
24
25
26            # Input
27
28    #   n               number of data points
29    #  ys(n)      vector of length n containing the observations
30    #  ws(n)            vector containing the weights given to each data point
31    #  xs(n)            vector containing the ordinates of the observations
32
33
34    #  nk               number of b-spline coefficients to be estimated
35    #                   nk <= n+2
36    #  knot(nk+4)       vector of knot points defining the cubic b-spline basis.
37
38
39    #  spar             penalised likelihood smoothing parameter
40    #  ispar            indicator saying if spar is supplied or to be estimated
41    #  lspar, uspar     lower and upper values for spar 0.,1. are good values
42    #  tol              used in Golden Search routine
43
44    #  isetup           setup indicator
45
46    #  icrit            indicator saying which cross validation score
47    #       is to be computed
48
49    #  ld4              the leading dimension of abd (ie ld4=4)
50    #  ldnk             the leading dimension of p2ip (not referenced)
51
52
53                    # Output
54
55    #   coef(nk)       vector of spline coefficients
56    #   sz(n)          vector of smoothed z-values
57    #   lev(n)         vector of leverages
58    #   crit           either ordinary of generalized CV score
59    #   ier            error indicator
60    #                  ier = 0 ___  everything fine
61    #                  ier = 1 ___  spar too small or too big
62    #                               problem in cholesky decomposition
63
64
65
66             # Working arrays/matrix
67    #   xwy         X'Wy
68    #   hs0,hs1,hs2,hs3   the diagonals of the X'WX matrix
69    #   sg0,sg1,sg2,sg3   the diagonals of the Gram matrix
70    #   abd(ld4,nk)       [ X'WX+lambda*SIGMA] in diagonal form
71    #   p1ip(ld4,nk)       inner products between columns of L inverse
72    #   p2ip(ldnk,nk)      all inner products between columns of L inverse
73    #                          L'L = [X'WX+lambdaSIGMA]  NOT REFERENCED
74
75    */
76
77    /*
78     * sbart(xs,ys,ws,n,knot,nk,
79          coef,sz,lev,
80          crit,icrit,spar,ispar,lspar,uspar,tol,
81          isetup,
82          xwy,
83          hs0,hs1,hs2,hs3,
84          sg0,sg1,sg2,sg3,
85          abd,p1ip,p2ip,ld4,ldnk,ier)
86
87    */
88
89
90
91    // To build the fortran library (x64) use:
92    // > ifort /dll /Qm64 sbart.f /Fesbart_x64.dll
93    // check dumpbin /EXPORTS sbart_x64.dll
94    // and   dumpbin /IMPORTS sbart_x64.dll
95    [DllImport("sbart_x64.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "sbart")]
96    public static extern void sbart_x64(
97     float[] xs, float[] ys, float[] ws, ref int n, float[] knot, ref int nk,
98     float[] coeff, float[] sz, float[] lev,
99     ref float crit, ref int icrit, ref float spar, ref int ispar, ref float lspar, ref float uspar, ref float tol,
100     ref int isetup,
101     float[] xwy,
102     float[] hs0, float[] hs1, float[] hs2, float[] hs3,
103     float[] sg0, float[] sg1, float[] sg2, float[] sg3,
104     float[,] abd, float[,] p1ip, float[,] p2ip, ref int ld4, ref int ldnk, ref int ier);
105
106    [DllImport("sbart_x86.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "sbart")]
107    public static extern void sbart_x86();
108
109
110    [DllImport("sbart_x64.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "sknotl")]
111    public static extern void sknotl_x64(float[] x, ref int n, float[] knot, ref int k);
112    [DllImport("sbart_x64.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "setreg")]
113    public static extern void setreg_x64(float[] x, float[] y, float[] w, ref int n, float[] xw, ref int nx, ref float min, ref float range, float[] knot, ref int nk);
114
115    public class SBART_Report {
116      public double smoothingParameter;
117      public double gcv;
118      public double[] leverage;     
119    }
120
121    public static IRegressionModel CalculateSBART(double[] x, double[] y,
122      string targetVariable, string[] inputVars, float smoothingParameter,
123      out SBART_Report report) {
124      var w = Enumerable.Repeat(1.0, x.Length).ToArray();
125
126      int n = x.Length;
127      float[] leverage = new float[n];
128      float[] y_smoothed = new float[n];
129      int ic = n - 1;
130      int ier = -99;
131      int nk = n;
132      float[] knots = new float[nk + 6];
133
134      float crit = -99.0f;
135      int icrit = 0; // 0..don't calc CV,  1 .. GCV, 2 CV
136
137      // float smoothingParameter = -99.0f;
138      int smoothingParameterIndicator = 1;
139      float lowerSmoothingParameter = 0f;
140      float upperSmoothingParameter = 1.0f;
141      float tol = 0.01f;
142      int isetup = 0; // not setup?
143
144      float min = -99.0f;
145      float range = -99.0f;
146
147      if (Environment.Is64BitProcess) {
148        float[] xw = new float[n];
149        int nx = -99;
150        float[] xs = x.Select(xi => (float)xi).ToArray();
151        float[] ys = y.Select(yi => (float)yi).ToArray();
152        float[] ws = w.Select(wi => (float)wi).ToArray();
153        SBART.setreg_x64(xs, ys, ws,
154          ref n, xw, ref nx, ref min, ref range, knots, ref nk);
155        if(nx < 4) {
156          report = new SBART_Report();
157          report.leverage = new double[0];
158          return new ConstantModel(ys.Take(nx).Average(), targetVariable);
159        }
160
161        float[] coeff = new float[nk];
162
163        // working arrays for sbart
164        float[] xwy = new float[nk];
165        float[] hs0 = new float[nk];
166        float[] hs1 = new float[nk];
167        float[] hs2 = new float[nk];
168        float[] hs3 = new float[nk];
169        float[] sg0 = new float[nk];
170        float[] sg1 = new float[nk];
171        float[] sg2 = new float[nk];
172        float[] sg3 = new float[nk];
173        int ld4 = 4;
174        float[,] adb = new float[nk, ld4];
175
176        float[,] p1ip = new float[nk, ld4];
177        int ldnk = nk + 4;
178        float[,] p2ip = new float[nk, ldnk];
179
180        SBART.sbart_x64(xs, ys, ws, ref nx, knots, ref nk, coeff, y_smoothed, leverage, ref crit, ref icrit,
181          ref smoothingParameter, ref smoothingParameterIndicator, ref lowerSmoothingParameter, ref upperSmoothingParameter,
182          ref tol, ref isetup,
183          xwy, hs0, hs1, hs2, hs3, sg0, sg1, sg2, sg3, adb, p1ip, p2ip, ref ld4, ref ldnk, ref ier);
184
185        if (ier > 0) throw new ArgumentException(ier.ToString());
186
187        report = new SBART_Report();
188        report.gcv = crit;
189        report.smoothingParameter = smoothingParameter;
190        report.leverage = leverage.Select(li => (double)li).ToArray();
191
192        return new BartRegressionModel(xs.Take(nx).ToArray(), y_smoothed.Take(nx).ToArray(), targetVariable, inputVars, min, range);
193
194      } else {
195        throw new NotSupportedException();
196      }
197
198    }
199
200    public class BartRegressionModel : NamedItem, IRegressionModel {
201      private float[] x;
202      private float[] y;
203      private double min;
204      private double range;
205      public string TargetVariable { get; set; }
206
207      private string[] variablesUsedForPrediction;
208      public IEnumerable<string> VariablesUsedForPrediction {
209        get {
210          return variablesUsedForPrediction;
211        }
212      }
213
214      public BartRegressionModel(BartRegressionModel orig, Cloner cloner) {
215        this.x = orig.x;
216        this.y = orig.y;
217        this.min = orig.min;
218        this.range = orig.range;
219        this.TargetVariable = orig.TargetVariable;
220        this.variablesUsedForPrediction = orig.variablesUsedForPrediction;
221      }
222      public BartRegressionModel(float[] x, float[] y_smoothed, string targetVariable, string[] inputVars, double min, double range) {
223        this.variablesUsedForPrediction = inputVars;
224        this.TargetVariable = targetVariable;
225        this.x = x;
226        this.y = y_smoothed;
227        this.range = range;
228        this.min = min;
229      }
230
231      public event EventHandler TargetVariableChanged;
232
233      public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
234        return new RegressionSolution(this, (IRegressionProblemData) problemData.Clone());
235      }
236
237      public double GetEstimatedValue(double xx) {
238        xx = (xx - min) / range;
239        // piecewise linear approximation
240        int n = x.Length;
241        if (xx <= x[0]) {
242          double h = xx - x[0];
243          return h * (y[1] - y[0]) / (x[1] - x[0]) + y[0];
244        } else if (xx >= x[n-1]) {
245          double h = xx - x[n-1];
246          return h * (y[n-1] - y[n-2]) / (x[n-1] - x[n-2]) + y[n-1];
247        } else {
248          // binary search
249          int lower = 0;
250          int upper = n-1;
251          while (true) {
252            if (upper < lower) throw new InvalidProgramException();
253            int i = lower + (upper - lower) / 2;
254            if (x[i] <= xx && xx < x[i + 1]) {
255              double h = xx - x[i];
256              double k = (y[i + 1] - y[i]) / (x[i + 1] - x[i]);
257              return h * k + y[i];
258            } else if (xx < x[i]) {
259              upper = i - 1;
260            } else {
261              lower = i + 1;
262            }
263          }
264        }
265        return 0.0;
266      }
267
268      public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
269        foreach(var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) {
270          yield return GetEstimatedValue(x);
271        }
272      }
273
274      public override IDeepCloneable Clone(Cloner cloner) {
275        return new BartRegressionModel(this, cloner);
276      }
277    }
278
279  }
280}
Note: See TracBrowser for help on using the repository browser.