source: branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/CubicSplineGCV.cs @ 15468

Last change on this file since 15468 was 15468, checked in by gkronber, 4 years ago

#2789 worked on sbart spline

File size: 13.2 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Diagnostics;
4using System.Linq;
5using System.Runtime.InteropServices;
6using System.Text;
7using System.Threading.Tasks;
8using HeuristicLab.Problems.DataAnalysis;
9
10namespace HeuristicLab.Algorithms.DataAnalysis.Experimental {
11  public static class CubicSplineGCV {
12    // CUBGCV (X,F,DF,N,Y,C,IC,VAR,JOB,SE,WK,IER)
13    //   ARGUMENTS    X      - VECTOR OF LENGTH N CONTAINING THE
14    //                           ABSCISSAE OF THE N DATA POINTS
15    //                           (X(I),F(I)) I=1..N. (INPUT) X
16    //                           MUST BE ORDERED SO THAT
17    //                           X(I) .LT. X(I+1).
18    //                F      - VECTOR OF LENGTH N CONTAINING THE
19    //                           ORDINATES (OR FUNCTION VALUES)
20    //                           OF THE N DATA POINTS (INPUT).
21    //                DF     - VECTOR OF LENGTH N. (INPUT/OUTPUT)
22    //                           DF(I) IS THE RELATIVE STANDARD DEVIATION
23    //                           OF THE ERROR ASSOCIATED WITH DATA POINT I.
24    //                           EACH DF(I) MUST BE POSITIVE.  THE VALUES IN
25    //                           DF ARE SCALED BY THE SUBROUTINE SO THAT
26    //                           THEIR MEAN SQUARE VALUE IS 1, AND UNSCALED
27    //                           AGAIN ON NORMAL EXIT.
28    //                           THE MEAN SQUARE VALUE OF THE DF(I) IS RETURNED
29    //                           IN WK(7) ON NORMAL EXIT.
30    //                           IF THE ABSOLUTE STANDARD DEVIATIONS ARE KNOWN,
31    //                           THESE SHOULD BE PROVIDED IN DF AND THE ERROR
32    //                           VARIANCE PARAMETER VAR (SEE BELOW) SHOULD THEN
33    //                           BE SET TO 1.
34    //                           IF THE RELATIVE STANDARD DEVIATIONS ARE UNKNOWN,
35    //                           SET EACH DF(I)=1.
36    //                N      - NUMBER OF DATA POINTS (INPUT).
37    //                           N MUST BE .GE. 3.
38    //                Y,C    - SPLINE COEFFICIENTS. (OUTPUT) Y
39    //                           IS A VECTOR OF LENGTH N. C IS
40    //                           AN N-1 BY 3 MATRIX. THE VALUE
41    //                           OF THE SPLINE APPROXIMATION AT T IS
42    //                           S(T)=((C(I,3)*D+C(I,2))*D+C(I,1))*D+Y(I)
43    //                           WHERE X(I).LE.T.LT.X(I+1) AND
44    //                           D = T-X(I).
45    //                IC     - ROW DIMENSION OF MATRIX C EXACTLY
46    //                           AS SPECIFIED IN THE DIMENSION
47    //                           STATEMENT IN THE CALLING PROGRAM. (INPUT)
48    //                VAR    - ERROR VARIANCE. (INPUT/OUTPUT)
49    //                           IF VAR IS NEGATIVE (I.E. UNKNOWN) THEN
50    //                           THE SMOOTHING PARAMETER IS DETERMINED
51    //                           BY MINIMIZING THE GENERALIZED CROSS VALIDATION
52    //                           AND AN ESTIMATE OF THE ERROR VARIANCE IS
53    //                           RETURNED IN VAR.
54    //                           IF VAR IS NON-NEGATIVE (I.E. KNOWN) THEN THE
55    //                           SMOOTHING PARAMETER IS DETERMINED TO MINIMIZE
56    //                           AN ESTIMATE, WHICH DEPENDS ON VAR, OF THE TRUE
57    //                           MEAN SQUARE ERROR, AND VAR IS UNCHANGED.
58    //                           IN PARTICULAR, IF VAR IS ZERO, THEN AN
59    //                           INTERPOLATING NATURAL CUBIC SPLINE IS CALCULATED.
60    //                           VAR SHOULD BE SET TO 1 IF ABSOLUTE STANDARD
61    //                           DEVIATIONS HAVE BEEN PROVIDED IN DF (SEE ABOVE).
62    //                JOB    - JOB SELECTION PARAMETER. (INPUT)
63    //                         JOB = 0 SHOULD BE SELECTED IF POINT STANDARD ERROR
64    //                           ESTIMATES ARE NOT REQUIRED IN SE.
65    //                         JOB = 1 SHOULD BE SELECTED IF POINT STANDARD ERROR
66    //                           ESTIMATES ARE REQUIRED IN SE.
67    //                SE     - VECTOR OF LENGTH N CONTAINING BAYESIAN STANDARD
68    //                           ERROR ESTIMATES OF THE FITTED SPLINE VALUES IN Y.
69    //                           SE IS NOT REFERENCED IF JOB=0. (OUTPUT)
70    //                WK     - WORK VECTOR OF LENGTH 7*(N + 2). ON NORMAL EXIT THE
71    //                           FIRST 7 VALUES OF WK ARE ASSIGNED AS FOLLOWS:-
72    //
73    //                           WK(1) = SMOOTHING PARAMETER (= RHO/(RHO + 1))
74    //                           WK(2) = ESTIMATE OF THE NUMBER OF DEGREES OF
75    //                                   FREEDOM OF THE RESIDUAL SUM OF SQUARES
76    //                           WK(3) = GENERALIZED CROSS VALIDATION
77    //                           WK(4) = MEAN SQUARE RESIDUAL
78    //                           WK(5) = ESTIMATE OF THE TRUE MEAN SQUARE ERROR
79    //                                   AT THE DATA POINTS
80    //                           WK(6) = ESTIMATE OF THE ERROR VARIANCE
81    //                           WK(7) = MEAN SQUARE VALUE OF THE DF(I)
82    //
83    //                           IF WK(1)=0 (RHO=0) AN INTERPOLATING NATURAL CUBIC
84    //                           SPLINE HAS BEEN CALCULATED.
85    //                           IF WK(1)=1 (RHO=INFINITE) A LEAST SQUARES
86    //                           REGRESSION LINE HAS BEEN CALCULATED.
87    //                           WK(2) IS AN ESTIMATE OF THE NUMBER OF DEGREES OF
88    //                           FREEDOM OF THE RESIDUAL WHICH REDUCES TO THE
89    //                           USUAL VALUE OF N-2 WHEN A LEAST SQUARES REGRESSION
90    //                           LINE IS CALCULATED.
91    //                           WK(3),WK(4),WK(5) ARE CALCULATED WITH THE DF(I)
92    //                           SCALED TO HAVE MEAN SQUARE VALUE 1.  THE
93    //                           UNSCALED VALUES OF WK(3),WK(4),WK(5) MAY BE
94    //                           CALCULATED BY DIVIDING BY WK(7).
95    //                           WK(6) COINCIDES WITH THE OUTPUT VALUE OF VAR IF
96    //                           VAR IS NEGATIVE ON INPUT.  IT IS CALCULATED WITH
97    //                           THE UNSCALED VALUES OF THE DF(I) TO FACILITATE
98    //                           COMPARISONS WITH A PRIORI VARIANCE ESTIMATES.
99    //
100    //                IER    - ERROR PARAMETER. (OUTPUT)
101    //                         TERMINAL ERROR
102    //                           IER = 129, IC IS LESS THAN N-1.
103    //                           IER = 130, N IS LESS THAN 3.
104    //                           IER = 131, INPUT ABSCISSAE ARE NOT
105    //                             ORDERED SO THAT X(I).LT.X(I+1).
106    //                           IER = 132, DF(I) IS NOT POSITIVE FOR SOME I.
107    //                           IER = 133, JOB IS NOT 0 OR 1.
108
109    // To build the fortran library (x64) use:
110    // > gfortran -m64 642.f -shared -o 642_x64.dll  or
111    // > ifort /dll /Qm64 642.f /Fe642_x64.dll
112    // check dumpbin /EXPORTS 642_x64.dll
113    // and   dumpbin /IMPORTS 642_x64.dll
114    [DllImport("642_x64.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "cubgcv")]
115    public static extern void cubgcv_x64(
116      double[] x,
117      double[] f,
118      double[] df,
119      ref int n,
120      [Out] double[] y,
121      [Out] double[,] c,
122      ref int ic,
123      ref double var,
124      ref int job,
125      [Out] double[] se,
126      double[,] wk,
127      ref int ier);
128
129    // To build the fortran library (x86) use:
130    // > gfortran -static -Lc:/mingw/bin/libgcc_s_dw2-1.dll -m32 642.f -shared -o 642_x86.dll
131    [DllImport("642_x86.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "cubgcv")]
132    public static extern void cubgcv_x86(
133      double[] x,
134      double[] f,
135      double[] df,
136      ref Int32 n,
137      double[] y,
138      double[,] c,
139      ref Int32 ic,
140      ref double var,
141      ref Int32 job,
142      double[] se,
143      double[,] wk,
144      ref Int32 ier);
145
146
147    public class CubGcvReport {
148      //
149      //   
150      //
151      //
152      //   IF WK(1)=0 (RHO=0) AN INTERPOLATING NATURAL CUBIC
153      //   SPLINE HAS BEEN CALCULATED.
154      //   IF WK(1)=1 (RHO=INFINITE) A LEAST SQUARES
155      //   REGRESSION LINE HAS BEEN CALCULATED.
156      //
157      public double smoothingParameter;
158      //   WK(2) IS AN ESTIMATE OF THE NUMBER OF DEGREES OF
159      //   FREEDOM OF THE RESIDUAL WHICH REDUCES TO THE
160      //   USUAL VALUE OF N-2 WHEN A LEAST SQUARES REGRESSION
161      //   LINE IS CALCULATED.
162      public double estimatedRSSDegreesOfFreedom;
163      //   WK(3),WK(4),WK(5) ARE CALCULATED WITH THE DF(I)
164      //   SCALED TO HAVE MEAN SQUARE VALUE 1.  THE
165      //   UNSCALED VALUES OF WK(3),WK(4),WK(5) MAY BE
166      //   CALCULATED BY DIVIDING BY WK(7).
167      public double generalizedCrossValidation;
168      public double meanSquareResiudal;
169      public double estimatedTrueMeanSquaredErrorAtDataPoints;
170
171      //   WK(6) COINCIDES WITH THE OUTPUT VALUE OF VAR IF
172      //   VAR IS NEGATIVE ON INPUT.  IT IS CALCULATED WITH
173      //   THE UNSCALED VALUES OF THE DF(I) TO FACILITATE
174      //   COMPARISONS WITH A PRIORI VARIANCE ESTIMATES.
175      public double estimatedErrorVariance;
176      public double meanSquareOfDf;
177      public double[] se;
178    }
179    public static ReinschSmoothingSplineModel CalculateCubicSpline(double[] x, double[] y,
180      string targetVariable, string[] inputVars,
181      out CubGcvReport report) {
182      var w = Enumerable.Repeat(1.0, x.Length).ToArray();
183      double[] x2, y2, w2;
184      Splines.SortAndBin(x, y, w, out x2, out y2, out w2);
185      // TODO: use weights correctly
186
187      int n = x2.Length;
188      double[] df = Enumerable.Repeat(1.0, n).ToArray();   // set each df if actual standard deviation of data points is not known
189      double[,] c = new double[3, n - 1];
190      double[] se = new double[n]; // standard errors in points
191      double[,] wk = new double[7, (n + 2)]; // work array;
192      double[] y_smoothed = new double[n];
193      int job = 1; // calc estimates of standard errors in points
194      int ic = n - 1;
195      int ier = -99;
196      double var = -99.0;
197      if (Environment.Is64BitProcess) {
198        CubicSplineGCV.cubgcv_x64(x2, y2, df, ref n, y_smoothed,
199          c, ref ic, ref var, ref job, se, wk, ref ier);
200      } else {
201        CubicSplineGCV.cubgcv_x86(x2, y2, df, ref n, y_smoothed,
202          c, ref ic, ref var, ref job, se, wk, ref ier);
203
204
205      }
206
207      /*
208       *                  IER    - ERROR PARAMETER. (OUTPUT)
209       *                           TERMINAL ERROR
210       *                             IER = 129, IC IS LESS THAN N-1.
211       *                             IER = 130, N IS LESS THAN 3.
212       *                             IER = 131, INPUT ABSCISSAE ARE NOT
213       *                               ORDERED SO THAT X(I).LT.X(I+1).
214       *                             IER = 132, DF(I) IS NOT POSITIVE FOR SOME I.
215       *                             IER = 133, JOB IS NOT 0 OR 1.
216       *
217       */
218
219      if (ier == 131) {
220        throw new ArgumentException("x is not ordered");
221      } else if (ier != 0) throw new ArgumentException("Error in CUBGCV " + ier);
222
223      //                           OF THE SPLINE APPROXIMATION AT T IS
224      //                           S(T)=((C(I,3)*D+C(I,2))*D+C(I,1))*D+Y(I)
225      //                           WHERE X(I).LE.T.LT.X(I+1) AND
226      //                           D = T-X(I).
227
228      //                           WK(1) = SMOOTHING PARAMETER (= RHO/(RHO + 1))
229      //                           WK(2) = ESTIMATE OF THE NUMBER OF DEGREES OF
230      //                                   FREEDOM OF THE RESIDUAL SUM OF SQUARES
231      //                           WK(3) = GENERALIZED CROSS VALIDATION
232      //                           WK(4) = MEAN SQUARE RESIDUAL
233      //                           WK(5) = ESTIMATE OF THE TRUE MEAN SQUARE ERROR
234      //                                   AT THE DATA POINTS
235      //                           WK(6) = ESTIMATE OF THE ERROR VARIANCE
236      //                           WK(7) = MEAN SQUARE VALUE OF THE DF(I)
237      report = new CubGcvReport();
238      report.smoothingParameter = wk[0, 0];
239      report.estimatedRSSDegreesOfFreedom = wk[0, 1];
240      report.generalizedCrossValidation = wk[0, 2];
241      report.meanSquareResiudal = wk[0, 3];
242      report.estimatedTrueMeanSquaredErrorAtDataPoints = wk[0, 4];
243      report.estimatedErrorVariance = wk[0, 5];
244      report.meanSquareOfDf = wk[0, 6];
245      report.se = se;
246      // Debug.Assert(var == report.estimatedErrorVariance);
247
248
249
250      double[] dArr = new double[n];
251      double[] cArr = new double[n];
252      double[] bArr = new double[n];
253
254      for (int i = 0; i < n - 1; i++) {
255        bArr[i] = c[0, i];
256        cArr[i] = c[1, i];
257        dArr[i] = c[2, i];
258      }
259
260      // extrapolate linearly for xx > x[n]
261      bArr[n - 1] = bArr[n - 2];
262      // extrapolate linearly for xx < x[1]
263      dArr[0] = 0;
264
265      return new ReinschSmoothingSplineModel(
266        new MyArray<double>(1, y_smoothed),
267        new MyArray<double>(1, bArr),
268        new MyArray<double>(1, cArr),
269        new MyArray<double>(1, dArr),
270        new MyArray<double>(1, x2),
271        targetVariable, inputVars);
272
273
274    }
275
276  }
277}
Note: See TracBrowser for help on using the repository browser.