using System; using System.Collections.Generic; using System.Diagnostics; using System.Linq; using System.Runtime.InteropServices; using System.Text; using System.Threading.Tasks; using HeuristicLab.Problems.DataAnalysis; namespace HeuristicLab.Algorithms.DataAnalysis.Experimental { public static class CubicSplineGCV { // CUBGCV (X,F,DF,N,Y,C,IC,VAR,JOB,SE,WK,IER) // ARGUMENTS X - VECTOR OF LENGTH N CONTAINING THE // ABSCISSAE OF THE N DATA POINTS // (X(I),F(I)) I=1..N. (INPUT) X // MUST BE ORDERED SO THAT // X(I) .LT. X(I+1). // F - VECTOR OF LENGTH N CONTAINING THE // ORDINATES (OR FUNCTION VALUES) // OF THE N DATA POINTS (INPUT). // DF - VECTOR OF LENGTH N. (INPUT/OUTPUT) // DF(I) IS THE RELATIVE STANDARD DEVIATION // OF THE ERROR ASSOCIATED WITH DATA POINT I. // EACH DF(I) MUST BE POSITIVE. THE VALUES IN // DF ARE SCALED BY THE SUBROUTINE SO THAT // THEIR MEAN SQUARE VALUE IS 1, AND UNSCALED // AGAIN ON NORMAL EXIT. // THE MEAN SQUARE VALUE OF THE DF(I) IS RETURNED // IN WK(7) ON NORMAL EXIT. // IF THE ABSOLUTE STANDARD DEVIATIONS ARE KNOWN, // THESE SHOULD BE PROVIDED IN DF AND THE ERROR // VARIANCE PARAMETER VAR (SEE BELOW) SHOULD THEN // BE SET TO 1. // IF THE RELATIVE STANDARD DEVIATIONS ARE UNKNOWN, // SET EACH DF(I)=1. // N - NUMBER OF DATA POINTS (INPUT). // N MUST BE .GE. 3. // Y,C - SPLINE COEFFICIENTS. (OUTPUT) Y // IS A VECTOR OF LENGTH N. C IS // AN N-1 BY 3 MATRIX. THE VALUE // OF THE SPLINE APPROXIMATION AT T IS // S(T)=((C(I,3)*D+C(I,2))*D+C(I,1))*D+Y(I) // WHERE X(I).LE.T.LT.X(I+1) AND // D = T-X(I). // IC - ROW DIMENSION OF MATRIX C EXACTLY // AS SPECIFIED IN THE DIMENSION // STATEMENT IN THE CALLING PROGRAM. (INPUT) // VAR - ERROR VARIANCE. (INPUT/OUTPUT) // IF VAR IS NEGATIVE (I.E. UNKNOWN) THEN // THE SMOOTHING PARAMETER IS DETERMINED // BY MINIMIZING THE GENERALIZED CROSS VALIDATION // AND AN ESTIMATE OF THE ERROR VARIANCE IS // RETURNED IN VAR. // IF VAR IS NON-NEGATIVE (I.E. KNOWN) THEN THE // SMOOTHING PARAMETER IS DETERMINED TO MINIMIZE // AN ESTIMATE, WHICH DEPENDS ON VAR, OF THE TRUE // MEAN SQUARE ERROR, AND VAR IS UNCHANGED. // IN PARTICULAR, IF VAR IS ZERO, THEN AN // INTERPOLATING NATURAL CUBIC SPLINE IS CALCULATED. // VAR SHOULD BE SET TO 1 IF ABSOLUTE STANDARD // DEVIATIONS HAVE BEEN PROVIDED IN DF (SEE ABOVE). // JOB - JOB SELECTION PARAMETER. (INPUT) // JOB = 0 SHOULD BE SELECTED IF POINT STANDARD ERROR // ESTIMATES ARE NOT REQUIRED IN SE. // JOB = 1 SHOULD BE SELECTED IF POINT STANDARD ERROR // ESTIMATES ARE REQUIRED IN SE. // SE - VECTOR OF LENGTH N CONTAINING BAYESIAN STANDARD // ERROR ESTIMATES OF THE FITTED SPLINE VALUES IN Y. // SE IS NOT REFERENCED IF JOB=0. (OUTPUT) // WK - WORK VECTOR OF LENGTH 7*(N + 2). ON NORMAL EXIT THE // FIRST 7 VALUES OF WK ARE ASSIGNED AS FOLLOWS:- // // WK(1) = SMOOTHING PARAMETER (= RHO/(RHO + 1)) // WK(2) = ESTIMATE OF THE NUMBER OF DEGREES OF // FREEDOM OF THE RESIDUAL SUM OF SQUARES // WK(3) = GENERALIZED CROSS VALIDATION // WK(4) = MEAN SQUARE RESIDUAL // WK(5) = ESTIMATE OF THE TRUE MEAN SQUARE ERROR // AT THE DATA POINTS // WK(6) = ESTIMATE OF THE ERROR VARIANCE // WK(7) = MEAN SQUARE VALUE OF THE DF(I) // // IF WK(1)=0 (RHO=0) AN INTERPOLATING NATURAL CUBIC // SPLINE HAS BEEN CALCULATED. // IF WK(1)=1 (RHO=INFINITE) A LEAST SQUARES // REGRESSION LINE HAS BEEN CALCULATED. // WK(2) IS AN ESTIMATE OF THE NUMBER OF DEGREES OF // FREEDOM OF THE RESIDUAL WHICH REDUCES TO THE // USUAL VALUE OF N-2 WHEN A LEAST SQUARES REGRESSION // LINE IS CALCULATED. // WK(3),WK(4),WK(5) ARE CALCULATED WITH THE DF(I) // SCALED TO HAVE MEAN SQUARE VALUE 1. THE // UNSCALED VALUES OF WK(3),WK(4),WK(5) MAY BE // CALCULATED BY DIVIDING BY WK(7). // WK(6) COINCIDES WITH THE OUTPUT VALUE OF VAR IF // VAR IS NEGATIVE ON INPUT. IT IS CALCULATED WITH // THE UNSCALED VALUES OF THE DF(I) TO FACILITATE // COMPARISONS WITH A PRIORI VARIANCE ESTIMATES. // // IER - ERROR PARAMETER. (OUTPUT) // TERMINAL ERROR // IER = 129, IC IS LESS THAN N-1. // IER = 130, N IS LESS THAN 3. // IER = 131, INPUT ABSCISSAE ARE NOT // ORDERED SO THAT X(I).LT.X(I+1). // IER = 132, DF(I) IS NOT POSITIVE FOR SOME I. // IER = 133, JOB IS NOT 0 OR 1. // To build the fortran library (x64) use: // > gfortran -m64 642.f -shared -o 642_x64.dll or // > ifort /dll /Qm64 642.f /Fe642_x64.dll // check dumpbin /EXPORTS 642_x64.dll // and dumpbin /IMPORTS 642_x64.dll [DllImport("642_x64.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "cubgcv")] public static extern void cubgcv_x64( double[] x, double[] f, double[] df, ref int n, [Out] double[] y, [Out] double[,] c, ref int ic, ref double var, ref int job, [Out] double[] se, double[,] wk, ref int ier); // To build the fortran library (x86) use: // > gfortran -static -Lc:/mingw/bin/libgcc_s_dw2-1.dll -m32 642.f -shared -o 642_x86.dll [DllImport("642_x86.dll", CallingConvention = CallingConvention.Cdecl, EntryPoint = "cubgcv")] public static extern void cubgcv_x86( double[] x, double[] f, double[] df, ref Int32 n, double[] y, double[,] c, ref Int32 ic, ref double var, ref Int32 job, double[] se, double[,] wk, ref Int32 ier); public class CubGcvReport { // // // // // IF WK(1)=0 (RHO=0) AN INTERPOLATING NATURAL CUBIC // SPLINE HAS BEEN CALCULATED. // IF WK(1)=1 (RHO=INFINITE) A LEAST SQUARES // REGRESSION LINE HAS BEEN CALCULATED. // public double smoothingParameter; // WK(2) IS AN ESTIMATE OF THE NUMBER OF DEGREES OF // FREEDOM OF THE RESIDUAL WHICH REDUCES TO THE // USUAL VALUE OF N-2 WHEN A LEAST SQUARES REGRESSION // LINE IS CALCULATED. public double estimatedRSSDegreesOfFreedom; // WK(3),WK(4),WK(5) ARE CALCULATED WITH THE DF(I) // SCALED TO HAVE MEAN SQUARE VALUE 1. THE // UNSCALED VALUES OF WK(3),WK(4),WK(5) MAY BE // CALCULATED BY DIVIDING BY WK(7). public double generalizedCrossValidation; public double meanSquareResiudal; public double estimatedTrueMeanSquaredErrorAtDataPoints; // WK(6) COINCIDES WITH THE OUTPUT VALUE OF VAR IF // VAR IS NEGATIVE ON INPUT. IT IS CALCULATED WITH // THE UNSCALED VALUES OF THE DF(I) TO FACILITATE // COMPARISONS WITH A PRIORI VARIANCE ESTIMATES. public double estimatedErrorVariance; public double meanSquareOfDf; public double[] se; } public static ReinschSmoothingSplineModel CalculateCubicSpline(double[] x, double[] y, string targetVariable, string[] inputVars, out CubGcvReport report) { var w = Enumerable.Repeat(1.0, x.Length).ToArray(); double[] x2, y2, w2; Splines.SortAndBin(x, y, w, out x2, out y2, out w2); // TODO: use weights correctly int n = x2.Length; double[] df = Enumerable.Repeat(1.0, n).ToArray(); // set each df if actual standard deviation of data points is not known double[,] c = new double[3, n - 1]; double[] se = new double[n]; // standard errors in points double[,] wk = new double[7, (n + 2)]; // work array; double[] y_smoothed = new double[n]; int job = 1; // calc estimates of standard errors in points int ic = n - 1; int ier = -99; double var = -99.0; if (Environment.Is64BitProcess) { CubicSplineGCV.cubgcv_x64(x2, y2, df, ref n, y_smoothed, c, ref ic, ref var, ref job, se, wk, ref ier); } else { CubicSplineGCV.cubgcv_x86(x2, y2, df, ref n, y_smoothed, c, ref ic, ref var, ref job, se, wk, ref ier); } /* * IER - ERROR PARAMETER. (OUTPUT) * TERMINAL ERROR * IER = 129, IC IS LESS THAN N-1. * IER = 130, N IS LESS THAN 3. * IER = 131, INPUT ABSCISSAE ARE NOT * ORDERED SO THAT X(I).LT.X(I+1). * IER = 132, DF(I) IS NOT POSITIVE FOR SOME I. * IER = 133, JOB IS NOT 0 OR 1. * */ if (ier == 131) { throw new ArgumentException("x is not ordered"); } else if (ier != 0) throw new ArgumentException("Error in CUBGCV " + ier); // OF THE SPLINE APPROXIMATION AT T IS // S(T)=((C(I,3)*D+C(I,2))*D+C(I,1))*D+Y(I) // WHERE X(I).LE.T.LT.X(I+1) AND // D = T-X(I). // WK(1) = SMOOTHING PARAMETER (= RHO/(RHO + 1)) // WK(2) = ESTIMATE OF THE NUMBER OF DEGREES OF // FREEDOM OF THE RESIDUAL SUM OF SQUARES // WK(3) = GENERALIZED CROSS VALIDATION // WK(4) = MEAN SQUARE RESIDUAL // WK(5) = ESTIMATE OF THE TRUE MEAN SQUARE ERROR // AT THE DATA POINTS // WK(6) = ESTIMATE OF THE ERROR VARIANCE // WK(7) = MEAN SQUARE VALUE OF THE DF(I) report = new CubGcvReport(); report.smoothingParameter = wk[0, 0]; report.estimatedRSSDegreesOfFreedom = wk[0, 1]; report.generalizedCrossValidation = wk[0, 2]; report.meanSquareResiudal = wk[0, 3]; report.estimatedTrueMeanSquaredErrorAtDataPoints = wk[0, 4]; report.estimatedErrorVariance = wk[0, 5]; report.meanSquareOfDf = wk[0, 6]; report.se = se; // Debug.Assert(var == report.estimatedErrorVariance); double[] dArr = new double[n]; double[] cArr = new double[n]; double[] bArr = new double[n]; for (int i = 0; i < n - 1; i++) { bArr[i] = c[0, i]; cArr[i] = c[1, i]; dArr[i] = c[2, i]; } // extrapolate linearly for xx > x[n] bArr[n - 1] = bArr[n - 2]; // extrapolate linearly for xx < x[1] dArr[0] = 0; return new ReinschSmoothingSplineModel( new MyArray(1, y_smoothed), new MyArray(1, bArr), new MyArray(1, cArr), new MyArray(1, dArr), new MyArray(1, x2), targetVariable, inputVars); } } }