Changeset 15469


Ignore:
Timestamp:
11/10/17 09:05:38 (3 years ago)
Author:
gkronber
Message:

#2789 trying to get SBART to work correctly.

Location:
branches/MathNetNumerics-Exploration-2789
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/GAM.cs

    r15468 r15469  
    223223          product = product.Zip(problemData.Dataset.GetDoubleValues(inputVars[i], problemData.TrainingIndices), (pi, vi) => pi * vi).ToArray();
    224224        }
    225         CubicSplineGCV.CubGcvReport report;
    226         return CubicSplineGCV.CalculateCubicSpline(
    227           product,
    228           (double[])target.Clone(),
    229           problemData.TargetVariable, inputVars, out report
    230           );
    231 
    232         double optTolerance; double cvRMSE;
     225        // CubicSplineGCV.CubGcvReport report;
     226        // return CubicSplineGCV.CalculateCubicSpline(
     227        //   product,
     228        //   (double[])target.Clone(),
     229        //   problemData.TargetVariable, inputVars, out report
     230        //   );
     231        //
     232        // double optTolerance; double cvRMSE;
    233233        // find tolerance
    234234        // var ensemble = Splines.CalculateSmoothingSplineReinsch(product, (double[])target.Clone(), inputVars, problemData.TargetVariable, out optTolerance, out cvRMSE);
     
    239239        // find tolerance
    240240        //var bestLambda = double.NaN;
    241         double bestCVRMSE = target.StandardDeviation();
    242         double avgTrainRMSE = double.PositiveInfinity;
    243         double[] bestPredictions = new double[target.Length]; // zero
     241        // double bestCVRMSE = target.StandardDeviation();
     242        // double avgTrainRMSE = double.PositiveInfinity;
     243        // double[] bestPredictions = new double[target.Length]; // zero
    244244
    245245
     
    269269        // return Splines.CalculatePenalizedRegressionSpline(product, (double[])target.Clone(), lambda, problemData.TargetVariable, inputVars, out avgTrainRMSE, out cvRMSE, out bestPredictions);
    270270        SBART.SBART_Report rep;
    271         var model = SBART.CalculateSBART(product, (double[])target.Clone(), problemData.TargetVariable, inputVars, out rep);
     271        var w = product.Select(_ => 1.0).ToArray();
     272        var model = SBART.CalculateSBART(product, (double[])target.Clone(), w, 10, problemData.TargetVariable, inputVars, out rep);
    272273        Console.WriteLine("{0} {1:N5} {2:N5} {3:N5} {4:N5}", string.Join(",", inputVars), rep.gcv, rep.leverage.Sum(), product.StandardDeviation(), target.StandardDeviation());
    273274        return model;
  • branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/SBART.cs

    r15468 r15469  
    184184      int info;
    185185      alglib.kmeansgenerate(xy, x.Length, 1, nKnots, 10, out info, out c, out xyc);
     186      var g = x.Zip(xyc, (double xi, int ci) => Tuple.Create(xi,ci)).GroupBy(t => t.Item2).Select(gr => HeuristicLab.Common.EnumerableStatisticExtensions.Median(gr.Select(gi=>gi.Item1))).ToArray();
     187      // find medians
    186188      double[] knots = new double[nKnots];
    187       for (int i = 0; i < knots.Length; i++) knots[i] = c[0, i];
     189      for (int i = 0; i < g.Length; i++) knots[i] = g[i];
    188190      return CalculateSBART(x, y, w, knots, targetVariable, inputVars, out rep);
    189191    }
    190192
    191193    public static IRegressionModel CalculateSBART(double[] x, double[] y, double[] w, double[] knots, string targetVariable, string[] inputVars, out SBART_Report rep) {
    192       float[] xs = x.Select(xi=>(float)xi).ToArray();
    193       float[] ys = y.Select(xi => (float)xi).ToArray();
    194       float[] ws = w.Select(xi => (float)xi).ToArray();
    195       float[] k = knots.Select(xi => (float)xi).ToArray();
    196 
    197       int n = xs.Length;
    198       if (n < 4) throw new ArgumentException("n < 4");
    199       if (knots.Length > n + 2) throw new ArgumentException("more than n+2 knots");
    200       float[] xw = new float[n];
    201       int nx = -99;
    202       float min = 0.0f;
    203       float range = 0.0f;
    204       int nk = -99;
    205       float[] regKnots = new float[n + 6];
    206 
    207       // sort xs together with ys and ws
    208       // combine rows with duplicate x values
    209       // transform x to range [0 .. 1]
    210       // create a set of knots (using a heuristic for the number of knots)
    211       // knots are located at data points. denser regions of x contain more knots.
    212       SBART.setreg_x64(xs, ys, ws,
    213         ref n, xw, ref nx, ref min, ref range, regKnots, ref nk);
    214 
    215       // in this case we want to use the knots supplied by the caller.
    216       // the knot values produced by setreg are overwritten with scaled knots supplied by caller.
    217       // knots must be ordered as well.
    218       int i = 0;
    219       // left boundary
    220       regKnots[i++] = 0.0f;
    221       regKnots[i++] = 0.0f;
    222       regKnots[i++] = 0.0f;
    223       regKnots[i++] = 0.0f;
    224       foreach (var knot in knots.OrderBy(ki=>ki)) {
    225         regKnots[i++] = ((float)knot - min) / range;
    226       }
    227       // right boundary
    228       regKnots[i++] = 1.0f;
    229       regKnots[i++] = 1.0f;
    230       regKnots[i++] = 1.0f;
    231       regKnots[i++] = 1.0f;
    232       nk = knots.Length + 4;
    233 
    234       float criterion = -99.0f; // GCV
    235       int icrit = 1; // calculate GCV
    236       float smoothingParameter = -99.0f;
    237       int smoothingParameterIndicator = 0;
    238       float lowerSmoothingParameter = 0.5f;
    239       float upperSmoothingParameter = 1.0f;
     194      int ier = 99;
     195      int tries = 0;
    240196      float tol = 0.01f;
    241       int isetup = 0; // not setup?
    242 
    243       // results
    244       float[] coeff = new float[nk];
    245       float[] leverage = new float[nx];
    246       float[] y_smoothed = new float[nx];
    247       int ier = -99;
    248 
    249 
    250       // working arrays for sbart
    251       float[] xwy = new float[nk];
    252       float[] hs0 = new float[nk];
    253       float[] hs1 = new float[nk];
    254       float[] hs2 = new float[nk];
    255       float[] hs3 = new float[nk];
    256       float[] sg0 = new float[nk];
    257       float[] sg1 = new float[nk];
    258       float[] sg2 = new float[nk];
    259       float[] sg3 = new float[nk];
    260       int ld4 = 4;
    261       float[,] adb = new float[ld4, nk];
    262 
    263       float[,] p1ip = new float[nk, ld4];
    264       int ldnk = nk + 4;
    265       float[,] p2ip = new float[nk, nx];
    266 
    267       SBART.sbart_x64(xs.Take(nx).ToArray(), ys.Take(nx).ToArray(), ws.Take(nx).ToArray(), ref nx,
    268         regKnots, ref nk,
    269         coeff, y_smoothed, leverage,
    270         ref criterion, ref icrit,
    271         ref smoothingParameter, ref smoothingParameterIndicator, ref lowerSmoothingParameter, ref upperSmoothingParameter,
    272         ref tol, ref isetup,
    273         xwy, hs0, hs1, hs2, hs3, sg0, sg1, sg2, sg3, adb, p1ip, p2ip, ref ld4, ref ldnk, ref ier);
    274 
    275       if (ier > 0) throw new ArgumentException(ier.ToString());
    276 
    277       rep = new SBART_Report();
    278       rep.gcv = criterion;
    279       rep.smoothingParameter = smoothingParameter;
    280       rep.leverage = leverage.Select(li => (double)li).ToArray();
    281 
    282       return new BartRegressionModel(regKnots.Take(nk+4).ToArray(), coeff, targetVariable, inputVars, min, range);
     197
     198      do {
     199        tries++;
     200        float[] xs = x.Select(xi => (float)xi).ToArray();
     201        float[] ys = y.Select(xi => (float)xi).ToArray();
     202        float[] ws = w.Select(xi => (float)xi).ToArray();
     203        float[] k = knots.Select(xi => (float)xi).ToArray();
     204
     205        int n = xs.Length;
     206        if (n < 4) throw new ArgumentException("n < 4");
     207        if (knots.Length > n + 2) throw new ArgumentException("more than n+2 knots");
     208        float[] xw = new float[n];
     209        int nx = -99;
     210        float min = 0.0f;
     211        float range = 0.0f;
     212        int nk = -99;
     213        float[] regKnots = new float[n + 6];
     214
     215        // sort xs together with ys and ws
     216        // combine rows with duplicate x values
     217        // transform x to range [0 .. 1]
     218        // create a set of knots (using a heuristic for the number of knots)
     219        // knots are located at data points. denser regions of x contain more knots.
     220        SBART.setreg_x64(xs, ys, ws,
     221          ref n, xw, ref nx, ref min, ref range, regKnots, ref nk);
     222
     223        // in this case we want to use the knots supplied by the caller.
     224        // the knot values produced by setreg are overwritten with scaled knots supplied by caller.
     225        // knots must be ordered as well.
     226        int i = 0;
     227        // left boundary
     228        regKnots[i++] = 0.0f;
     229        regKnots[i++] = 0.0f;
     230        regKnots[i++] = 0.0f;
     231        regKnots[i++] = 0.0f;
     232        int j = 1;
     233        foreach (var knot in knots.OrderBy(ki => ki)) {
     234          regKnots[i++] = xs[j * nx / (knots.Length + 1)];  // ((float)knot - min) / range;
     235          j++;
     236        }
     237        // right boundary
     238        regKnots[i++] = 1.0f;
     239        regKnots[i++] = 1.0f;
     240        regKnots[i++] = 1.0f;
     241        regKnots[i++] = 1.0f;
     242        nk = i - 4;
     243
     244        float criterion = -99.0f; // GCV
     245        int icrit = 1; // calculate GCV
     246        float smoothingParameter = -99.0f;
     247        int smoothingParameterIndicator = 0;
     248        float lowerSmoothingParameter = 0.0f;
     249        float upperSmoothingParameter = 1.0f;
     250        int isetup = 0; // not setup?
     251
     252        // results
     253        float[] coeff = new float[nk];
     254        float[] leverage = new float[nx];
     255        float[] y_smoothed = new float[nx];
     256
     257        // working arrays for sbart
     258        float[] xwy = new float[nk];
     259        float[] hs0 = new float[nk];
     260        float[] hs1 = new float[nk];
     261        float[] hs2 = new float[nk];
     262        float[] hs3 = new float[nk];
     263        float[] sg0 = new float[nk];
     264        float[] sg1 = new float[nk];
     265        float[] sg2 = new float[nk];
     266        float[] sg3 = new float[nk];
     267        int ld4 = 4;
     268        float[,] adb = new float[ld4, nk];
     269
     270        float[,] p1ip = new float[nk, ld4];
     271        int ldnk = nk + 4;
     272        float[,] p2ip = new float[nk, nx];
     273
     274        SBART.sbart_x64(xs.Take(nx).ToArray(), ys.Take(nx).ToArray(), ws.Take(nx).ToArray(), ref nx,
     275          regKnots, ref nk,
     276          coeff, y_smoothed, leverage,
     277          ref criterion, ref icrit,
     278          ref smoothingParameter, ref smoothingParameterIndicator, ref lowerSmoothingParameter, ref upperSmoothingParameter,
     279          ref tol, ref isetup,
     280          xwy, hs0, hs1, hs2, hs3, sg0, sg1, sg2, sg3, adb, p1ip, p2ip, ref ld4, ref ldnk, ref ier);
     281
     282
     283        if (ier > 0) {
     284          Console.WriteLine("ERROR {0} smooth {1}  criterion {2}", ier, smoothingParameter, criterion);
     285          tol *= 2;
     286          tol = Math.Min(tol, 1.0f);
     287        } else {
     288          if (tries > 1) {
     289            Console.WriteLine("Success {0} smooth {1}  criterion {2}", ier, smoothingParameter, criterion);
     290          }
     291          rep = new SBART_Report();
     292          rep.gcv = criterion;
     293          rep.smoothingParameter = smoothingParameter;
     294          rep.leverage = leverage.Select(li => (double)li).ToArray();
     295          return new BartRegressionModel(regKnots.Take(nk + 4).ToArray(), coeff, targetVariable, inputVars, min, range);
     296        }
     297      } while (ier > 0);
     298      throw new ArgumentException();
    283299    }
    284300
  • branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/Splines.cs

    r15468 r15469  
    223223          case "Penalized Regression Spline (alglib)": {
    224224              var lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; // controls extent of smoothing
    225               var model = CalculatePenalizedRegressionSpline(x, y, lambda, Problem.ProblemData.TargetVariable, inputVars);
     225              var model = CalculatePenalizedRegressionSpline(x, y, lambda, x.Length, Problem.ProblemData.TargetVariable, inputVars);
    226226              var targetVar = Problem.ProblemData.TargetVariable;
    227227              var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
     
    237237
    238238
    239     public static IRegressionModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars) {
     239    public static IRegressionModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, int numKnots, string targetVar, string[] inputVars) {
    240240      int info;
    241241      alglib.spline1dinterpolant interpolant;
    242242      alglib.spline1dfitreport rep;
    243       int n = x.Length;
    244       n = (int)Math.Max(50, 3 * Math.Sqrt(n));
    245 
    246       alglib.spline1dfitpenalized(x, y, n, lambda, out info, out interpolant, out rep);
     243
     244      alglib.spline1dfitpenalized(x, y, numKnots, lambda, out info, out interpolant, out rep);
    247245      return new Spline1dModel(interpolant, targetVar, inputVars);
    248246    }
     
    728726      this.interpolant = (alglib.spline1dinterpolant)orig.interpolant.make_copy();
    729727    }
    730     internal Spline1dModel(
     728    public Spline1dModel(
    731729      alglib.spline1dinterpolant interpolant,
    732730      string targetVar, string[] inputs, double offset = 0, double scale = 1) : base("SplineModel", "SplineModel") {
  • branches/MathNetNumerics-Exploration-2789/Main/Program.cs

    r15459 r15469  
    1313    static void Main(string[] args) {
    1414
    15       var xs = HeuristicLab.Common.SequenceGenerator.GenerateSteps(-3.5, 3.5, 0.02, includeEnd: true).ToArray();
    16       var ys = xs.Select(xi => (1.0 / (Math.Sqrt(2 * Math.PI)) * Math.Exp(-0.5 * xi * xi))).ToArray();
    17 
    18       alglib.hqrndstate state;
    19       alglib.hqrndseed(1234, 5678, out state);
    20       var ys_noise = ys.Select(yi => yi + alglib.hqrndnormal(state) * 0.01).ToArray();
    21 
    22       SBART.SBART_Report rep;
    23       SBART.CalculateSBART(xs, ys_noise, "y", new string[] { "x" }, 1.0f, out rep);
     15       var xs = HeuristicLab.Common.SequenceGenerator.GenerateSteps(-3.5, 3.5, 0.02, includeEnd: true).ToArray();
     16       var ys = xs.Select(xi => (1.0 / (Math.Sqrt(2 * Math.PI)) * Math.Exp(-0.5 * xi * xi))).ToArray();
     17       var ws = xs.Select(_ => 1.0).ToArray();
     18 
     19       alglib.hqrndstate state;
     20       alglib.hqrndseed(1234, 5678, out state);
     21       var ys_noise = ys.Select(yi => yi + alglib.hqrndnormal(state) * 0.01).ToArray();
     22 
     23       SBART.SBART_Report rep;
     24       SBART.CalculateSBART(xs, ys_noise, ws, 2, "y", new string[] { "x" }, out rep);
    2425
    2526
  • branches/MathNetNumerics-Exploration-2789/Test/UnitTest1.cs

    r15450 r15469  
    4646        double loo_rmse;
    4747        double avgTrainRmse;
    48         Splines.CalculatePenalizedRegressionSpline(xs, ys_noise, rho, "y", new string[] { "x" }, out avgTrainRmse, out loo_rmse);
     48        double[] predictions;
     49        Splines.CalculatePenalizedRegressionSpline(xs, ys_noise, rho, "y", new string[] { "x" }, out avgTrainRmse, out loo_rmse, out predictions);
    4950        iters++;
    5051        Console.WriteLine("{0} {1} {2}", rho, avgTrainRmse, loo_rmse);
Note: See TracChangeset for help on using the changeset viewer.