Changeset 15457


Ignore:
Timestamp:
11/07/17 17:03:02 (22 months ago)
Author:
gkronber
Message:

#2789 added Finbarr O'Sullivan smoothing spline code

Location:
branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental
Files:
165 added
5 edited

Legend:

Unmodified
Added
Removed
  • branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/CubicSplineGCV.cs

    r15449 r15457  
    188188      double[] df = Enumerable.Repeat(1.0, n).ToArray();   // set each df if actual standard deviation of data points is not known
    189189      double[,] c = new double[3, n - 1];
    190       double var = -99.0;
    191190      double[] se = new double[n]; // standard errors in points
    192191      double[,] wk = new double[7, (n + 2)]; // work array;
     
    195194      int ic = n - 1;
    196195      int ier = -99;
     196      double var = -99.0;
    197197      if (Environment.Is64BitProcess) {
    198198        CubicSplineGCV.cubgcv_x64(x2, y2, df, ref n, y_smoothed,
     
    244244      report.meanSquareOfDf = wk[0, 6];
    245245      report.se = se;
    246       Debug.Assert(var == report.estimatedErrorVariance);
     246      // Debug.Assert(var == report.estimatedErrorVariance);
    247247
    248248
  • branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/GAM.cs

    r15450 r15457  
    9696    protected override void Run(CancellationToken cancellationToken) {
    9797      double lambda = Lambda;
    98       int maxIters = MaxIterations ;
     98      int maxIters = MaxIterations;
    9999      int maxInteractions = MaxInteractions;
    100100      if (maxInteractions < 1 || maxInteractions > 5) throw new ArgumentException("Max interactions is outside the valid range [1 .. 5]");
     
    133133
    134134      var combinations = new List<string[]>();
    135       for(int i=1;i<=maxInteractions;i++)
     135      for (int i = 1; i <= maxInteractions; i++)
    136136        combinations.AddRange(HeuristicLab.Common.EnumerableExtensions.Combinations(inputVars, i).Select(c => c.ToArray()));
    137137      // combinations.Add(new string[] { "X1", "X2" });
     
    243243
    244244        // find tolerance
    245         var bestLambda = -5.0;
    246         double bestCVRMSE = double.PositiveInfinity;
     245        //var bestLambda = double.NaN;
     246        double bestCVRMSE = target.StandardDeviation();
    247247        double avgTrainRMSE = double.PositiveInfinity;
    248         for (double curLambda = -5.0; curLambda <= 6.0; curLambda += 1.0) {
    249           var ensemble = Splines.CalculatePenalizedRegressionSpline(product, (double[])target.Clone(), curLambda, problemData.TargetVariable,  inputVars, out avgTrainRMSE, out cvRMSE);
    250           Console.Write("{0} {1} {2}", curLambda, avgTrainRMSE, cvRMSE);
    251           if (bestCVRMSE > cvRMSE) {
    252             Console.Write(" *");
    253             bestCVRMSE = cvRMSE;
    254             bestLambda = curLambda;
    255           }
    256           Console.WriteLine();
    257         }
     248        double[] bestPredictions = new double[target.Length]; // zero
     249
     250
     251        //double[] bestSSE = target.Select(ti => ti*ti).ToArray(); // target - zero
     252        //for (double curLambda = 6.0; curLambda >= -6.0; curLambda -= 1.0) {
     253        //  double[] predictions;
     254        //  var ensemble = Splines.CalculatePenalizedRegressionSpline(product, (double[])target.Clone(), curLambda, problemData.TargetVariable, inputVars, out avgTrainRMSE, out cvRMSE, out predictions);
     255        //  double[] sse = target.Zip(predictions, (t, p) => (t - p)*(t-p)).ToArray();
     256        //  // Console.Write("{0} {1} {2}", curLambda, avgTrainRMSE, cvRMSE);
     257        //  double bothTails = .0, leftTail = .0, rightTail = .0;
     258        //  alglib.stest.onesamplesigntest(bestSSE.Zip(sse, (a, b) => a-b).ToArray(), predictions.Length, 0.0, ref bothTails, ref leftTail, ref rightTail);
     259        //  if (bothTails < 0.1 && bestCVRMSE > cvRMSE) {
     260        //    Console.Write(" *");
     261        //    bestCVRMSE = cvRMSE;
     262        //    bestLambda = curLambda;
     263        //    bestSSE = sse;
     264        //    bestPredictions = predictions;
     265        //  }
     266        //  // Console.WriteLine();
     267        //}
     268        //if (double.IsNaN(bestLambda)) {
     269        //  return new ConstantModel(target.Average(), problemData.TargetVariable);
     270        //} else {
    258271        // train on whole data
    259        return Splines.CalculatePenalizedRegressionSpline(product, (double[])target.Clone(), bestLambda, problemData.TargetVariable, inputVars, out avgTrainRMSE, out cvRMSE);
     272
     273
     274        // return Splines.CalculatePenalizedRegressionSpline(product, (double[])target.Clone(), lambda, problemData.TargetVariable, inputVars, out avgTrainRMSE, out cvRMSE, out bestPredictions);
     275        SBART.SBART_Report rep;
     276        var model = SBART.CalculateSBART(product, (double[])target.Clone(), problemData.TargetVariable, inputVars, (float)lambda, out rep);
     277        Console.WriteLine("{0} {1:N5} {2:N5} {3:N5} {4:N5}", string.Join(",", inputVars), rep.gcv, rep.leverage.Sum(), product.StandardDeviation(), target.StandardDeviation());
     278        return model;
     279        // }
    260280
    261281      } else return new ConstantModel(target.Average(), problemData.TargetVariable);
  • branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/HeuristicLab.Algorithms.DataAnalysis.Experimental.csproj

    r15449 r15457  
    151151  </ItemGroup>
    152152  <ItemGroup>
     153    <Compile Include="SBART.cs" />
    153154    <Compile Include="CubicSplineGCV.cs" />
    154155    <Compile Include="ForwardSelection.cs" />
     
    183184    <Content Include="FSharp.Quotations.Evaluator.dll">
    184185      <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
     186    </Content>
     187    <Content Include="sbart_x64.dll">
     188      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
    185189    </Content>
    186190  </ItemGroup>
  • branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/Splines.cs

    r15450 r15457  
    7070          "B-Spline Smoothing",
    7171          "Penalized Regression Spline (alglib)",
    72           "CUBGCV"
     72          "CUBGCV",
     73          "Finnbar O'Sullivan (sbart)"
    7374      }.Select(s => new StringValue(s)));
    7475
     
    172173              break;
    173174            }
     175          case "Finnbar O'Sullivan (sbart)": {
     176              SBART.SBART_Report report;
     177              var lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; // controls extent of smoothing
     178              var model =
     179                SBART.CalculateSBART(x, y,
     180                Problem.ProblemData.TargetVariable, inputVars, (float)lambda, out report);
     181              var targetVar = Problem.ProblemData.TargetVariable;
     182              var problemData = (IRegressionProblemData)Problem.ProblemData.Clone();
     183              Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData)));
     184              Results.Add(new Result("GCV", new DoubleValue(report.gcv)));
     185              Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter)));
     186              break;
     187
     188            }
    174189          case "B-Spline Smoothing": {
    175190              BSplineSmoothing(x, y, inputVars);
     
    205220    public static IRegressionEnsembleModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars,
    206221      out double avgTrainRMSE,
    207       out double cvRMSE) {
     222      out double cvRMSE, out double[] predictions) {
    208223      int info;
    209224      alglib.spline1dinterpolant interpolant;
     
    218233      double[] x_loo = new double[(folds - 1) * foldSize];
    219234      double[] y_loo = new double[(folds - 1) * foldSize];
     235      double[] w_loo = Enumerable.Repeat(1.0, x_loo.Length).ToArray();
    220236
    221237      var models = new List<IRegressionModel>();
     
    225241      int nTest = 0;
    226242
     243      predictions = new double[y.Length];
    227244
    228245      for (int i = 0; i < folds; i++) {
     
    232249          Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, (folds - i - 1) * foldSize);
    233250        }
    234         alglib.spline1dfitpenalized(x_loo, y_loo, n, lambda, out info, out interpolant, out rep);
    235         Debug.Assert(y_loo.All(yi => yi != 0.0));
    236         Debug.Assert(x_loo.All(xi => xi != 0.0));
     251        alglib.spline1dfitpenalizedw(x_loo, y_loo, w_loo, n, lambda, out info, out interpolant, out rep);
     252        //Debug.Assert(y_loo.All(yi => yi != 0.0));
     253        //Debug.Assert(x_loo.All(xi => xi != 0.0));
    237254
    238255        // training error
    239256        for (int j = 0; j < x_loo.Length; j++) {
    240           var y_pred = alglib.spline1dcalc(interpolant, x[i]);
     257          var y_pred = alglib.spline1dcalc(interpolant, x[j]);
    241258          double res = y[j] - y_pred;
    242259          sumTrainSE += res * res;
     
    245262        // test
    246263        for (int j = i * foldSize; j < (i + 1) * foldSize; j++) {
    247           var y_pred = alglib.spline1dcalc(interpolant, x[i]);
    248           double res = y[j] - y_pred;
     264          predictions[j] = alglib.spline1dcalc(interpolant, x[j]);
     265          double res = y[j] - predictions[j];
    249266          sumTestSE += res * res;
    250267          nTest++;
     
    630647      int n = x.Length;
    631648      var range = x[n - 1] - x[0];
    632       var binSize = range / n / 20;
     649      var binSize = range / n / 10;
    633650      {
    634651        // binning
     
    759776
    760777    public double GetEstimatedValue(double xx) {
     778      xx = (xx - offset) * scale;
    761779      int n = a.Length;
    762780      if (xx <= x[1]) {
     
    791809        product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray();
    792810      }
    793       foreach (var xx in product.Select(xi => (xi - offset) * scale)) {
     811      foreach (var xx in product) {
    794812        yield return GetEstimatedValue(xx);
    795813      }
Note: See TracChangeset for help on using the changeset viewer.