Ignore:
Timestamp:
11/06/17 13:12:41 (4 years ago)
Author:
gkronber
Message:

#2789: more tests with cross-validation (CV) and automatic determination of the smoothing parameter

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/GAM.cs

    r15449 r15450  
    111111        nTerms += inputVars.Combinations(i).Count();
    112112      }
     113
    113114      IRegressionModel[] f = new IRegressionModel[nTerms];
    114115      for (int i = 0; i < f.Length; i++) {
     
    130131      string[] terms = new string[f.Length];
    131132      Results.Add(new Result("RSS Values", typeof(DoubleMatrix)));
     133
     134      var combinations = new List<string[]>();
     135      for(int i=1;i<=maxInteractions;i++)
     136        combinations.AddRange(HeuristicLab.Common.EnumerableExtensions.Combinations(inputVars, i).Select(c => c.ToArray()));
     137      // combinations.Add(new string[] { "X1", "X2" });
     138      // combinations.Add(new string[] { "X3", "X4" });
     139      // combinations.Add(new string[] { "X5", "X6" });
     140      // combinations.Add(new string[] { "X1", "X7", "X9" });
     141      // combinations.Add(new string[] { "X3", "X6", "X10" });
     142
     143
    132144
    133145      // until convergence
     
    144156        //}
    145157
    146         for (int interaction = 1; interaction <= maxInteractions; interaction++) {
    147           var selectedVars = HeuristicLab.Common.EnumerableExtensions.Combinations(inputVars, interaction);
    148 
    149           foreach (var element in selectedVars) {
    150             var res = CalculateResiduals(problemData, f, j, avgY, problemData.TrainingIndices);
    151             rss[j] = res.Variance();
    152             terms[j] = string.Format("f({0})", string.Join(",", element));
    153             f[j] = RegressSpline(problemData, element.ToArray(), res, lambda);
    154             j++;
    155           }
     158
     159
     160        foreach (var element in combinations) {
     161          var res = CalculateResiduals(problemData, f, j, avgY, problemData.TrainingIndices);
     162          rss[j] = res.Variance();
     163          terms[j] = string.Format("f({0})", string.Join(",", element));
     164          f[j] = RegressSpline(problemData, element.ToArray(), res, lambda);
     165          j++;
    156166        }
    157167
     
    218228          product = product.Zip(problemData.Dataset.GetDoubleValues(inputVars[i], problemData.TrainingIndices), (pi, vi) => pi * vi).ToArray();
    219229        }
    220         CubicSplineGCV.CubGcvReport report;
    221         return CubicSplineGCV.CalculateCubicSpline(
    222           product,
    223           (double[])target.Clone(),
    224           problemData.TargetVariable, inputVars, out report
    225           );
     230        // CubicSplineGCV.CubGcvReport report;
     231        // return CubicSplineGCV.CalculateCubicSpline(
     232        //   product,
     233        //   (double[])target.Clone(),
     234        //   problemData.TargetVariable, inputVars, out report
     235        //   );
     236
     237        double optTolerance; double cvRMSE;
     238        // find tolerance
     239        // var ensemble = Splines.CalculateSmoothingSplineReinsch(product, (double[])target.Clone(), inputVars, problemData.TargetVariable, out optTolerance, out cvRMSE);
     240        // // train on whole data
     241        // return Splines.CalculateSmoothingSplineReinsch(product, (double[])target.Clone(), inputVars, optTolerance, product.Length - 1, problemData.TargetVariable);
     242
     243
     244        // find tolerance
     245        var bestLambda = -5.0;
     246        double bestCVRMSE = double.PositiveInfinity;
     247        double avgTrainRMSE = double.PositiveInfinity;
     248        for (double curLambda = -5.0; curLambda <= 6.0; curLambda += 1.0) {
     249          var ensemble = Splines.CalculatePenalizedRegressionSpline(product, (double[])target.Clone(), curLambda, problemData.TargetVariable,  inputVars, out avgTrainRMSE, out cvRMSE);
     250          Console.Write("{0} {1} {2}", curLambda, avgTrainRMSE, cvRMSE);
     251          if (bestCVRMSE > cvRMSE) {
     252            Console.Write(" *");
     253            bestCVRMSE = cvRMSE;
     254            bestLambda = curLambda;
     255          }
     256          Console.WriteLine();
     257        }
     258        // train on whole data
     259       return Splines.CalculatePenalizedRegressionSpline(product, (double[])target.Clone(), bestLambda, problemData.TargetVariable, inputVars, out avgTrainRMSE, out cvRMSE);
     260
    226261      } else return new ConstantModel(target.Average(), problemData.TargetVariable);
    227262    }
Note: See TracChangeset for help on using the changeset viewer.