Changeset 15457
- Timestamp:
- 11/07/17 17:03:02 (7 years ago)
- Location:
- branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental
- Files:
-
- 165 added
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/CubicSplineGCV.cs
r15449 r15457 188 188 double[] df = Enumerable.Repeat(1.0, n).ToArray(); // set each df if actual standard deviation of data points is not known 189 189 double[,] c = new double[3, n - 1]; 190 double var = -99.0;191 190 double[] se = new double[n]; // standard errors in points 192 191 double[,] wk = new double[7, (n + 2)]; // work array; … … 195 194 int ic = n - 1; 196 195 int ier = -99; 196 double var = -99.0; 197 197 if (Environment.Is64BitProcess) { 198 198 CubicSplineGCV.cubgcv_x64(x2, y2, df, ref n, y_smoothed, … … 244 244 report.meanSquareOfDf = wk[0, 6]; 245 245 report.se = se; 246 Debug.Assert(var == report.estimatedErrorVariance);246 // Debug.Assert(var == report.estimatedErrorVariance); 247 247 248 248 -
branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/GAM.cs
r15450 r15457 96 96 protected override void Run(CancellationToken cancellationToken) { 97 97 double lambda = Lambda; 98 int maxIters = MaxIterations 98 int maxIters = MaxIterations; 99 99 int maxInteractions = MaxInteractions; 100 100 if (maxInteractions < 1 || maxInteractions > 5) throw new ArgumentException("Max interactions is outside the valid range [1 .. 5]"); … … 133 133 134 134 var combinations = new List<string[]>(); 135 for (int i=1;i<=maxInteractions;i++)135 for (int i = 1; i <= maxInteractions; i++) 136 136 combinations.AddRange(HeuristicLab.Common.EnumerableExtensions.Combinations(inputVars, i).Select(c => c.ToArray())); 137 137 // combinations.Add(new string[] { "X1", "X2" }); … … 243 243 244 244 // find tolerance 245 var bestLambda = -5.0;246 double bestCVRMSE = double.PositiveInfinity;245 //var bestLambda = double.NaN; 246 double bestCVRMSE = target.StandardDeviation(); 247 247 double avgTrainRMSE = double.PositiveInfinity; 248 for (double curLambda = -5.0; curLambda <= 6.0; curLambda += 1.0) { 249 var ensemble = Splines.CalculatePenalizedRegressionSpline(product, (double[])target.Clone(), curLambda, problemData.TargetVariable, inputVars, out avgTrainRMSE, out cvRMSE); 250 Console.Write("{0} {1} {2}", curLambda, avgTrainRMSE, cvRMSE); 251 if (bestCVRMSE > cvRMSE) { 252 Console.Write(" *"); 253 bestCVRMSE = cvRMSE; 254 bestLambda = curLambda; 255 } 256 Console.WriteLine(); 257 } 248 double[] bestPredictions = new double[target.Length]; // zero 249 250 251 //double[] bestSSE = target.Select(ti => ti*ti).ToArray(); // target - zero 252 //for (double curLambda = 6.0; curLambda >= -6.0; curLambda -= 1.0) { 253 // double[] predictions; 254 // var ensemble = Splines.CalculatePenalizedRegressionSpline(product, (double[])target.Clone(), curLambda, problemData.TargetVariable, inputVars, out avgTrainRMSE, out cvRMSE, out predictions); 255 // double[] sse = target.Zip(predictions, (t, p) => (t - p)*(t-p)).ToArray(); 256 // // Console.Write("{0} {1} {2}", curLambda, avgTrainRMSE, cvRMSE); 257 // double bothTails = .0, leftTail = .0, rightTail = .0; 258 // alglib.stest.onesamplesigntest(bestSSE.Zip(sse, (a, b) => a-b).ToArray(), predictions.Length, 0.0, ref bothTails, ref leftTail, ref rightTail); 259 // if (bothTails < 0.1 && bestCVRMSE > cvRMSE) { 260 // Console.Write(" *"); 261 // bestCVRMSE = cvRMSE; 262 // bestLambda = curLambda; 263 // bestSSE = sse; 264 // bestPredictions = predictions; 265 // } 266 // // Console.WriteLine(); 267 //} 268 //if (double.IsNaN(bestLambda)) { 269 // return new ConstantModel(target.Average(), problemData.TargetVariable); 270 //} else { 258 271 // train on whole data 259 return Splines.CalculatePenalizedRegressionSpline(product, (double[])target.Clone(), bestLambda, problemData.TargetVariable, inputVars, out avgTrainRMSE, out cvRMSE); 272 273 274 // return Splines.CalculatePenalizedRegressionSpline(product, (double[])target.Clone(), lambda, problemData.TargetVariable, inputVars, out avgTrainRMSE, out cvRMSE, out bestPredictions); 275 SBART.SBART_Report rep; 276 var model = SBART.CalculateSBART(product, (double[])target.Clone(), problemData.TargetVariable, inputVars, (float)lambda, out rep); 277 Console.WriteLine("{0} {1:N5} {2:N5} {3:N5} {4:N5}", string.Join(",", inputVars), rep.gcv, rep.leverage.Sum(), product.StandardDeviation(), target.StandardDeviation()); 278 return model; 279 // } 260 280 261 281 } else return new ConstantModel(target.Average(), problemData.TargetVariable); -
branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/HeuristicLab.Algorithms.DataAnalysis.Experimental.csproj
r15449 r15457 151 151 </ItemGroup> 152 152 <ItemGroup> 153 <Compile Include="SBART.cs" /> 153 154 <Compile Include="CubicSplineGCV.cs" /> 154 155 <Compile Include="ForwardSelection.cs" /> … … 183 184 <Content Include="FSharp.Quotations.Evaluator.dll"> 184 185 <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> 186 </Content> 187 <Content Include="sbart_x64.dll"> 188 <CopyToOutputDirectory>Always</CopyToOutputDirectory> 185 189 </Content> 186 190 </ItemGroup> -
branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/Splines.cs
r15450 r15457 70 70 "B-Spline Smoothing", 71 71 "Penalized Regression Spline (alglib)", 72 "CUBGCV" 72 "CUBGCV", 73 "Finnbar O'Sullivan (sbart)" 73 74 }.Select(s => new StringValue(s))); 74 75 … … 172 173 break; 173 174 } 175 case "Finnbar O'Sullivan (sbart)": { 176 SBART.SBART_Report report; 177 var lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; // controls extent of smoothing 178 var model = 179 SBART.CalculateSBART(x, y, 180 Problem.ProblemData.TargetVariable, inputVars, (float)lambda, out report); 181 var targetVar = Problem.ProblemData.TargetVariable; 182 var problemData = (IRegressionProblemData)Problem.ProblemData.Clone(); 183 Results.Add(new Result("Solution", model.CreateRegressionSolution(problemData))); 184 Results.Add(new Result("GCV", new DoubleValue(report.gcv))); 185 Results.Add(new Result("Smoothing parameter (optimized for GCV)", new DoubleValue(report.smoothingParameter))); 186 break; 187 188 } 174 189 case "B-Spline Smoothing": { 175 190 BSplineSmoothing(x, y, inputVars); … … 205 220 public static IRegressionEnsembleModel CalculatePenalizedRegressionSpline(double[] x, double[] y, double lambda, string targetVar, string[] inputVars, 206 221 out double avgTrainRMSE, 207 out double cvRMSE ) {222 out double cvRMSE, out double[] predictions) { 208 223 int info; 209 224 alglib.spline1dinterpolant interpolant; … … 218 233 double[] x_loo = new double[(folds - 1) * foldSize]; 219 234 double[] y_loo = new double[(folds - 1) * foldSize]; 235 double[] w_loo = Enumerable.Repeat(1.0, x_loo.Length).ToArray(); 220 236 221 237 var models = new List<IRegressionModel>(); … … 225 241 int nTest = 0; 226 242 243 predictions = new double[y.Length]; 227 244 228 245 for (int i = 0; i < folds; i++) { … … 232 249 Array.Copy(y, (i + 1) * foldSize, y_loo, i * foldSize, (folds - i - 1) * foldSize); 233 250 } 234 alglib.spline1dfitpenalized (x_loo, y_loo, n, lambda, out info, out interpolant, out rep);235 Debug.Assert(y_loo.All(yi => yi != 0.0));236 Debug.Assert(x_loo.All(xi => xi != 0.0));251 alglib.spline1dfitpenalizedw(x_loo, y_loo, w_loo, n, lambda, out info, out interpolant, out rep); 252 //Debug.Assert(y_loo.All(yi => yi != 0.0)); 253 //Debug.Assert(x_loo.All(xi => xi != 0.0)); 237 254 238 255 // training error 239 256 for (int j = 0; j < x_loo.Length; j++) { 240 var y_pred = alglib.spline1dcalc(interpolant, x[ i]);257 var y_pred = alglib.spline1dcalc(interpolant, x[j]); 241 258 double res = y[j] - y_pred; 242 259 sumTrainSE += res * res; … … 245 262 // test 246 263 for (int j = i * foldSize; j < (i + 1) * foldSize; j++) { 247 var y_pred = alglib.spline1dcalc(interpolant, x[i]);248 double res = y[j] - y_pred;264 predictions[j] = alglib.spline1dcalc(interpolant, x[j]); 265 double res = y[j] - predictions[j]; 249 266 sumTestSE += res * res; 250 267 nTest++; … … 630 647 int n = x.Length; 631 648 var range = x[n - 1] - x[0]; 632 var binSize = range / n / 20;649 var binSize = range / n / 10; 633 650 { 634 651 // binning … … 759 776 760 777 public double GetEstimatedValue(double xx) { 778 xx = (xx - offset) * scale; 761 779 int n = a.Length; 762 780 if (xx <= x[1]) { … … 791 809 product = product.Zip(dataset.GetDoubleValues(factor, rows), (pi, fi) => pi * fi).ToArray(); 792 810 } 793 foreach (var xx in product .Select(xi => (xi - offset) * scale)) {811 foreach (var xx in product) { 794 812 yield return GetEstimatedValue(xx); 795 813 }
Note: See TracChangeset
for help on using the changeset viewer.