Changeset 15364
- Timestamp:
- 09/15/17 18:20:14 (7 years ago)
- Location:
- branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental
- Files:
-
- 1 added
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/HeuristicLab.Algorithms.DataAnalysis.Experimental.csproj
r15362 r15364 134 134 <ItemGroup> 135 135 <Compile Include="ForwardSelection.cs" /> 136 <Compile Include="GAM.cs" /> 136 137 <Compile Include="RBF.cs" /> 137 138 <Compile Include="Splines.cs" /> -
branches/MathNetNumerics-Exploration-2789/HeuristicLab.Algorithms.DataAnalysis.Experimental/Splines.cs
r15362 r15364 37 37 using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression; 38 38 using MathNet.Numerics.Interpolation; 39 using MathNet.Numerics.LinearAlgebra.Double; 40 using MathNet.Numerics.LinearAlgebra.Double.Solvers; 39 41 40 42 namespace HeuristicLab.Algorithms.DataAnalysis.Experimental { … … 66 68 "LogLinear (Math.NET)", 67 69 "Common (Math.NET)", 70 "Smoothing Spline (Mine)", 71 "Smoothing Spline (Reinsch)", 68 72 }.Select(s => new StringValue(s))); 69 73 70 74 Parameters.Add(new ConstrainedValueParameter<StringValue>("Type", "The type of spline (as supported by alglib)", validTypes, validTypes.First())); 75 Parameters.Add(new ValueParameter<DoubleValue>("Lambda", "Regularization parameter for smoothing splines (0..+inf)", new DoubleValue(100))); 71 76 Problem = new RegressionProblem(); 72 77 } … … 89 94 switch (type) { 90 95 case "Monotone": 91 alglib.spline1dbuildmonotone(x, y, out c); 96 alglib.spline1dbuildmonotone(x, y, out c); 92 97 AddAlglibSplineResult(c, inputVars); 93 98 break; … … 132 137 break; 133 138 } 139 case "Smoothing Spline (Mine)": { 140 CalculateSmoothingSpline(x, y, inputVars); 141 break; 142 } 143 case "Smoothing Spline (Reinsch)": { 144 CalculateSmoothingSplineReinsch(x, y, inputVars); 145 break; 146 } 134 147 default: throw new NotSupportedException(); 135 148 } 136 149 137 150 } 151 } 152 153 // array with non-zero lower index 154 private class MyArray<T> { 155 private T[] arr; 156 private int lowerBound; 157 158 public T this[int key] { 159 get { 160 return arr[key - lowerBound]; 161 } 162 set { 163 arr[key - lowerBound] = value; 164 } 165 } 166 167 public MyArray(int lowerBound, int numElements) { 168 this.lowerBound = lowerBound; 169 arr = new T[numElements]; 170 } 171 public MyArray(int lowerBound, T[] source) : this(lowerBound, source.Length) { 172 Array.Copy(source, arr, source.Length); 173 } 174 175 public T[] ToArray() { 176 var res = new T[arr.Length]; 177 Array.Copy(arr, res, res.Length); 178 return res; 179 } 180 } 181 182 public void CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars) { 183 double s = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; // controls extent of smoothing 184 185 var model = CalculateSmoothingSplineReinsch(xOrig, yOrig, inputVars, s, Problem.ProblemData.TargetVariable); 186 187 Results.Add(new Result("Solution", new RegressionSolution(model, (IRegressionProblemData)Problem.ProblemData.Clone()))); 188 189 190 } 191 192 public static IRegressionModel CalculateSmoothingSplineReinsch(double[] xOrig, double[] yOrig, string[] inputVars, double s, string targetVar) { 193 var minX = xOrig.Min(); 194 var maxX = xOrig.Max(); 195 var range = maxX - minX; 196 197 SortAndBin(xOrig, yOrig, out xOrig, out yOrig, scaling: true); 198 199 // See Smoothing by Spline Functions, Reinsch, 1967 200 // move x and y into an array indexed with 1..n to match indexing in Reinsch paper 201 int n = xOrig.Length; 202 var x = new MyArray<double>(1, xOrig); 203 var y = new MyArray<double>(1, yOrig); 204 var inv_dy = new MyArray<double>(1, Enumerable.Repeat(1.0, n).ToArray()); // equal weights 205 206 int n1 = 1; 207 int n2 = n - 1; 208 209 // results 210 var a = new MyArray<double>(n1, n); 211 var b = new MyArray<double>(n1, n); 212 var c = new MyArray<double>(n1, n); 213 var d = new MyArray<double>(n1, n); 214 215 // smooth 216 { 217 int i, m1, m2; double e, f, f2, g = 0.0, h, p; 218 MyArray<double> 219 r = new MyArray<double>(n1 - 1, n + 2), 220 r1 = new MyArray<double>(n1 - 1, n + 2), 221 r2 = new MyArray<double>(n1 - 1, n + 2), 222 t = new MyArray<double>(n1 - 1, n + 2), 223 t1 = new MyArray<double>(n1 - 1, n + 2), 224 u = new MyArray<double>(n1 - 1, n + 2), 225 v = new MyArray<double>(n1 - 1, n + 2); 226 m1 = n1 - 1; 227 m2 = n2 + 1; 228 r[m1] = r[n1] = r1[n2] = r2[n2] = r2[m2] = 229 u[m1] = u[n1] = u[n2] = u[m2] = p = 0.0; 230 m1 = n1 + 1; 231 m2 = n2 - 1; 232 h = x[m1] - x[n1]; f = (y[m1] - y[n1]) / h; 233 for (i = m1; i <= m2; i++) { 234 g = h; h = x[i + 1] - x[i]; 235 e = f; f = (y[i + 1] - y[i]) / h; 236 a[i] = f - e; t[i] = 2 * (g + h) / 3; t1[i] = h / 3; 237 r2[i] = inv_dy[i - 1] / g; r[i] = inv_dy[i + 1] / h; 238 r1[i] = -inv_dy[i] / g - inv_dy[i] / h; 239 } 240 for (i = m1; i <= m2; i++) { 241 b[i] = r[i] * r[i] + r1[i] * r1[i] + r2[i] * r2[i]; 242 c[i] = r[i] * r1[i + 1] + r1[i] * r2[i + 1]; 243 d[i] = r[i] * r2[i + 2]; 244 } 245 f2 = -s; 246 next: 247 for (i = m1; i <= m2; i++) { 248 r1[i - 1] = f * r[i - 1]; r2[i - 2] = g * r[i - 2]; 249 r[i] = 1 / (p * b[i] + t[i] - f * r1[i - 1] - g * r2[i - 2]); 250 u[i] = a[i] - r1[i - 1] * u[i - 1] - r2[i - 2] * u[i - 2]; 251 f = p * c[i] + t1[i] - h * r1[i - 1]; g = h; h = d[i] * p; 252 } 253 for (i = m2; i >= m1; i--) { 254 u[i] = r[i] * u[i] - r1[i] * u[i + 1] - r2[i] * u[i + 2]; 255 } 256 e = h = 0; 257 for (i = n1; i <= m2; i++) { 258 g = h; h = (u[i + 1] - u[i]) / (x[i + 1] - x[i]); 259 v[i] = (h - g) * inv_dy[i] * inv_dy[i]; e = e + v[i] * (h - g); 260 } 261 g = v[n2] = -h * inv_dy[n2] * inv_dy[n2]; e = e - g * h; 262 g = f2; f2 = e * p * p; 263 if (f2 >= s || f2 <= g) goto fin; 264 f = 0; h = (v[m1] - v[n1]) / (x[m1] - x[n1]); 265 for (i = m1; i <= m2; i++) { 266 g = h; h = (v[i + 1] - v[i]) / (x[i + 1] - x[i]); 267 g = h - g - r1[i - 1] * r[i - 1] - r2[i - 2] * r[i - 2]; 268 f = f + g * r[i] * g; r[i] = g; 269 } 270 h = e - p * f; if (h <= 0) goto fin; 271 p = p + (s - f2) / ((Math.Sqrt(s / e) + p) * h); goto next; 272 273 fin: 274 for (i = n1; i <= n2; i++) { 275 a[i] = y[i] - p * v[i]; 276 c[i] = u[i]; 277 } 278 for (i = n1; i <= m2; i++) { 279 h = x[i + 1] - x[i]; 280 d[i] = (c[i + 1] - c[i]) / (3 * h); 281 b[i] = (a[i + 1] - a[i]) / h - (h * d[i] + c[i]) * h; 282 } 283 } 284 285 return new ReinschSmoothingSplineModel(a.ToArray(), b.ToArray(), c.ToArray(), d.ToArray(), xOrig, targetVar, inputVars, minX, 1.0 / range); 286 } 287 288 private void CalculateSmoothingSpline(double[] x, double[] y, string[] inputVars) { 289 // see Smoothing and Non-Parametric Regression, Germán Rodríguez, 2001 2.3.1 290 SortAndBin(x, y, out x, out y, scaling: false); 291 int n = x.Length; 292 293 SparseMatrix delta = new SparseMatrix(n - 2, n); 294 // double[,] delta = new double[n - 2, n]; 295 //double[,] W = new double[n - 2, n - 2]; 296 SparseMatrix W = new SparseMatrix(n - 2, n - 2); 297 Matrix WInvD = new DenseMatrix(n - 2, n); 298 299 // double[,] W_inv_D = new double[n - 2, n]; 300 // double[,] K = new double[n, n]; 301 302 // go over successive knots to determine distances and fill Delta and W 303 for (int i = 0; i < n - 2; i++) { 304 double h = x[i + 1] - x[i]; 305 double h_next = x[i + 2] - x[i + 1]; 306 delta[i, i] = 1.0 / h; 307 delta[i, i + 1] = -1.0 / h - 1.0 / h_next; 308 delta[i, i + 2] = 1.0 / h_next; 309 W[i, i] = (h + h_next) / 3; 310 if (i > 0) { 311 W[i - 1, i] = 312 W[i, i - 1] = h / 6.0; 313 } 314 } 315 316 var solvResult = W.TrySolveIterative(delta, WInvD, new MlkBiCgStab()); 317 318 // alglib.ablas.rmatrixgemm(n - 2, n, n - 2, 1.0, W, 0, 0, 0, delta, 0, 0, 0, 1.0, W_inv_D, 0, 0); // W^-1(M = n-2, K = n-2) D(K = n-2, N=n) 319 // alglib.ablas.rmatrixgemm(n, n, n - 2, 1.0, delta, 0, 0, 1, W_inv_D, 0, 0, 0, 1.0, K, 0, 0); // D(M=n-2, K=n)^T * W^-1D (K=n, N=n-2) 320 321 var K = delta.TransposeThisAndMultiply(WInvD); 322 323 double lambda = ((IValueParameter<DoubleValue>)Parameters["Lambda"]).Value.Value; 324 325 for (int i = 0; i < n; i++) { 326 for (int j = 0; j < n; j++) { 327 K[i, j] *= lambda; 328 if (i == j) K[i, j] += 1; 329 } 330 } 331 332 // solve for y 333 // double[] s; 334 // int solverInfo; 335 // alglib.densesolverreport solverRep; 336 // alglib.rmatrixsolve(K, n, y, out solverInfo, out solverRep, out s); 337 338 var s = K.Solve(new DenseVector(y)).ToArray(); 339 340 Results.Add(new Result("Solution", new RegressionSolution(new SmoothingSplineModel(s, x, Problem.ProblemData.TargetVariable, inputVars), 341 (IRegressionProblemData)Problem.ProblemData.Clone()))); 342 } 343 344 private static void SortAndBin(double[] x, double[] y, out double[] x2, out double[] y2, bool scaling = false) { 345 // sort y by x 346 Array.Sort(x, y); 347 348 var xl = new List<double>(); 349 var yl = new List<double>(); 350 351 int n = x.Length; 352 var range = x[n - 1] - x[0]; 353 var binSize = range / n / 20; 354 { 355 // binning 356 int i = 0; 357 while (i < n) { 358 int k = 0; 359 int j = i; 360 double sumX = 0.0; 361 double sumY = 0.0; 362 while (j < n && x[j] - x[i] <= binSize) { 363 k++; 364 sumX += x[j]; 365 sumY += y[j]; 366 j++; 367 } 368 var avgX = sumX / k; 369 if (scaling) avgX = (avgX - x[0]) / range; 370 xl.Add(avgX); // scale 371 yl.Add(sumY / k); 372 i += k; 373 } 374 } 375 376 x2 = xl.ToArray(); 377 y2 = yl.ToArray(); 138 378 } 139 379 … … 146 386 Results.Add(new Result("Solution", new RegressionSolution(new MathNetSplineModel(c, Problem.ProblemData.TargetVariable, inputVars), 147 387 (IRegressionProblemData)Problem.ProblemData.Clone()))); 148 149 388 } 150 389 } 151 390 391 392 393 // UNFINISHED 394 public class ReinschSmoothingSplineModel : NamedItem, IRegressionModel { 395 private double[] a; 396 private double[] b; 397 private double[] c; 398 private double[] d; 399 private double[] x; 400 private double offset; 401 private double scale; 402 403 public string TargetVariable { get; set; } 404 405 public IEnumerable<string> VariablesUsedForPrediction { get; private set; } 406 407 public event EventHandler TargetVariableChanged; 408 409 public ReinschSmoothingSplineModel(ReinschSmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) { 410 this.TargetVariable = orig.TargetVariable; 411 this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray(); 412 this.a = orig.a; 413 this.b = orig.b; 414 this.c = orig.c; 415 this.d = orig.d; 416 this.x = orig.x; 417 this.scale = orig.scale; 418 this.offset = orig.offset; 419 } 420 public ReinschSmoothingSplineModel(double[] a, double[] b, double[] c, double[] d, double[] x, string targetVar, string[] inputs, double offset, double scale) : base("SplineModel", "SplineModel") { 421 this.a = a; 422 this.b = b; 423 this.c = c; 424 this.d = d; 425 this.x = x; 426 this.TargetVariable = targetVar; 427 this.VariablesUsedForPrediction = inputs; 428 this.scale = scale; 429 this.offset = offset; 430 } 431 432 public override IDeepCloneable Clone(Cloner cloner) { 433 return new ReinschSmoothingSplineModel(this, cloner); 434 } 435 436 public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 437 return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone()); 438 } 439 440 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 441 int n = x.Length; 442 foreach (var xx in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows).Select(xi => (xi - offset) * scale)) { 443 if (xx < x[0]) yield return a[0]; 444 else if (xx >= x[n - 1]) yield return a[n - 2]; 445 else { 446 // binary search 447 int lower = 0; 448 int upper = n - 1; 449 while (true) { 450 if (upper < lower) throw new InvalidProgramException(); 451 int i = lower + (upper - lower) / 2; 452 if (x[i] <= xx && xx < x[i + 1]) { 453 double h = xx - x[i]; 454 yield return a[i] + h * (b[i] + h * (c[i] + h * d[i])); 455 break; 456 } else if (xx < x[i]) { 457 upper = i - 1; 458 } else { 459 lower = i + 1; 460 } 461 } 462 } 463 } 464 } 465 } 466 467 // UNFINISHED 468 public class SmoothingSplineModel : NamedItem, IRegressionModel { 469 private double[] s; 470 private IInterpolation interpolation; 471 472 public string TargetVariable { get; set; } 473 474 public IEnumerable<string> VariablesUsedForPrediction { get; private set; } 475 476 public event EventHandler TargetVariableChanged; 477 478 public SmoothingSplineModel(SmoothingSplineModel orig, Cloner cloner) : base(orig, cloner) { 479 this.TargetVariable = orig.TargetVariable; 480 this.VariablesUsedForPrediction = orig.VariablesUsedForPrediction.ToArray(); 481 this.s = orig.s; // TODO 482 this.interpolation = orig.interpolation; 483 } 484 public SmoothingSplineModel(double[] s, double[] x, string targetVar, string[] inputs) : base("SplineModel", "SplineModel") { 485 this.s = s; 486 this.TargetVariable = targetVar; 487 this.VariablesUsedForPrediction = inputs; 488 this.interpolation = MathNet.Numerics.Interpolate.CubicSpline(x, s); 489 } 490 491 public override IDeepCloneable Clone(Cloner cloner) { 492 return new SmoothingSplineModel(this, cloner); 493 } 494 495 public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 496 return new RegressionSolution(this, (IRegressionProblemData)problemData.Clone()); 497 } 498 499 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 500 foreach (var x in dataset.GetDoubleValues(VariablesUsedForPrediction.First(), rows)) { 501 502 yield return interpolation.Interpolate(x); 503 504 } 505 } 506 } 152 507 153 508 // UNFINISHED
Note: See TracChangeset
for help on using the changeset viewer.