Changeset 14029 for branches/crossvalidation-2434/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs
- Timestamp:
- 07/08/16 14:40:02 (8 years ago)
- Location:
- branches/crossvalidation-2434
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/crossvalidation-2434
- Property svn:mergeinfo changed
-
branches/crossvalidation-2434/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
-
branches/crossvalidation-2434/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs
r12820 r14029 34 34 [StorableClass] 35 35 [Item("GaussianProcessModel", "Represents a Gaussian process posterior.")] 36 public sealed class GaussianProcessModel : NamedItem, IGaussianProcessModel { 36 public sealed class GaussianProcessModel : RegressionModel, IGaussianProcessModel { 37 public override IEnumerable<string> VariablesUsedForPrediction { 38 get { return allowedInputVariables; } 39 } 40 37 41 [Storable] 38 42 private double negativeLogLikelihood; … … 61 65 get { return meanFunction; } 62 66 } 63 [Storable] 64 private string targetVariable; 65 public string TargetVariable { 66 get { return targetVariable; } 67 } 67 68 68 [Storable] 69 69 private string[] allowedInputVariables; … … 124 124 this.meanFunction = cloner.Clone(original.meanFunction); 125 125 this.covarianceFunction = cloner.Clone(original.covarianceFunction); 126 this.inputScaling = cloner.Clone(original.inputScaling); 126 if (original.inputScaling != null) 127 this.inputScaling = cloner.Clone(original.inputScaling); 127 128 this.trainingDataset = cloner.Clone(original.trainingDataset); 128 129 this.negativeLogLikelihood = original.negativeLogLikelihood; 129 this.targetVariable = original.targetVariable;130 130 this.sqrSigmaNoise = original.sqrSigmaNoise; 131 131 if (original.meanParameter != null) { … … 144 144 } 145 145 public GaussianProcessModel(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows, 146 IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction) 147 : base() { 146 IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction, 147 bool scaleInputs = true) 148 : base(targetVariable) { 148 149 this.name = ItemName; 149 150 this.description = ItemDescription; 150 151 this.meanFunction = (IMeanFunction)meanFunction.Clone(); 151 152 this.covarianceFunction = (ICovarianceFunction)covarianceFunction.Clone(); 152 this.targetVariable = targetVariable;153 153 
this.allowedInputVariables = allowedInputVariables.ToArray(); 154 154 … … 163 163 .ToArray(); 164 164 sqrSigmaNoise = Math.Exp(2.0 * hyp.Last()); 165 CalculateModel(ds, rows); 166 } 167 168 private void CalculateModel(IDataset ds, IEnumerable<int> rows) { 165 try { 166 CalculateModel(ds, rows, scaleInputs); 167 } 168 catch (alglib.alglibexception ae) { 169 // wrap exception so that calling code doesn't have to know about alglib implementation 170 throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae); 171 } 172 } 173 174 private void CalculateModel(IDataset ds, IEnumerable<int> rows, bool scaleInputs = true) { 169 175 this.trainingDataset = (IDataset)ds.Clone(); 170 176 this.trainingRows = rows.ToArray(); 171 this.inputScaling = new Scaling(trainingDataset, allowedInputVariables, rows); 172 this.x = CalculateX(trainingDataset, allowedInputVariables, rows, inputScaling); 173 var y = ds.GetDoubleValues(targetVariable, rows); 177 this.inputScaling = scaleInputs ? 
new Scaling(ds, allowedInputVariables, rows) : null; 178 179 x = GetData(ds, this.allowedInputVariables, this.trainingRows, this.inputScaling); 180 181 IEnumerable<double> y; 182 y = ds.GetDoubleValues(TargetVariable, rows); 174 183 175 184 int n = x.GetLength(0); 176 185 186 var columns = Enumerable.Range(0, x.GetLength(1)).ToArray(); 177 187 // calculate cholesky decomposed (lower triangular) covariance matrix 178 var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, x.GetLength(1)));188 var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns); 179 189 this.l = CalculateL(x, cov, sqrSigmaNoise); 180 190 181 191 // calculate mean 182 var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, Enumerable.Range(0, x.GetLength(1)));192 var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns); 183 193 double[] m = Enumerable.Range(0, x.GetLength(0)) 184 194 .Select(r => mean.Mean(x, r)) 185 195 .ToArray(); 186 187 188 196 189 197 // calculate sum of diagonal elements for likelihood … … 219 227 double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)]; 220 228 for (int k = 0; k < meanGradients.Length; k++) { 221 var meanGrad = Enumerable.Range(0, alpha.Length) 222 .Select(r => mean.Gradient(x, r, k)); 229 var meanGrad = new double[alpha.Length]; 230 for (int g = 0; g < meanGrad.Length; g++) 231 meanGrad[g] = mean.Gradient(x, g, k); 223 232 meanGradients[k] = -Util.ScalarProd(meanGrad, alpha); 224 233 } … … 228 237 for (int i = 0; i < n; i++) { 229 238 for (int j = 0; j < i; j++) { 230 var g = cov.CovarianceGradient(x, i, j) .ToArray();239 var g = cov.CovarianceGradient(x, i, j); 231 240 for (int k = 0; k < covGradients.Length; k++) { 232 241 covGradients[k] += lCopy[i, j] * g[k]; … … 234 243 } 235 244 236 var gDiag = cov.CovarianceGradient(x, i, i) .ToArray();245 var gDiag = cov.CovarianceGradient(x, i, i); 237 246 for 
(int k = 0; k < covGradients.Length; k++) { 238 247 // diag … … 249 258 } 250 259 251 private static double[,] CalculateX(IDataset ds, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows, Scaling inputScaling) { 252 return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputVariables, rows, inputScaling); 260 private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) { 261 if (scaling != null) { 262 return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling); 263 } else { 264 return AlglibUtil.PrepareInputMatrix(ds, allowedInputs, rows); 265 } 253 266 } 254 267 … … 286 299 287 300 #region IRegressionModel Members 288 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {301 public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 289 302 return GetEstimatedValuesHelper(dataset, rows); 290 303 } 291 public GaussianProcessRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {304 public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) { 292 305 return new GaussianProcessRegressionSolution(this, new RegressionProblemData(problemData)); 293 306 } 294 IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {295 return CreateRegressionSolution(problemData);296 }297 307 #endregion 298 308 299 309 300 310 private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) { 301 if (x == null) { 302 this.x = CalculateX(trainingDataset, allowedInputVariables, trainingRows, inputScaling); 303 } 304 int n = x.GetLength(0); 305 306 var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling); 307 int newN = newX.GetLength(0); 308 309 var Ks = new double[newN, n]; 310 var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, 
Enumerable.Range(0, newX.GetLength(1))); 311 var ms = Enumerable.Range(0, newX.GetLength(0)) 312 .Select(r => mean.Mean(newX, r)) 313 .ToArray(); 314 var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, newX.GetLength(1))); 315 for (int i = 0; i < newN; i++) { 316 for (int j = 0; j < n; j++) { 317 Ks[i, j] = cov.CrossCovariance(x, newX, j, i); 318 } 319 } 320 321 return Enumerable.Range(0, newN) 322 .Select(i => ms[i] + Util.ScalarProd(Util.GetRow(Ks, i), alpha)); 311 try { 312 if (x == null) { 313 x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling); 314 } 315 int n = x.GetLength(0); 316 317 double[,] newX = GetData(dataset, allowedInputVariables, rows, inputScaling); 318 int newN = newX.GetLength(0); 319 320 var Ks = new double[newN][]; 321 var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray(); 322 var mean = meanFunction.GetParameterizedMeanFunction(meanParameter, columns); 323 var ms = Enumerable.Range(0, newX.GetLength(0)) 324 .Select(r => mean.Mean(newX, r)) 325 .ToArray(); 326 var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns); 327 for (int i = 0; i < newN; i++) { 328 Ks[i] = new double[n]; 329 for (int j = 0; j < n; j++) { 330 Ks[i][j] = cov.CrossCovariance(x, newX, j, i); 331 } 332 } 333 334 return Enumerable.Range(0, newN) 335 .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha)); 336 } 337 catch (alglib.alglibexception ae) { 338 // wrap exception so that calling code doesn't have to know about alglib implementation 339 throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae); 340 } 323 341 } 324 342 325 343 public IEnumerable<double> GetEstimatedVariance(IDataset dataset, IEnumerable<int> rows) { 326 if (x == null) { 327 this.x = CalculateX(trainingDataset, allowedInputVariables, trainingRows, inputScaling); 328 } 329 int n = x.GetLength(0); 330 331 var newX = 
AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling); 332 int newN = newX.GetLength(0); 333 334 var kss = new double[newN]; 335 double[,] sWKs = new double[n, newN]; 336 var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, Enumerable.Range(0, x.GetLength(1))); 337 338 if (l == null) { 339 l = CalculateL(x, cov, sqrSigmaNoise); 340 } 341 342 // for stddev 343 for (int i = 0; i < newN; i++) 344 kss[i] = cov.Covariance(newX, i, i); 345 346 for (int i = 0; i < newN; i++) { 347 for (int j = 0; j < n; j++) { 348 sWKs[j, i] = cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise); 349 } 350 } 351 352 // for stddev 353 alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0); 354 355 for (int i = 0; i < newN; i++) { 356 var sumV = Util.ScalarProd(Util.GetCol(sWKs, i), Util.GetCol(sWKs, i)); 357 kss[i] -= sumV; 358 if (kss[i] < 0) kss[i] = 0; 359 } 360 return kss; 361 } 344 try { 345 if (x == null) { 346 x = GetData(trainingDataset, allowedInputVariables, trainingRows, inputScaling); 347 } 348 int n = x.GetLength(0); 349 350 var newX = GetData(dataset, allowedInputVariables, rows, inputScaling); 351 int newN = newX.GetLength(0); 352 353 var kss = new double[newN]; 354 double[,] sWKs = new double[n, newN]; 355 var columns = Enumerable.Range(0, newX.GetLength(1)).ToArray(); 356 var cov = covarianceFunction.GetParameterizedCovarianceFunction(covarianceParameter, columns); 357 358 if (l == null) { 359 l = CalculateL(x, cov, sqrSigmaNoise); 360 } 361 362 // for stddev 363 for (int i = 0; i < newN; i++) 364 kss[i] = cov.Covariance(newX, i, i); 365 366 for (int i = 0; i < newN; i++) { 367 for (int j = 0; j < n; j++) { 368 sWKs[j, i] = cov.CrossCovariance(x, newX, j, i) / Math.Sqrt(sqrSigmaNoise); 369 } 370 } 371 372 // for stddev 373 alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0); 374 375 for (int i = 0; i < newN; i++) { 376 var col = 
Util.GetCol(sWKs, i).ToArray(); 377 var sumV = Util.ScalarProd(col, col); 378 kss[i] += sqrSigmaNoise; // kss is V(f), add noise variance of predictive distribution to get V(y) 379 kss[i] -= sumV; 380 if (kss[i] < 0) kss[i] = 0; 381 } 382 return kss; 383 } 384 catch (alglib.alglibexception ae) { 385 // wrap exception so that calling code doesn't have to know about alglib implementation 386 throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae); 387 } 388 } 389 362 390 } 363 391 }
Note: See TracChangeset
for help on using the changeset viewer.