- Timestamp: 08/13/12 16:18:37 (12 years ago)
- Location: branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis
- Files: 24 edited, 8 copied
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
/trunk/sources/HeuristicLab.Algorithms.DataAnalysis merged: 8419,8421,8439,8448,8452,8455,8463-8465,8467,8471,8473,8475
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/CovarianceLinear.cs
r8417 → r8477

      [Item(Name = "CovarianceLinear", Description = "Linear covariance function for Gaussian processes.")]
      public class CovarianceLinear : Item, ICovarianceFunction {
  -     private static readonly double[] emptyArray = new double[0];
  -
        [Storable]
        private double[,] x;
  …
        }

  -     public double[] GetGradient(int i, int j) {
  -       return emptyArray;
  +     public double GetGradient(int i, int j, int k) {
  +       throw new NotSupportedException("CovarianceLinear does not have hyperparameters.");
        }
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/CovariancePeriodic.cs
r8417 → r8477

        [Storable]
        private double sf2;
  +     public double Scale { get { return sf2; } }
        [Storable]
        private double l;
  +     public double Length { get { return l; } }
        [Storable]
        private double p;
  +     public double Period { get { return p; } }

        private bool symmetric;
  …
        this.p = Math.Exp(hyp[1]);
        this.sf2 = Math.Exp(2 * hyp[2]);
  -     sf2 = Math.Min(10E6, sf2); // upper limit for the scale
  +     // sf2 = Math.Min(10E6, sf2); // upper limit for the scale

        sd = null;
  …
        }

  +     public double GetGradient(int i, int j, int k) {
  +       double v = Math.PI * sd[i, j] / p;
  +       switch (k) {
  +         case 0: {
  +           double newK = Math.Sin(v) / l;
  +           newK = newK * newK;
  +           return 4 * sf2 * Math.Exp(-2 * newK) * newK;
  +         }
  +         case 1: {
  +           double r = Math.Sin(v) / l;
  +           return 4 * sf2 / l * Math.Exp(-2 * r * r) * r * Math.Cos(v) * v;
  +         }
  +         case 2: {
  +           double newK = Math.Sin(v) / l;
  +           newK = newK * newK;
  +           return 2 * sf2 * Math.Exp(-2 * newK);
  +         }
  +         default: {
  +           throw new ArgumentException("CovariancePeriodic only has three hyperparameters.", "k");
  +         }
  +       }
  +     }
  -     public double[] GetDiagonalCovariances() {
  -       if (x != xt) throw new InvalidOperationException();
  -       int rows = x.GetLength(0);
  -       var cov = new double[rows];
  -       for (int i = 0; i < rows; i++) {
  -         double k = Math.Sqrt(Util.SqrDist(Util.GetRow(x, i), Util.GetRow(xt, i)));
  -         k = Math.PI * k / p;
  -         k = Math.Sin(k) / l;
  -         k = k * k;
  -         cov[i] = sf2 * Math.Exp(-2.0 * k);
  -       }
  -       return cov;
  -     }
  -
  -     public double[] GetGradient(int i, int j) {
  -       var res = new double[3];
  -       double k = sd[i, j];
  -       k = Math.PI * k / p;
  -       {
  -         double newK = Math.Sin(k) / l;
  -         newK = newK * newK;
  -         res[0] = 4 * sf2 * Math.Exp(-2 * newK) * newK;
  -       }
  -       {
  -         double r = Math.Sin(k) / l;
  -         res[1] = 4 * sf2 / l * Math.Exp(-2 * r * r) * r * Math.Cos(k) * k;
  -       }
  -       {
  -         double newK = Math.Sin(k) / l;
  -         newK = newK * newK;
  -         res[2] = 2 * sf2 * Math.Exp(-2 * newK);
  -       }
  -       return res;
  -     }
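The three cases above are the derivatives of the periodic kernel k(d) = sf2 * exp(-2 * (sin(pi*d/p)/l)^2) with respect to the log-hyperparameters (log l, log p, log sf2). A standalone sketch (not part of the changeset; all names are illustrative) that validates these formulas against central finite differences:

    using System;

    // Checks the analytic gradients of the periodic covariance against central
    // finite differences. Hyperparameters follow the convention used above:
    // hyp = (log l, log p, log sf2); gradients are taken w.r.t. the log-parameters.
    class PeriodicGradientCheck {
      // k(d) = sf2 * exp(-2 * (sin(pi*d/p) / l)^2), d = distance between the two points
      static double Cov(double d, double[] hyp) {
        double l = Math.Exp(hyp[0]), p = Math.Exp(hyp[1]), sf2 = Math.Exp(2 * hyp[2]);
        double r = Math.Sin(Math.PI * d / p) / l;
        return sf2 * Math.Exp(-2 * r * r);
      }

      static double Grad(double d, double[] hyp, int k) {
        double l = Math.Exp(hyp[0]), p = Math.Exp(hyp[1]), sf2 = Math.Exp(2 * hyp[2]);
        double v = Math.PI * d / p;
        double r = Math.Sin(v) / l, s = r * r;
        switch (k) {
          case 0: return 4 * sf2 * Math.Exp(-2 * s) * s;                       // d/d(log l)
          case 1: return 4 * sf2 / l * Math.Exp(-2 * s) * r * Math.Cos(v) * v; // d/d(log p)
          case 2: return 2 * sf2 * Math.Exp(-2 * s);                           // d/d(log sf2)
          default: throw new ArgumentException("only three hyperparameters", "k");
        }
      }

      static void Main() {
        var hyp = new[] { 0.3, -0.2, 0.1 };
        const double d = 1.7, eps = 1e-6;
        for (int k = 0; k < 3; k++) {
          var hp = (double[])hyp.Clone(); hp[k] += eps;
          var hm = (double[])hyp.Clone(); hm[k] -= eps;
          double fd = (Cov(d, hp) - Cov(d, hm)) / (2 * eps); // central difference
          Console.WriteLine("k={0}: analytic={1:F8} numeric={2:F8}", k, Grad(d, hyp, k), fd);
        }
      }
    }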
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/CovarianceProd.cs
r8416 → r8477

      #endregion

  +   using System;
  +   using System.Collections.Generic;
      using System.Linq;
      using HeuristicLab.Common;
  …
        this.factors = cloner.Clone(original.factors);
        this.numberOfVariables = original.numberOfVariables;
  +     AttachEventHandlers();
      }
  …
        : base() {
        this.factors = new ItemList<ICovarianceFunction>();
  +     AttachEventHandlers();
  +   }
  +
  +   private void AttachEventHandlers() {
  +     this.factors.CollectionReset += (sender, args) => ClearCache();
  +     this.factors.ItemsAdded += (sender, args) => ClearCache();
  +     this.factors.ItemsRemoved += (sender, args) => ClearCache();
  +     this.factors.ItemsReplaced += (sender, args) => ClearCache();
  +     this.factors.ItemsMoved += (sender, args) => ClearCache();
      }
  …
  -   public double[] GetGradient(int i, int j) {
  -     return factors.Select(t => t.GetGradient(i, j)).SelectMany(seq => seq).ToArray();
  +   private Dictionary<int, Tuple<int, int>> cachedParameterMap;
  +   public double GetGradient(int i, int j, int k) {
  +     if (cachedParameterMap == null) {
  +       CalculateParameterMap();
  +     }
  +     int ti = cachedParameterMap[k].Item1;
  +     k = cachedParameterMap[k].Item2;
  +     double res = 1.0;
  +     for (int ii = 0; ii < factors.Count; ii++) {
  +       var f = factors[ii];
  +       if (ii == ti) {
  +         res *= f.GetGradient(i, j, k);
  +       } else {
  +         res *= f.GetCovariance(i, j);
  +       }
  +     }
  +     return res;
  +   }
  +
  +   private void ClearCache() {
  +     cachedParameterMap = null;
  +   }
  +
  +   private void CalculateParameterMap() {
  +     cachedParameterMap = new Dictionary<int, Tuple<int, int>>();
  +     int k = 0;
  +     for (int ti = 0; ti < factors.Count; ti++) {
  +       for (int ti_k = 0; ti_k < factors[ti].GetNumberOfParameters(numberOfVariables); ti_k++) {
  +         cachedParameterMap[k++] = Tuple.Create(ti, ti_k);
  +       }
  +     }
      }
    }
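GetGradient now takes one flat hyperparameter index k spanning all factors; the cached map translates k into (factor index, parameter index within that factor), and the product rule reduces to one factor's derivative times the covariances of the remaining factors. A minimal standalone sketch of the same idea (ICov and all names are illustrative stand-ins, not HeuristicLab types):

    using System;
    using System.Collections.Generic;

    // Stand-in for ICovarianceFunction, reduced to what the sketch needs.
    interface ICov {
      int ParameterCount { get; }
      double Cov(int i, int j);
      double Grad(int i, int j, int k); // k is the factor-local parameter index
    }

    static class ProductGradient {
      // Maps the flat index k to (factor index, parameter index within that factor).
      static Tuple<int, int>[] BuildParameterMap(IList<ICov> factors) {
        var map = new List<Tuple<int, int>>();
        for (int ti = 0; ti < factors.Count; ti++)
          for (int tk = 0; tk < factors[ti].ParameterCount; tk++)
            map.Add(Tuple.Create(ti, tk));
        return map.ToArray();
      }

      // Product rule: d/dtheta (k1 * k2 * ...) = (dki/dtheta) * product of the
      // other factors' covariances, since theta belongs to exactly one factor.
      public static double Grad(IList<ICov> factors, int i, int j, int k) {
        var map = BuildParameterMap(factors); // the changeset caches this
        int ti = map[k].Item1, tk = map[k].Item2;
        double res = 1.0;
        for (int m = 0; m < factors.Count; m++)
          res *= (m == ti) ? factors[m].Grad(i, j, tk) : factors[m].Cov(i, j);
        return res;
      }
    }

CovarianceSum below uses the identical map but simply forwards to the owning term's gradient, since the derivative of a sum is the derivative of the one term that owns the parameter.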
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/CovarianceSEard.cs
r8416 → r8477

        [Storable]
        private double sf2;
  +     public double Scale { get { return sf2; } }
  +
        [Storable]
        private double[] l;
  +     public double[] Length {
  +       get {
  +         if (l == null) return new double[0];
  +         var copy = new double[l.Length];
  +         Array.Copy(l, copy, copy.Length);
  +         return copy;
  +       }
  +     }

        private double[,] sd;
  …
        this.l = hyp.Take(hyp.Length - 1).Select(Math.Exp).ToArray();
        this.sf2 = Math.Exp(2 * hyp[hyp.Length - 1]);
  -     sf2 = Math.Min(10E6, sf2); // upper limit for the scale
  +     // sf2 = Math.Min(10E6, sf2); // upper limit for the scale

        sd = null;
  …
        }

  -     public double[] GetGradient(int i, int j) {
  -       var res = new double[l.Length + 1];
  -       for (int k = 0; k < l.Length; k++) {
  -         double sqrDist = Util.SqrDist(x[i, k] / l[k], xt[j, k] / l[k]);
  -         res[k] = sf2 * Math.Exp(-sd[i, j] / 2.0) * sqrDist;
  -       }
  -       res[res.Length - 1] = 2.0 * sf2 * Math.Exp(-sd[i, j] / 2.0);
  -       return res;
  -     }
  +     public double GetGradient(int i, int j, int k) {
  +       if (k < l.Length) {
  +         double sqrDist = Util.SqrDist(x[i, k] / l[k], xt[j, k] / l[k]);
  +         return sf2 * Math.Exp(-sd[i, j] / 2.0) * sqrDist;
  +       } else if (k == l.Length) {
  +         return 2.0 * sf2 * Math.Exp(-sd[i, j] / 2.0);
  +       } else {
  +         throw new ArgumentException("CovarianceSEard has dimension+1 hyperparameters.", "k");
  +       }
  +     }
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/CovarianceSEiso.cs
r8416 → r8477

        [Storable]
        private double sf2;
  +     public double Scale { get { return sf2; } }
        [Storable]
        private double l;
  +     public double Length { get { return l; } }
        [Storable]
        private bool symmetric;
  …
        public void SetParameter(double[] hyp) {
          this.l = Math.Exp(hyp[0]);
  -       this.sf2 = Math.Min(1E6, Math.Exp(2 * hyp[1])); // upper limit for scale
  +       this.sf2 = Math.Exp(2 * hyp[1]);
          sd = null;
        }
  …
  -     public double[] GetGradient(int i, int j) {
  -       var res = new double[2];
  -       res[0] = sf2 * Math.Exp(-sd[i, j] / 2.0) * sd[i, j];
  -       res[1] = 2.0 * sf2 * Math.Exp(-sd[i, j] / 2.0);
  -       return res;
  +     public double GetGradient(int i, int j, int k) {
  +       switch (k) {
  +         case 0: return sf2 * Math.Exp(-sd[i, j] / 2.0) * sd[i, j];
  +         case 1: return 2.0 * sf2 * Math.Exp(-sd[i, j] / 2.0);
  +         default: throw new ArgumentException("CovarianceSEiso has two hyperparameters", "k");
  +       }
        }
  …
        int cols = xt.GetLength(0);
        sd = new double[rows, cols];
  +     double lInv = 1.0 / l;
        if (symmetric) {
          for (int i = 0; i < rows; i++) {
            for (int j = i; j < rows; j++) {
  -           sd[i, j] = Util.SqrDist(Util.GetRow(x, i).Select(e => e / l), Util.GetRow(xt, j).Select(e => e / l));
  +           sd[i, j] = Util.SqrDist(Util.GetRow(x, i).Select(e => e * lInv), Util.GetRow(xt, j).Select(e => e * lInv));
              sd[j, i] = sd[i, j];
            }
          }
        } else {
          for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
  -           sd[i, j] = Util.SqrDist(Util.GetRow(x, i).Select(e => e / l), Util.GetRow(xt, j).Select(e => e / l));
  +           sd[i, j] = Util.SqrDist(Util.GetRow(x, i).Select(e => e * lInv), Util.GetRow(xt, j).Select(e => e * lInv));
            }
          }
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/CovarianceSum.cs
r8416 → r8477

      #endregion

  +   using System;
  +   using System.Collections.Generic;
      using System.Linq;
      using HeuristicLab.Common;
  …
        this.terms = cloner.Clone(original.terms);
        this.numberOfVariables = original.numberOfVariables;
  +     AttachEventHandlers();
      }
  …
        : base() {
        this.terms = new ItemList<ICovarianceFunction>();
  +     AttachEventHandlers();
  +   }
  +
  +   private void AttachEventHandlers() {
  +     this.terms.CollectionReset += (sender, args) => ClearCache();
  +     this.terms.ItemsAdded += (sender, args) => ClearCache();
  +     this.terms.ItemsRemoved += (sender, args) => ClearCache();
  +     this.terms.ItemsReplaced += (sender, args) => ClearCache();
  +     this.terms.ItemsMoved += (sender, args) => ClearCache();
      }
  …
  -   public double[] GetGradient(int i, int j) {
  -     return terms.Select(t => t.GetGradient(i, j)).SelectMany(seq => seq).ToArray();
  +   private Dictionary<int, Tuple<int, int>> cachedParameterMap;
  +   public double GetGradient(int i, int j, int k) {
  +     if (cachedParameterMap == null) {
  +       CalculateParameterMap();
  +     }
  +     int ti = cachedParameterMap[k].Item1;
  +     k = cachedParameterMap[k].Item2;
  +     return terms[ti].GetGradient(i, j, k);
  +   }
  +   private void ClearCache() {
  +     cachedParameterMap = null;
  +   }
  +
  +   private void CalculateParameterMap() {
  +     cachedParameterMap = new Dictionary<int, Tuple<int, int>>();
  +     int k = 0;
  +     for (int ti = 0; ti < terms.Count; ti++) {
  +       for (int ti_k = 0; ti_k < terms[ti].GetNumberOfParameters(numberOfVariables); ti_k++) {
  +         cachedParameterMap[k++] = Tuple.Create(ti, ti_k);
  +       }
  +     }
      }
    }
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessHyperparameterInitializer.cs
r8401 → r8477

        private const string ProblemDataParameterName = "ProblemData";
        private const string HyperparameterParameterName = "Hyperparameter";
  +     private const string RandomParameterName = "Random";

        #region Parameter Properties
  …
        public ILookupParameter<IDataAnalysisProblemData> ProblemDataParameter {
          get { return (ILookupParameter<IDataAnalysisProblemData>)Parameters[ProblemDataParameterName]; }
        }
  +     public ILookupParameter<IRandom> RandomParameter {
  +       get { return (ILookupParameter<IRandom>)Parameters[RandomParameterName]; }
  +     }
        // out
  …
        Parameters.Add(new LookupParameter<ICovarianceFunction>(CovarianceFunctionParameterName, "The covariance function for the Gaussian process model."));
        Parameters.Add(new LookupParameter<IDataAnalysisProblemData>(ProblemDataParameterName, "The input data for the Gaussian process."));
  +     Parameters.Add(new LookupParameter<IRandom>(RandomParameterName, "The pseudo random number generator to use for initializing the hyperparameter vector."));
        // out
        Parameters.Add(new LookupParameter<RealVector>(HyperparameterParameterName, "The initial hyperparameter vector for the Gaussian process model."));
  …
        int l = 1 + MeanFunction.GetNumberOfParameters(inputVariablesCount) +
          CovarianceFunction.GetNumberOfParameters(inputVariablesCount);
  -     HyperparameterParameter.ActualValue = new RealVector(l);
  +     var r = new RealVector(l);
  +     var rand = RandomParameter.ActualValue;
  +     for (int i = 0; i < r.Length; i++)
  +       r[i] = rand.NextDouble() * 2 - 1;
  +
  +     HyperparameterParameter.ActualValue = r;
        return base.Apply();
      }
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs
r8416 → r8477

        private double[,] x;
        [Storable]
  -     private Scaling scaling;
  +     private Scaling inputScaling;
  …
        this.meanFunction = cloner.Clone(original.meanFunction);
        this.covarianceFunction = cloner.Clone(original.covarianceFunction);
  -     this.scaling = cloner.Clone(original.scaling);
  +     this.inputScaling = cloner.Clone(original.inputScaling);
        this.negativeLogLikelihood = original.negativeLogLikelihood;
        this.targetVariable = original.targetVariable;
  …
        this.allowedInputVariables = allowedInputVariables.ToArray();

  -     sqrSigmaNoise = Math.Exp(2.0 * hyp.First());
  -     sqrSigmaNoise = Math.Max(10E-6, sqrSigmaNoise); // lower limit for the noise level

        int nVariables = this.allowedInputVariables.Length;
  -     this.meanFunction.SetParameter(hyp.Skip(1)
  +     this.meanFunction.SetParameter(hyp
          .Take(this.meanFunction.GetNumberOfParameters(nVariables))
          .ToArray());
  -     this.covarianceFunction.SetParameter(hyp.Skip(1 + this.meanFunction.GetNumberOfParameters(nVariables))
  +     this.covarianceFunction.SetParameter(hyp.Skip(this.meanFunction.GetNumberOfParameters(nVariables))
          .Take(this.covarianceFunction.GetNumberOfParameters(nVariables))
          .ToArray());
  +     sqrSigmaNoise = Math.Exp(2.0 * hyp.Last());

        CalculateModel(ds, rows);
  …
      private void CalculateModel(Dataset ds, IEnumerable<int> rows) {
  -     scaling = new Scaling(ds, allowedInputVariables, rows);
  -     x = AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputVariables, rows, scaling);
  -
  -     var y = ds.GetDoubleValues(targetVariable, rows).ToArray();
  +     inputScaling = new Scaling(ds, allowedInputVariables, rows);
  +     x = AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputVariables, rows, inputScaling);
  +     var y = ds.GetDoubleValues(targetVariable, rows);

        int n = x.GetLength(0);
  …
        double[] m = meanFunction.GetMean(x);
        for (int i = 0; i < n; i++) {
  -
          for (int j = i; j < n; j++) {
            l[j, i] = covarianceFunction.GetCovariance(i, j) / sqrSigmaNoise;
  …
        var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
  -     if (!res) throw new InvalidOperationException("Matrix is not positive semidefinite");
  +     if (!res) throw new ArgumentException("Matrix is not positive semidefinite");

        // calculate sum of diagonal elements for likelihood
  …
        int n = x.GetLength(0);
        int nAllowedVariables = x.GetLength(1);
  -     double[,] q = new double[n, n];
  -     double[,] eye = new double[n, n];
  -     for (int i = 0; i < n; i++) eye[i, i] = 1.0;

        int info;
  -     alglib.densesolverreport denseSolveRep;
  -
  -     alglib.spdmatrixcholeskysolvem(l, n, false, eye, n, out info, out denseSolveRep, out q);
  -     // double[,] a2 = outerProd(alpha, alpha);
  +     alglib.matinvreport matInvRep;
  +     double[,] lCopy = new double[l.GetLength(0), l.GetLength(1)];
  +     Array.Copy(l, lCopy, lCopy.Length);
  +
  +     alglib.spdmatrixcholeskyinverse(ref lCopy, n, false, out info, out matInvRep);
  +     if (info != 1) throw new ArgumentException("Can't invert matrix to calculate gradients.");
        for (int i = 0; i < n; i++) {
  -       for (int j = 0; j < n; j++)
  -         q[i, j] = q[i, j] / sqrSigmaNoise - alpha[i] * alpha[j]; // a2[i,j];
  -     }
  -
  -     double noiseGradient = sqrSigmaNoise * Enumerable.Range(0, n).Select(i => q[i, i]).Sum();
  +       for (int j = 0; j <= i; j++)
  +         lCopy[i, j] = lCopy[i, j] / sqrSigmaNoise - alpha[i] * alpha[j];
  +     }
  +
  +     double noiseGradient = sqrSigmaNoise * Enumerable.Range(0, n).Select(i => lCopy[i, i]).Sum();

        double[] meanGradients = new double[meanFunction.GetNumberOfParameters(nAllowedVariables)];
  …
        if (covGradients.Length > 0) {
          for (int i = 0; i < n; i++) {
  -         for (int j = 0; j < n; j++) {
  -           var covDeriv = covarianceFunction.GetGradient(i, j);
  -           for (int k = 0; k < covGradients.Length; k++) {
  -             covGradients[k] += q[i, j] * covDeriv[k];
  +         for (int k = 0; k < covGradients.Length; k++) {
  +           for (int j = 0; j < i; j++) {
  +             covGradients[k] += lCopy[i, j] * covarianceFunction.GetGradient(i, j, k);
              }
  +           covGradients[k] += 0.5 * lCopy[i, i] * covarianceFunction.GetGradient(i, i, k);
            }
          }
  -       covGradients = covGradients.Select(g => g / 2.0).ToArray();
        }

  -     return new double[] { noiseGradient }
  -       .Concat(meanGradients)
  -       .Concat(covGradients).ToArray();
  +     return
  +       meanGradients
  +       .Concat(covGradients)
  +       .Concat(new double[] { noiseGradient }).ToArray();
      }
  …
      private IEnumerable<double> GetEstimatedValuesHelper(Dataset dataset, IEnumerable<int> rows) {
  -     var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, scaling);
  +     var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling);
        int newN = newX.GetLength(0);
        int n = x.GetLength(0);
  …
        // var kss = new double[newN];
        var Ks = new double[newN, n];
  -     double[,] sWKs = new double[n, newN];
  +     //double[,] sWKs = new double[n, newN];
        // double[,] v;
  …
        var ms = meanFunction.GetMean(newX);
        for (int i = 0; i < newN; i++) {
  -
          for (int j = 0; j < n; j++) {
            Ks[i, j] = covarianceFunction.GetCovariance(j, i);
  -         sWKs[j, i] = Ks[i, j] / Math.Sqrt(sqrSigmaNoise);
  +         //sWKs[j, i] = Ks[i, j] / Math.Sqrt(sqrSigmaNoise);
          }
        }
  …
        // alglib.rmatrixsolvem(l, n, sWKs, newN, true, out info, out denseSolveRep, out v);

  -     for (int i = 0; i < newN; i++) {
  -       // predMean[i] = ms[i] + prod(GetRow(Ks, i), alpha);
  -       yield return ms[i] + Util.ScalarProd(Util.GetRow(Ks, i), alpha);
  -       // var sumV2 = prod(GetCol(v, i), GetCol(v, i));
  -       // predVar[i] = kss[i] - sumV2;
  -     }
  +     return Enumerable.Range(0, newN)
  +       .Select(i => ms[i] + Util.ScalarProd(Util.GetRow(Ks, i), alpha));
  +     //for (int i = 0; i < newN; i++) {
  +     //  // predMean[i] = ms[i] + prod(GetRow(Ks, i), alpha);
  +     //  // var sumV2 = prod(GetCol(v, i), GetCol(v, i));
  +     //  // predVar[i] = kss[i] - sumV2;
  +     //}
      }

  +   public IEnumerable<double> GetEstimatedVariance(Dataset dataset, IEnumerable<int> rows) {
  +     var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling);
  +     int newN = newX.GetLength(0);
  +     int n = x.GetLength(0);
  +
  +     var kss = new double[newN];
  +     double[,] sWKs = new double[n, newN];
  +
  +     // for stddev
  +     covarianceFunction.SetData(newX);
  +     for (int i = 0; i < newN; i++)
  +       kss[i] = covarianceFunction.GetCovariance(i, i);
  +
  +     covarianceFunction.SetData(x, newX);
  +     for (int i = 0; i < newN; i++) {
  +       for (int j = 0; j < n; j++) {
  +         sWKs[j, i] = covarianceFunction.GetCovariance(j, i) / Math.Sqrt(sqrSigmaNoise);
  +       }
  +     }
  +
  +     // for stddev
  +     int info;
  +     alglib.densesolverreport denseSolveRep;
  +     double[,] v;
  +
  +     alglib.rmatrixsolvem(l, n, sWKs, newN, false, out info, out denseSolveRep, out v);
  +
  +     for (int i = 0; i < newN; i++) {
  +       var sumV = Util.ScalarProd(Util.GetCol(v, i), Util.GetCol(v, i));
  +       kss[i] -= sumV;
  +       if (kss[i] < 0) kss[i] = 0;
  +     }
  +     return kss;
  +   }
    }
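The new GetEstimatedVariance follows the usual Cholesky-based GP prediction scheme: with L the lower Cholesky factor of (K/sigma^2 + I), it solves L*V = K_star/sigma and returns var_i = k(x*_i, x*_i) - sum_j V[j,i]^2, clamped at zero against round-off. A self-contained sketch of one column of that computation, with a naive forward substitution in place of alglib.rmatrixsolvem (all names are illustrative, not HeuristicLab API):

    using System;

    // Sketch of the predictive-variance computation added above, without ALGLIB.
    // L is the lower Cholesky factor of (K / sigma^2 + I); ks[j] = k(x_j, xStar);
    // kss = k(xStar, xStar).
    static class GpVariance {
      // Forward substitution: solves L * v = b for lower-triangular L.
      static double[] SolveLower(double[,] L, double[] b) {
        int n = b.Length;
        var v = new double[n];
        for (int i = 0; i < n; i++) {
          double s = b[i];
          for (int j = 0; j < i; j++) s -= L[i, j] * v[j];
          v[i] = s / L[i, i];
        }
        return v;
      }

      public static double PredictVariance(double[,] L, double[] ks, double kss, double sqrSigmaNoise) {
        int n = ks.Length;
        var b = new double[n];
        for (int j = 0; j < n; j++) b[j] = ks[j] / Math.Sqrt(sqrSigmaNoise); // one sWKs column
        var v = SolveLower(L, b);
        double sumV = 0.0;
        for (int j = 0; j < n; j++) sumV += v[j] * v[j];
        return Math.Max(kss - sumV, 0.0); // clamp small negative values from round-off
      }
    }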
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessRegression.cs
r8401 → r8477

        private const string MinimizationIterationsParameterName = "Iterations";
        private const string ApproximateGradientsParameterName = "ApproximateGradients";
  +     private const string SeedParameterName = "Seed";
  +     private const string SetSeedRandomlyParameterName = "SetSeedRandomly";

        #region parameter properties
  …
        public IValueParameter<IntValue> MinimizationIterationsParameter {
          get { return (IValueParameter<IntValue>)Parameters[MinimizationIterationsParameterName]; }
        }
  +     public IValueParameter<IntValue> SeedParameter {
  +       get { return (IValueParameter<IntValue>)Parameters[SeedParameterName]; }
  +     }
  +     public IValueParameter<BoolValue> SetSeedRandomlyParameter {
  +       get { return (IValueParameter<BoolValue>)Parameters[SetSeedRandomlyParameterName]; }
  +     }
        #endregion
  …
        public int MinimizationIterations {
          get { return MinimizationIterationsParameter.Value.Value; }
        }
  +     public int Seed { get { return SeedParameter.Value.Value; } set { SeedParameter.Value.Value = value; } }
  +     public bool SetSeedRandomly { get { return SetSeedRandomlyParameter.Value.Value; } set { SetSeedRandomlyParameter.Value.Value = value; } }
        #endregion

        [StorableConstructor]
        private GaussianProcessRegression(bool deserializing) : base(deserializing) { }
  …
        Parameters.Add(new ConstrainedValueParameter<IMeanFunction>(MeanFunctionParameterName, "The mean function to use.",
  -       new ItemSet<IMeanFunction>(meanFunctions), meanFunctions.First()));
  +       new ItemSet<IMeanFunction>(meanFunctions), meanFunctions.OfType<MeanConst>().First()));
        Parameters.Add(new ConstrainedValueParameter<ICovarianceFunction>(CovarianceFunctionParameterName, "The covariance function to use.",
  -       new ItemSet<ICovarianceFunction>(covFunctions), covFunctions.First()));
  +       new ItemSet<ICovarianceFunction>(covFunctions), covFunctions.OfType<CovarianceSEiso>().First()));
        Parameters.Add(new ValueParameter<IntValue>(MinimizationIterationsParameterName, "The number of iterations for likelihood optimization with LM-BFGS.", new IntValue(20)));
  +     Parameters.Add(new ValueParameter<IntValue>(SeedParameterName, "The random seed used to initialize the new pseudo random number generator.", new IntValue(0)));
  +     Parameters.Add(new ValueParameter<BoolValue>(SetSeedRandomlyParameterName, "True if the random seed should be set to a random value, otherwise false.", new BoolValue(true)));

        Parameters.Add(new ValueParameter<BoolValue>(ApproximateGradientsParameterName, "Indicates that gradients should not be approximated (necessary for LM-BFGS).", new BoolValue(false)));
        Parameters[ApproximateGradientsParameterName].Hidden = true; // should not be changed

  +     var randomCreator = new HeuristicLab.Random.RandomCreator();
        var gpInitializer = new GaussianProcessHyperparameterInitializer();
        var bfgsInitializer = new LbfgsInitializer();
  …
        var solutionCreator = new GaussianProcessRegressionSolutionCreator();

  -     OperatorGraph.InitialOperator = gpInitializer;
  +     OperatorGraph.InitialOperator = randomCreator;
  +     randomCreator.SeedParameter.ActualName = SeedParameterName;
  +     randomCreator.SeedParameter.Value = null;
  +     randomCreator.SetSeedRandomlyParameter.ActualName = SetSeedRandomlyParameterName;
  +     randomCreator.SetSeedRandomlyParameter.Value = null;
  +     randomCreator.Successor = gpInitializer;

        gpInitializer.CovarianceFunctionParameter.ActualName = CovarianceFunctionParameterName;
  …
        gpInitializer.ProblemDataParameter.ActualName = Problem.ProblemDataParameter.Name;
        gpInitializer.HyperparameterParameter.ActualName = modelCreator.HyperparameterParameter.Name;
  +     gpInitializer.RandomParameter.ActualName = randomCreator.RandomParameter.Name;
        gpInitializer.Successor = bfgsInitializer;
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessRegressionModelCreator.cs
r8401 → r8477

      #endregion

  +   using System;
      using System.Linq;
      using HeuristicLab.Common;
  …
      public override IOperation Apply() {
  -     var model = Create(ProblemData, Hyperparameter.ToArray(), MeanFunction, CovarianceFunction);
  -     ModelParameter.ActualValue = model;
  -     NegativeLogLikelihoodParameter.ActualValue = new DoubleValue(model.NegativeLogLikelihood);
  -     HyperparameterGradientsParameter.ActualValue = new RealVector(model.GetHyperparameterGradients());
  +     try {
  +       var model = Create(ProblemData, Hyperparameter.ToArray(), MeanFunction, CovarianceFunction);
  +       ModelParameter.ActualValue = model;
  +       NegativeLogLikelihoodParameter.ActualValue = new DoubleValue(model.NegativeLogLikelihood);
  +       HyperparameterGradientsParameter.ActualValue = new RealVector(model.GetHyperparameterGradients());
  +       return base.Apply();
  +     }
  +     catch (ArgumentException) { }
  +     catch (alglib.alglibexception) { }
  +     NegativeLogLikelihoodParameter.ActualValue = new DoubleValue(1E300);
  +     HyperparameterGradientsParameter.ActualValue = new RealVector(Hyperparameter.Count());
        return base.Apply();
      }
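Apply now traps ArgumentException (the "not positive semidefinite" case above) and ALGLIB failures, reporting a huge negative log likelihood with zero gradients so the LM-BFGS run continues and steps away from pathological hyperparameters instead of aborting. A generic standalone sketch of this pattern (EvaluateModel is a hypothetical stand-in, not HeuristicLab API):

    using System;

    // "Penalize failed evaluations" pattern: when the model cannot be built
    // (e.g. the covariance matrix is not positive definite), report a huge
    // objective value and zero gradients so the optimizer backs off.
    static class SafeObjective {
      public static void Evaluate(double[] hyp,
                                  Func<double[], Tuple<double, double[]>> evaluateModel, // hypothetical
                                  out double negLogLikelihood, out double[] gradients) {
        try {
          var result = evaluateModel(hyp);
          negLogLikelihood = result.Item1;
          gradients = result.Item2;
        }
        catch (ArgumentException) {           // e.g. "Matrix is not positive semidefinite"
          negLogLikelihood = 1E300;           // effectively the worst possible likelihood
          gradients = new double[hyp.Length]; // zero gradient: no preferred direction
        }
      }
    }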
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessRegressionSolution.cs
r8371 → r8477

      #endregion

  +   using System.Collections.Generic;
  +   using System.Linq;
      using HeuristicLab.Common;
      using HeuristicLab.Core;
  …
        return new GaussianProcessRegressionSolution(this, cloner);
      }
  +
  +   public IEnumerable<double> EstimatedVariance {
  +     get { return GetEstimatedVariance(Enumerable.Range(0, ProblemData.Dataset.Rows)); }
  +   }
  +   public IEnumerable<double> EstimatedTrainingVariance {
  +     get { return GetEstimatedVariance(ProblemData.TrainingIndices); }
  +   }
  +   public IEnumerable<double> EstimatedTestVariance {
  +     get { return GetEstimatedVariance(ProblemData.TestIndices); }
  +   }
  +
  +   public IEnumerable<double> GetEstimatedVariance(IEnumerable<int> rows) {
  +     return Model.GetEstimatedVariance(ProblemData.Dataset, rows);
  +   }
    }
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessRegressionSolutionCreator.cs
r8416 → r8477

      public override IOperation Apply() {
  -     var m = ModelParameter.ActualValue;
  -     var data = ProblemDataParameter.ActualValue;
  +     var m = (IGaussianProcessModel)ModelParameter.ActualValue.Clone();
  +     var data = (IRegressionProblemData)ProblemDataParameter.ActualValue.Clone();
        var s = new GaussianProcessRegressionSolution(m, data);
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/ICovarianceFunction.cs
r8416 → r8477

        double GetCovariance(int i, int j);
  -     double[] GetGradient(int i, int j);
  +     double GetGradient(int i, int j, int k);
      }
    }
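The interface change replaces the per-pair gradient vector with a scalar lookup per hyperparameter index k, which avoids allocating a double[] for every (i, j) pair inside the O(n^2) gradient loops. A minimal standalone implementation of the revised contract, loosely modeled on the CovarianceConst file added to the project (a sketch under that assumption, not the actual HeuristicLab source):

    using System;

    // Reduced stand-in for the revised ICovarianceFunction contract
    // (SetData/GetNumberOfParameters and HeuristicLab base types omitted).
    interface ICovarianceFunctionSketch {
      double GetCovariance(int i, int j);
      double GetGradient(int i, int j, int k); // derivative w.r.t. the k-th (log-)hyperparameter
    }

    // A constant covariance k(x, x') = sf2 with sf2 = exp(2 * hyp[0]).
    class CovarianceConstSketch : ICovarianceFunctionSketch {
      private readonly double sf2;
      public CovarianceConstSketch(double hyp0) { sf2 = Math.Exp(2 * hyp0); }
      public double GetCovariance(int i, int j) { return sf2; }
      public double GetGradient(int i, int j, int k) {
        if (k == 0) return 2 * sf2; // d exp(2*hyp0) / d hyp0 = 2 * exp(2*hyp0)
        throw new ArgumentException("CovarianceConst has exactly one hyperparameter.", "k");
      }
    }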
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/MeanConst.cs
r8416 → r8477

        [Storable]
        private double c;
  +     public double Value { get { return c; } }
  +
        public int GetNumberOfParameters(int numberOfVariables) {
          return 1;
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/MeanLinear.cs
r8416 → r8477

        [Storable]
        private double[] alpha;
  +     public double[] Weights {
  +       get {
  +         if (alpha == null) return new double[0];
  +         var copy = new double[alpha.Length];
  +         Array.Copy(alpha, copy, copy.Length);
  +         return copy;
  +       }
  +     }
        public int GetNumberOfParameters(int numberOfVariables) {
          return numberOfVariables;
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/Util.cs
r8401 → r8477

      #endregion

  -   using System;
      using System.Collections.Generic;
      using System.Linq;
  …
      public static double SqrDist(double x, double y) {
        double d = x - y;
  -     return Math.Max(d * d, 0.0);
  +     return d * d;
      }

      public static double SqrDist(IEnumerable<double> x, IEnumerable<double> y) {
  -     return x.Zip(y, SqrDist).Sum();
  +     return x.Zip(y, (a, b) => (a - b) * (a - b)).Sum();
      }
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r8430 → r8477

      </PropertyGroup>
      <ItemGroup>
  -     <Reference Include="ALGLIB-3.5.0, Version=3.5.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
  -       <HintPath>..\..\..\..\trunk\sources\bin\ALGLIB-3.5.0.dll</HintPath>
  +     <Reference Include="ALGLIB-3.6.0, Version=3.6.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
  +       <HintPath>..\..\..\..\trunk\sources\bin\ALGLIB-3.6.0.dll</HintPath>
          <Private>False</Private>
        </Reference>
  …
        <Compile Include="FixedDataAnalysisAlgorithm.cs" />
  +     <Compile Include="GaussianProcess\CovarianceRQiso.cs" />
  +     <Compile Include="GaussianProcess\CovarianceNoise.cs" />
  +     <Compile Include="GaussianProcess\CovarianceConst.cs" />
  +     <Compile Include="GaussianProcess\MeanProd.cs" />
  +     <Compile Include="GaussianProcess\MeanSum.cs" />
        <Compile Include="GaussianProcess\CovarianceProd.cs" />
        <Compile Include="GaussianProcess\CovarianceSum.cs" />
  …
        <Compile Include="Interfaces\IGaussianProcessModel.cs" />
        <Compile Include="Interfaces\IGaussianProcessSolution.cs" />
  +     <Compile Include="Interfaces\INcaClassificationSolution.cs" />
  +     <Compile Include="Interfaces\INcaModel.cs" />
        <Compile Include="Interfaces\INearestNeighbourClassificationSolution.cs" />
        <Compile Include="Interfaces\INearestNeighbourRegressionSolution.cs" />
  …
        <Compile Include="Linear\MultinomialLogitClassificationSolution.cs" />
        <Compile Include="Linear\MultinomialLogitModel.cs" />
  +     <Compile Include="Nca\Initialization\INcaInitializer.cs" />
  +     <Compile Include="Nca\Initialization\LdaInitializer.cs" />
  +     <Compile Include="Nca\Initialization\PcaInitializer.cs" />
  +     <Compile Include="Nca\Initialization\RandomInitializer.cs" />
  +     <Compile Include="Nca\Matrix.cs" />
  +     <Compile Include="Nca\NcaAlgorithm.cs" />
  +     <Compile Include="Nca\NcaClassificationSolution.cs" />
  +     <Compile Include="Nca\NcaModel.cs" />
        <Compile Include="NearestNeighbour\NearestNeighbourClassification.cs" />
        <Compile Include="NearestNeighbour\NearestNeighbourClassificationSolution.cs" />
  …
          <Name>HeuristicLab.Problems.DataAnalysis-3.4</Name>
        </ProjectReference>
  +     <ProjectReference Include="..\..\HeuristicLab.Random\3.3\HeuristicLab.Random-3.3.csproj">
  +       <Project>{F4539FB6-4708-40C9-BE64-0A1390AEA197}</Project>
  +       <Name>HeuristicLab.Random-3.3</Name>
  +       <Private>False</Private>
  +     </ProjectReference>
      </ItemGroup>
      <ItemGroup>
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/IGaussianProcessModel.cs
r8430 → r8477

      #endregion

  +   using System.Collections.Generic;
      using HeuristicLab.Problems.DataAnalysis;
  …
        ICovarianceFunction CovarianceFunction { get; }
        double[] GetHyperparameterGradients();
  +
  +     IEnumerable<double> GetEstimatedVariance(Dataset ds, IEnumerable<int> rows);
      }
    }
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/Scaling.cs
r8430 → r8477

          return ds.GetDoubleValues(variable, rows).Select(x => (x - min) / (max - min));
        }
  +
  +     public void GetScalingParameters(string variable, out double min, out double max) {
  +       min = scalingParameters[variable].Item1;
  +       max = scalingParameters[variable].Item2;
  +     }
      }
    }
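Scaling maps each variable linearly onto [0, 1] via (x - min)/(max - min); the new GetScalingParameters accessor exposes min and max so callers can reapply the same transform to new data or invert it. A standalone sketch of that round trip (names are illustrative, not the HeuristicLab class):

    using System;
    using System.Collections.Generic;
    using System.Linq;

    // Minimal min-max scaler mirroring the behavior described above,
    // plus the inverse transform that the exposed parameters enable.
    class MinMaxScaling {
      private readonly Dictionary<string, Tuple<double, double>> scalingParameters
        = new Dictionary<string, Tuple<double, double>>();

      public void Fit(string variable, IEnumerable<double> values) {
        var v = values.ToArray();
        scalingParameters[variable] = Tuple.Create(v.Min(), v.Max()); // (min, max)
      }
      public IEnumerable<double> Scale(string variable, IEnumerable<double> values) {
        double min = scalingParameters[variable].Item1, max = scalingParameters[variable].Item2;
        return values.Select(x => (x - min) / (max - min));
      }
      public IEnumerable<double> Unscale(string variable, IEnumerable<double> scaled) {
        double min = scalingParameters[variable].Item1, max = scalingParameters[variable].Item2;
        return scaled.Select(s => s * (max - min) + min);
      }
    }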
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs
r8430 → r8477

      using System;
  -   using System.Collections.Generic;
      using System.Linq;
      using HeuristicLab.Common;
      using HeuristicLab.Core;
      using HeuristicLab.Data;
  -   using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
      using HeuristicLab.Optimization;
  +   using HeuristicLab.Parameters;
      using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
      using HeuristicLab.Problems.DataAnalysis;
  -   using HeuristicLab.Problems.DataAnalysis.Symbolic;
  -   using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
  -   using HeuristicLab.Parameters;
  …
      public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k) {
  -     Dataset dataset = problemData.Dataset;
  -     string targetVariable = problemData.TargetVariable;
  -     IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
  -     IEnumerable<int> rows = problemData.TrainingIndices;
  -     double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
  -     if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
  -       throw new NotSupportedException("Nearest neighbour classification does not support NaN or infinity values in the input dataset.");
  -
  -     alglib.nearestneighbor.kdtree kdtree = new alglib.nearestneighbor.kdtree();
  -
  -     int nRows = inputMatrix.GetLength(0);
  -     int nFeatures = inputMatrix.GetLength(1) - 1;
  -     double[] classValues = dataset.GetDoubleValues(targetVariable).Distinct().OrderBy(x => x).ToArray();
  -     int nClasses = classValues.Count();
  -     // map original class values to values [0..nClasses-1]
  -     Dictionary<double, double> classIndices = new Dictionary<double, double>();
  -     for (int i = 0; i < nClasses; i++) {
  -       classIndices[classValues[i]] = i;
  -     }
  -     for (int row = 0; row < nRows; row++) {
  -       inputMatrix[row, nFeatures] = classIndices[inputMatrix[row, nFeatures]];
  -     }
  -     alglib.nearestneighbor.kdtreebuild(inputMatrix, nRows, inputMatrix.GetLength(1) - 1, 1, 2, kdtree);
  -     var problemDataClone = (IClassificationProblemData)problemData.Clone();
  -     return new NearestNeighbourClassificationSolution(problemDataClone, new NearestNeighbourModel(kdtree, k, targetVariable, allowedInputVariables, problemDataClone.ClassValues.ToArray()));
  +     var problemDataClone = (IClassificationProblemData)problemData.Clone();
  +     return new NearestNeighbourClassificationSolution(problemDataClone, Train(problemDataClone, k));
  +   }
  +
  +   public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k) {
  +     return new NearestNeighbourModel(problemData.Dataset,
  +       problemData.TrainingIndices,
  +       k,
  +       problemData.TargetVariable,
  +       problemData.AllowedInputVariables,
  +       problemData.ClassValues.ToArray());
      }
      #endregion
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs
r7294 → r8477

      /// </summary>
      [StorableClass]
  -   [Item("NearestNeighbourModel", "Represents a neural network for regression and classification.")]
  +   [Item("NearestNeighbourModel", "Represents a nearest neighbour model for regression and classification.")]
      public sealed class NearestNeighbourModel : NamedItem, INearestNeighbourModel {
  …
        [Storable]
        private int k;
  +
        [StorableConstructor]
        private NearestNeighbourModel(bool deserializing)
  …
          this.classValues = (double[])original.classValues.Clone();
        }
  -     public NearestNeighbourModel(alglib.nearestneighbor.kdtree kdTree, int k, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null)
  -       : base() {
  -       this.name = ItemName;
  -       this.description = ItemDescription;
  -       this.kdTree = kdTree;
  +     public NearestNeighbourModel(Dataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null) {
  +       Name = ItemName;
  +       Description = ItemDescription;
          this.k = k;
          this.targetVariable = targetVariable;
          this.allowedInputVariables = allowedInputVariables.ToArray();
  -       if (classValues != null)
  +
  +       var inputMatrix = AlglibUtil.PrepareInputMatrix(dataset,
  +         allowedInputVariables.Concat(new string[] { targetVariable }),
  +         rows);
  +
  +       if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
  +         throw new NotSupportedException(
  +           "Nearest neighbour classification does not support NaN or infinity values in the input dataset.");
  +
  +       this.kdTree = new alglib.nearestneighbor.kdtree();
  +
  +       var nRows = inputMatrix.GetLength(0);
  +       var nFeatures = inputMatrix.GetLength(1) - 1;
  +
  +       if (classValues != null) {
            this.classValues = (double[])classValues.Clone();
  +         int nClasses = classValues.Length;
  +         // map original class values to values [0..nClasses-1]
  +         var classIndices = new Dictionary<double, double>();
  +         for (int i = 0; i < nClasses; i++)
  +           classIndices[classValues[i]] = i;
  +
  +         for (int row = 0; row < nRows; row++) {
  +           inputMatrix[row, nFeatures] = classIndices[inputMatrix[row, nFeatures]];
  +         }
  +       }
  +       alglib.nearestneighbor.kdtreebuild(inputMatrix, nRows, inputMatrix.GetLength(1) - 1, 1, 2, kdTree);
        }
  …
        public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
  +       if (classValues == null) throw new InvalidOperationException("No class values are defined.");
          double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
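Building the kd-tree inside the constructor keeps the class-value remapping in one place: labels are arbitrary doubles, so they are encoded as indices 0..nClasses-1 in the last input-matrix column and decoded from classValues again when predicting. A small standalone sketch of that mapping and its inverse (names are illustrative):

    using System;
    using System.Collections.Generic;

    // Encode/decode of class labels as done in the constructor above:
    // distinct, ordered labels become indices 0..nClasses-1 for the kd-tree.
    static class ClassIndexMapping {
      public static Dictionary<double, double> Build(double[] classValues) {
        var classIndices = new Dictionary<double, double>();
        for (int i = 0; i < classValues.Length; i++)
          classIndices[classValues[i]] = i;
        return classIndices;
      }

      public static void Demo() {
        var classValues = new[] { -1.0, 1.0, 3.5 };  // distinct, ordered labels
        var toIndex = Build(classValues);
        double encoded = toIndex[3.5];               // 2.0, stored in the kd-tree column
        double decoded = classValues[(int)encoded];  // 3.5, returned to the caller
        Console.WriteLine("{0} -> {1} -> {2}", 3.5, encoded, decoded);
      }
    }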
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs
r8430 → r8477

      using System;
  -   using System.Collections.Generic;
  -   using System.Linq;
      using HeuristicLab.Common;
      using HeuristicLab.Core;
      using HeuristicLab.Data;
  -   using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
      using HeuristicLab.Optimization;
  +   using HeuristicLab.Parameters;
      using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
      using HeuristicLab.Problems.DataAnalysis;
  -   using HeuristicLab.Problems.DataAnalysis.Symbolic;
  -   using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
  -   using HeuristicLab.Parameters;
  …
      public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k) {
  -     Dataset dataset = problemData.Dataset;
  -     string targetVariable = problemData.TargetVariable;
  -     IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
  -     IEnumerable<int> rows = problemData.TrainingIndices;
  -     double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
  -     if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
  -       throw new NotSupportedException("Nearest neighbour regression does not support NaN or infinity values in the input dataset.");
  -
  -     alglib.nearestneighbor.kdtree kdtree = new alglib.nearestneighbor.kdtree();
  -
  -     int nRows = inputMatrix.GetLength(0);
  -
  -     alglib.nearestneighbor.kdtreebuild(inputMatrix, nRows, inputMatrix.GetLength(1) - 1, 1, 2, kdtree);
  -
  -     return new NearestNeighbourRegressionSolution((IRegressionProblemData)problemData.Clone(), new NearestNeighbourModel(kdtree, k, targetVariable, allowedInputVariables));
  +     var clonedProblemData = (IRegressionProblemData)problemData.Clone();
  +     return new NearestNeighbourRegressionSolution(clonedProblemData, Train(problemData, k));
  +   }
  +
  +   public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k) {
  +     return new NearestNeighbourModel(problemData.Dataset,
  +       problemData.TrainingIndices,
  +       k,
  +       problemData.TargetVariable,
  +       problemData.AllowedInputVariables);
      }
      #endregion
branches/HeuristicLab.TimeSeries/HeuristicLab.Algorithms.DataAnalysis/3.4/Plugin.cs.frame
r8430 → r8477

      [Plugin("HeuristicLab.Algorithms.DataAnalysis", "Provides wrappers for data analysis algorithms implemented in external libraries (linear regression, linear discriminant analysis, k-means clustering, support vector classification and regression)", "3.4.3.$WCREV$")]
      [PluginFile("HeuristicLab.Algorithms.DataAnalysis-3.4.dll", PluginFileType.Assembly)]
  -   [PluginDependency("HeuristicLab.ALGLIB", "3.5.0")]
  +   [PluginDependency("HeuristicLab.ALGLIB", "3.6.0")]
      [PluginDependency("HeuristicLab.Algorithms.GradientDescent", "3.3")]
      [PluginDependency("HeuristicLab.Analysis", "3.3")]
  …
      [PluginDependency("HeuristicLab.Problems.DataAnalysis.Symbolic.Regression", "3.4")]
      [PluginDependency("HeuristicLab.LibSVM", "1.6.3")]
  +   [PluginDependency("HeuristicLab.Random", "3.3")]
      public class HeuristicLabAlgorithmsDataAnalysisPlugin : PluginBase {
      }