- Timestamp: 07/09/15 16:11:52 (9 years ago)
- Location: branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees
- Files: 7 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithmStatic.cs
--- r12661
+++ r12696
@@ -55,5 +55,4 @@
       internal double[] pred;
       internal double[] predTest;
-      internal double[] w;
       internal double[] y;
       internal int[] activeIdx;
@@ -77,6 +76,4 @@
 
         y = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
-        // weights are all 1 for now (HL doesn't support weights yet)
-        w = Enumerable.Repeat(1.0, nRows).ToArray();
 
         treeBuilder = new RegressionTreeBuilder(problemData, random);
@@ -85,6 +82,5 @@
 
         var zeros = Enumerable.Repeat(0.0, nRows);
-        var ones = Enumerable.Repeat(1.0, nRows);
-        double f0 = lossFunction.GetLineSearchFunc(y, zeros, ones)(activeIdx, 0, nRows - 1); // initial constant value (mean for squared errors)
+        double f0 = lossFunction.GetLineSearchFunc(y, zeros)(activeIdx, 0, nRows - 1); // initial constant value (mean for squared errors)
         pred = Enumerable.Repeat(f0, nRows).ToArray();
         predTest = Enumerable.Repeat(f0, problemData.TestIndices.Count()).ToArray();
@@ -107,11 +103,10 @@
       public double GetTrainLoss() {
         int nRows = y.Length;
-        return lossFunction.GetLoss(y, pred, w) / nRows;
+        return lossFunction.GetLoss(y, pred) / nRows;
       }
       public double GetTestLoss() {
         var yTest = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TestIndices);
-        var wTest = problemData.TestIndices.Select(_ => 1.0); // ones
         var nRows = problemData.TestIndices.Count();
-        return lossFunction.GetLoss(yTest, predTest, wTest) / nRows;
+        return lossFunction.GetLoss(yTest, predTest) / nRows;
       }
     }
@@ -160,5 +155,4 @@
       var yPred = gbmState.pred;
       var yPredTest = gbmState.predTest;
-      var w = gbmState.w;
       var treeBuilder = gbmState.treeBuilder;
       var y = gbmState.y;
@@ -168,9 +162,9 @@
       // copy output of gradient function to pre-allocated rim array (pseudo-residuals)
       int rimIdx = 0;
-      foreach (var g in lossFunction.GetLossGradient(y, yPred, w)) {
+      foreach (var g in lossFunction.GetLossGradient(y, yPred)) {
         pseudoRes[rimIdx++] = g;
       }
 
-      var tree = treeBuilder.CreateRegressionTreeForGradientBoosting(pseudoRes, maxSize, activeIdx, lossFunction.GetLineSearchFunc(y, yPred, w), r, m);
+      var tree = treeBuilder.CreateRegressionTreeForGradientBoosting(pseudoRes, maxSize, activeIdx, lossFunction.GetLineSearchFunc(y, yPred), r, m);
 
       int i = 0;
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/AbsoluteErrorLoss.cs
--- r12635
+++ r12696
@@ -30,47 +30,41 @@
   // loss function for the weighted absolute error
   public class AbsoluteErrorLoss : ILossFunction {
-    public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
+    public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred) {
       var targetEnum = target.GetEnumerator();
       var predEnum = pred.GetEnumerator();
-      var weightEnum = weight.GetEnumerator();
 
       double s = 0;
-      while (targetEnum.MoveNext() & predEnum.MoveNext() & weightEnum.MoveNext()) {
+      while (targetEnum.MoveNext() & predEnum.MoveNext()) {
         double res = targetEnum.Current - predEnum.Current;
-        s += weightEnum.Current * Math.Abs(res); // w * |res|
+        s += Math.Abs(res); // |res|
       }
-      if (targetEnum.MoveNext() | predEnum.MoveNext() | weightEnum.MoveNext())
-        throw new ArgumentException("target, pred and weight have differing lengths");
+      if (targetEnum.MoveNext() | predEnum.MoveNext())
+        throw new ArgumentException("target and pred have differing lengths");
 
       return s;
     }
 
-    public IEnumerable<double> GetLossGradient(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
+    public IEnumerable<double> GetLossGradient(IEnumerable<double> target, IEnumerable<double> pred) {
       var targetEnum = target.GetEnumerator();
       var predEnum = pred.GetEnumerator();
-      var weightEnum = weight.GetEnumerator();
 
-      while (targetEnum.MoveNext() & predEnum.MoveNext() & weightEnum.MoveNext()) {
-        // dL(y, f(x)) / df(x) = weight * sign(res)
+      while (targetEnum.MoveNext() & predEnum.MoveNext()) {
+        // dL(y, f(x)) / df(x) = sign(res)
         var res = targetEnum.Current - predEnum.Current;
-        if (res > 0) yield return weightEnum.Current;
-        else if (res < 0) yield return -weightEnum.Current;
+        if (res > 0) yield return 1.0;
+        else if (res < 0) yield return -1.0;
         else yield return 0.0;
       }
-      if (targetEnum.MoveNext() | predEnum.MoveNext() | weightEnum.MoveNext())
-        throw new ArgumentException("target, pred and weight have differing lengths");
+      if (targetEnum.MoveNext() | predEnum.MoveNext())
+        throw new ArgumentException("target and pred have differing lengths");
     }
 
     // return median of residuals
-    public LineSearchFunc GetLineSearchFunc(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
+    public LineSearchFunc GetLineSearchFunc(IEnumerable<double> target, IEnumerable<double> pred) {
       var targetArr = target.ToArray();
       var predArr = pred.ToArray();
-      var weightArr = weight.ToArray();
-      // the optimal constant value that should be added to the predictions is the median of the residuals
-      // weights are not supported yet (need to calculate a weighted median)
-      Debug.Assert(weightArr.All(w => w.IsAlmost(1.0)));
 
-      if (targetArr.Length != predArr.Length || predArr.Length != weightArr.Length)
-        throw new ArgumentException("target, pred and weight have differing lengths");
+      if (targetArr.Length != predArr.Length)
+        throw new ArgumentException("target and pred have differing lengths");
 
       // line search for abs error
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/ILossFunction.cs
--- r12607
+++ r12696
@@ -30,14 +30,13 @@
   // target represents the target vector (original targets from the problem data, never changed)
   // pred represents the current vector of predictions (a weighted combination of models learned so far, this vector is updated after each step)
-  // weight represents a weight vector for rows (this is not supported yet -> all weights are 1)
   public interface ILossFunction {
-    // returns the weighted loss of the current prediction vector
-    double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight);
+    // returns the loss of the current prediction vector
+    double GetLoss(IEnumerable<double> target, IEnumerable<double> pred);
 
-    // returns an enumerable of the weighted loss gradient for each row
-    IEnumerable<double> GetLossGradient(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight);
+    // returns an enumerable of the loss gradient for each row
+    IEnumerable<double> GetLossGradient(IEnumerable<double> target, IEnumerable<double> pred);
 
     // returns a function that returns the optimal prediction value for a subset of rows from target and pred (see LineSearchFunc delegate above)
-    LineSearchFunc GetLineSearchFunc(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight);
+    LineSearchFunc GetLineSearchFunc(IEnumerable<double> target, IEnumerable<double> pred);
   }
 }
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/LogisticRegressionLoss.cs
--- r12607
+++ r12696
@@ -30,48 +30,42 @@
   // Greedy Function Approximation: A Gradient Boosting Machine (page 9)
   public class LogisticRegressionLoss : ILossFunction {
-    public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
+    public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred) {
       var targetEnum = target.GetEnumerator();
       var predEnum = pred.GetEnumerator();
-      var weightEnum = weight.GetEnumerator();
 
       double s = 0;
-      while (targetEnum.MoveNext() & predEnum.MoveNext() & weightEnum.MoveNext()) {
+      while (targetEnum.MoveNext() & predEnum.MoveNext()) {
         Debug.Assert(targetEnum.Current.IsAlmost(0.0) || targetEnum.Current.IsAlmost(1.0), "labels must be 0 or 1 for logistic regression loss");
 
         var y = targetEnum.Current * 2 - 1; // y in {-1,1}
-        s += weightEnum.Current * Math.Log(1 + Math.Exp(-2 * y * predEnum.Current));
+        s += Math.Log(1 + Math.Exp(-2 * y * predEnum.Current));
       }
-      if (targetEnum.MoveNext() | predEnum.MoveNext() | weightEnum.MoveNext())
-        throw new ArgumentException("target, pred and weight have different lengths");
+      if (targetEnum.MoveNext() | predEnum.MoveNext())
+        throw new ArgumentException("target and pred have different lengths");
 
       return s;
     }
 
-    public IEnumerable<double> GetLossGradient(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
+    public IEnumerable<double> GetLossGradient(IEnumerable<double> target, IEnumerable<double> pred) {
       var targetEnum = target.GetEnumerator();
       var predEnum = pred.GetEnumerator();
-      var weightEnum = weight.GetEnumerator();
 
-      while (targetEnum.MoveNext() & predEnum.MoveNext() & weightEnum.MoveNext()) {
+      while (targetEnum.MoveNext() & predEnum.MoveNext()) {
         Debug.Assert(targetEnum.Current.IsAlmost(0.0) || targetEnum.Current.IsAlmost(1.0), "labels must be 0 or 1 for logistic regression loss");
         var y = targetEnum.Current * 2 - 1; // y in {-1,1}
 
-        yield return weightEnum.Current * 2 * y / (1 + Math.Exp(2 * y * predEnum.Current));
+        yield return 2 * y / (1 + Math.Exp(2 * y * predEnum.Current));
 
       }
-      if (targetEnum.MoveNext() | predEnum.MoveNext() | weightEnum.MoveNext())
-        throw new ArgumentException("target, pred and weight have different lengths");
+      if (targetEnum.MoveNext() | predEnum.MoveNext())
+        throw new ArgumentException("target and pred have different lengths");
     }
 
-    public LineSearchFunc GetLineSearchFunc(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
+    public LineSearchFunc GetLineSearchFunc(IEnumerable<double> target, IEnumerable<double> pred) {
       var targetArr = target.ToArray();
       var predArr = pred.ToArray();
-      var weightArr = weight.ToArray();
-      // weights are not supported yet
-      // when weights are supported we need to calculate a weighted median
-      Debug.Assert(weightArr.All(w => w.IsAlmost(1.0)));
 
-      if (targetArr.Length != predArr.Length || predArr.Length != weightArr.Length)
-        throw new ArgumentException("target, pred and weight have different lengths");
+      if (targetArr.Length != predArr.Length)
+        throw new ArgumentException("target and pred have different lengths");
 
       // "Simple Newton-Raphson step" of eqn. 23
@@ -82,5 +76,5 @@
           var row = idx[i];
           var y = targetArr[row] * 2 - 1; // y in {-1,1}
-          var pseudoResponse = weightArr[row] * 2 * y / (1 + Math.Exp(2 * y * predArr[row]));
+          var pseudoResponse = 2 * y / (1 + Math.Exp(2 * y * predArr[row]));
 
           sumY += pseudoResponse;
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/RelativeErrorLoss.cs
--- r12635
+++ r12696
@@ -30,44 +30,40 @@
   // relative error loss is a special case of weighted absolute error loss with weights = (1/target)
   public class RelativeErrorLoss : ILossFunction {
-    public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
+    public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred) {
       var targetEnum = target.GetEnumerator();
       var predEnum = pred.GetEnumerator();
-      var weightEnum = weight.GetEnumerator();
 
       double s = 0;
-      while (targetEnum.MoveNext() & predEnum.MoveNext() & weightEnum.MoveNext()) {
+      while (targetEnum.MoveNext() & predEnum.MoveNext()) {
         double res = targetEnum.Current - predEnum.Current;
-        s += weightEnum.Current * Math.Abs(res) * Math.Abs(1.0 / targetEnum.Current);
+        s += Math.Abs(res) * Math.Abs(1.0 / targetEnum.Current);
       }
-      if (targetEnum.MoveNext() | predEnum.MoveNext() | weightEnum.MoveNext())
-        throw new ArgumentException("target, pred and weight have different lengths");
+      if (targetEnum.MoveNext() | predEnum.MoveNext())
+        throw new ArgumentException("target and pred have different lengths");
 
       return s;
     }
 
-    public IEnumerable<double> GetLossGradient(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
+    public IEnumerable<double> GetLossGradient(IEnumerable<double> target, IEnumerable<double> pred) {
       var targetEnum = target.GetEnumerator();
       var predEnum = pred.GetEnumerator();
-      var weightEnum = weight.GetEnumerator();
 
-      while (targetEnum.MoveNext() & predEnum.MoveNext() & weightEnum.MoveNext()) {
-        // weight * sign(res) * abs(1 / target)
+      while (targetEnum.MoveNext() & predEnum.MoveNext()) {
+        // sign(res) * abs(1 / target)
         var res = targetEnum.Current - predEnum.Current;
-        if (res > 0) yield return weightEnum.Current * 1.0 / Math.Abs(targetEnum.Current);
-        else if (res < 0) yield return -weightEnum.Current * 1.0 / Math.Abs(targetEnum.Current);
+        if (res > 0) yield return 1.0 / Math.Abs(targetEnum.Current);
+        else if (res < 0) yield return -1.0 / Math.Abs(targetEnum.Current);
         else yield return 0.0;
       }
-      if (targetEnum.MoveNext() | predEnum.MoveNext() | weightEnum.MoveNext())
-        throw new ArgumentException("target, pred and weight have different lengths");
+      if (targetEnum.MoveNext() | predEnum.MoveNext())
+        throw new ArgumentException("target and pred have different lengths");
     }
 
-    public LineSearchFunc GetLineSearchFunc(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
+    public LineSearchFunc GetLineSearchFunc(IEnumerable<double> target, IEnumerable<double> pred) {
       var targetArr = target.ToArray();
       var predArr = pred.ToArray();
-      var weightArr = weight.ToArray();
-      Debug.Assert(weightArr.All(w => w.IsAlmost(1.0)));
 
-      if (targetArr.Length != predArr.Length || predArr.Length != weightArr.Length)
-        throw new ArgumentException("target, pred and weight have different lengths");
+      if (targetArr.Length != predArr.Length)
+        throw new ArgumentException("target and pred have different lengths");
 
       // line search for relative error
@@ -79,6 +75,6 @@
         else if (nRows == 2) {
           // weighted average of two residuals
-          var w0 = weightArr[idx[startIdx]] * Math.Abs(1.0 / targetArr[idx[startIdx]]);
-          var w1 = weightArr[idx[endIdx]] * Math.Abs(1.0 / targetArr[idx[endIdx]]);
+          var w0 = Math.Abs(1.0 / targetArr[idx[startIdx]]);
+          var w1 = Math.Abs(1.0 / targetArr[idx[endIdx]]);
           if (w0 > w1) {
             return targetArr[idx[startIdx]] - predArr[idx[startIdx]];
@@ -96,5 +92,5 @@
             int row = idx[i];
             var res = targetArr[row] - predArr[row];
-            var w = weightArr[row] * Math.Abs(1.0 / targetArr[row]);
+            var w = Math.Abs(1.0 / targetArr[row]);
             res_w[i - startIdx] = new KeyValuePair<double, double>(res, w);
             totalWeight += w;
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/SquaredErrorLoss.cs
--- r12607
+++ r12696
@@ -27,38 +27,35 @@
 namespace HeuristicLab.Algorithms.DataAnalysis {
   public class SquaredErrorLoss : ILossFunction {
-    public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
+    public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred) {
       var targetEnum = target.GetEnumerator();
       var predEnum = pred.GetEnumerator();
-      var weightEnum = weight.GetEnumerator();
 
       double s = 0;
-      while (targetEnum.MoveNext() & predEnum.MoveNext() & weightEnum.MoveNext()) {
+      while (targetEnum.MoveNext() & predEnum.MoveNext()) {
         double res = targetEnum.Current - predEnum.Current;
-        s += weightEnum.Current * res * res; // w * (res)^2
+        s += res * res; // (res)^2
       }
-      if (targetEnum.MoveNext() | predEnum.MoveNext() | weightEnum.MoveNext())
-        throw new ArgumentException("target, pred and weight have different lengths");
+      if (targetEnum.MoveNext() | predEnum.MoveNext())
+        throw new ArgumentException("target and pred have different lengths");
 
       return s;
     }
 
-    public IEnumerable<double> GetLossGradient(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
+    public IEnumerable<double> GetLossGradient(IEnumerable<double> target, IEnumerable<double> pred) {
       var targetEnum = target.GetEnumerator();
       var predEnum = pred.GetEnumerator();
-      var weightEnum = weight.GetEnumerator();
 
-      while (targetEnum.MoveNext() & predEnum.MoveNext() & weightEnum.MoveNext()) {
-        yield return weightEnum.Current * 2.0 * (targetEnum.Current - predEnum.Current); // dL(y, f(x)) / df(x) = w * 2 * res
+      while (targetEnum.MoveNext() & predEnum.MoveNext()) {
+        yield return 2.0 * (targetEnum.Current - predEnum.Current); // dL(y, f(x)) / df(x) = 2 * res
       }
-      if (targetEnum.MoveNext() | predEnum.MoveNext() | weightEnum.MoveNext())
-        throw new ArgumentException("target, pred and weight have different lengths");
+      if (targetEnum.MoveNext() | predEnum.MoveNext())
+        throw new ArgumentException("target and pred have different lengths");
     }
 
-    public LineSearchFunc GetLineSearchFunc(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
+    public LineSearchFunc GetLineSearchFunc(IEnumerable<double> target, IEnumerable<double> pred) {
       var targetArr = target.ToArray();
       var predArr = pred.ToArray();
-      var weightArr = weight.ToArray();
-      if (targetArr.Length != predArr.Length || predArr.Length != weightArr.Length)
-        throw new ArgumentException("target, pred and weight have different lengths");
+      if (targetArr.Length != predArr.Length)
+        throw new ArgumentException("target and pred have different lengths");
 
       // line search for squared error loss
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/RegressionTreeBuilder.cs
--- r12661
+++ r12696
@@ -128,7 +128,6 @@
       var seLoss = new SquaredErrorLoss();
       var zeros = Enumerable.Repeat(0.0, y.Length);
-      var ones = Enumerable.Repeat(1.0, y.Length);
-
-      var model = CreateRegressionTreeForGradientBoosting(y, maxSize, problemData.TrainingIndices.ToArray(), seLoss.GetLineSearchFunc(y, zeros, ones), r, m);
+
+      var model = CreateRegressionTreeForGradientBoosting(y, maxSize, problemData.TrainingIndices.ToArray(), seLoss.GetLineSearchFunc(y, zeros), r, m);
 
       return new GradientBoostedTreesModel(new[] { new ConstantRegressionModel(yAvg), model }, new[] { 1.0, 1.0 });
Note: See TracChangeset for help on using the changeset viewer.