Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
07/06/15 15:20:28 (9 years ago)
Author:
gkronber
Message:

#2261: Also use the line search function for the initial estimate f0; changed the logistic regression loss function to match the description in the GBM paper; added comments and made code improvements.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/LogisticRegressionLoss.cs

    r12590 r12607  
    2828
    2929namespace HeuristicLab.Algorithms.DataAnalysis {
     30  // Greedy Function Approximation: A Gradient Boosting Machine (page 9)
    3031  public class LogisticRegressionLoss : ILossFunction {
    3132    public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
     
    3637      double s = 0;
    3738      while (targetEnum.MoveNext() & predEnum.MoveNext() & weightEnum.MoveNext()) {
    38         // assert target == 0 or target == 1
    39         if (!targetEnum.Current.IsAlmost(0.0) && !targetEnum.Current.IsAlmost(1.0))
    40           throw new NotSupportedException("labels must be 0 or 1 for logistic regression loss");
    41         double f = Math.Max(-7, Math.Min(7, predEnum.Current)); // threshold for exponent
    42         var probPos = Math.Exp(2 * f) / (1 + Math.Exp(2 * f));
    43         s += weightEnum.Current * (-targetEnum.Current * Math.Log(probPos) - (1 - targetEnum.Current) * Math.Log(1 - probPos));
     39        Debug.Assert(targetEnum.Current.IsAlmost(0.0) || targetEnum.Current.IsAlmost(1.0), "labels must be 0 or 1 for logistic regression loss");
     40
     41        var y = targetEnum.Current * 2 - 1; // y in {-1,1}
     42        s += weightEnum.Current * Math.Log(1 + Math.Exp(-2 * y * predEnum.Current));
    4443      }
    4544      if (targetEnum.MoveNext() | predEnum.MoveNext() | weightEnum.MoveNext())
    46         throw new ArgumentException("target, pred and weight have differing lengths");
     45        throw new ArgumentException("target, pred and weight have different lengths");
    4746
    4847      return s;
     
    5554
    5655      while (targetEnum.MoveNext() & predEnum.MoveNext() & weightEnum.MoveNext()) {
    57         // assert target == 0 or target == 1
    58         if (!targetEnum.Current.IsAlmost(0.0) && !targetEnum.Current.IsAlmost(1.0))
    59           throw new NotSupportedException("labels must be 0 or 1 for logistic regression loss");
    60         double f = Math.Max(-7, Math.Min(7, predEnum.Current)); // threshold for exponent
    61         var probPos = Math.Exp(2 * f) / (1 + Math.Exp(2 * f));
    62         yield return weightEnum.Current * (targetEnum.Current - probPos) / (probPos * probPos - probPos);
     56        Debug.Assert(targetEnum.Current.IsAlmost(0.0) || targetEnum.Current.IsAlmost(1.0), "labels must be 0 or 1 for logistic regression loss");
     57        var y = targetEnum.Current * 2 - 1; // y in {-1,1}
     58
     59        yield return weightEnum.Current * 2 * y / (1 + Math.Exp(2 * y * predEnum.Current));
     60
    6361      }
    6462      if (targetEnum.MoveNext() | predEnum.MoveNext() | weightEnum.MoveNext())
    65         throw new ArgumentException("target, pred and weight have differing lengths");
     63        throw new ArgumentException("target, pred and weight have different lengths");
    6664    }
    6765
     
    7573
    7674      if (targetArr.Length != predArr.Length || predArr.Length != weightArr.Length)
    77         throw new ArgumentException("target, pred and weight have differing lengths");
     75        throw new ArgumentException("target, pred and weight have different lengths");
    7876
    79       // line search for abs error
     77      // "Simple Newton-Raphson step" of eqn. 23
    8078      LineSearchFunc lineSearch = (idx, startIdx, endIdx) => {
    8179        double sumY = 0.0;
    8280        double sumDiff = 0.0;
    8381        for (int i = startIdx; i <= endIdx; i++) {
    84           var yi = (targetArr[idx[i]] - predArr[idx[i]]);
    85           var wi = weightArr[idx[i]];
     82          var row = idx[i];
     83          var y = targetArr[row] * 2 - 1; // y in {-1,1}
     84          var pseudoResponse = weightArr[row] * 2 * y / (1 + Math.Exp(2 * y * predArr[row]));
    8685
    87           sumY += wi * yi;
    88           sumDiff += wi * Math.Abs(yi) * (1 - Math.Abs(yi));
    89 
     86          sumY += pseudoResponse;
     87          sumDiff += Math.Abs(pseudoResponse) * (2 - Math.Abs(pseudoResponse));
    9088        }
    9189        // prevent divByZero
    9290        sumDiff = Math.Max(1E-12, sumDiff);
    93         return 0.5 * sumY / sumDiff;
     91        return sumY / sumDiff;
    9492      };
    9593      return lineSearch;
Note: See TracChangeset for help on using the changeset viewer.