Free cookie consent management tool by TermsFeed Policy Generator

source: branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/LogisticRegressionLoss.cs @ 12607

Last change on this file since 12607 was 12607, checked in by gkronber, 9 years ago

#2261: also use line search function for the initial estimation f0, changed logistic regression loss function to match description in GBM paper, comments and code improvements

File size: 4.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 * and the BEACON Center for the Study of Evolution in Action.
5 *
6 * This file is part of HeuristicLab.
7 *
8 * HeuristicLab is free software: you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation, either version 3 of the License, or
11 * (at your option) any later version.
12 *
13 * HeuristicLab is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
20 */
21#endregion
22
23using System;
24using System.Collections.Generic;
25using System.Diagnostics;
26using System.Linq;
27using HeuristicLab.Common;
28
29namespace HeuristicLab.Algorithms.DataAnalysis {
30  // Greedy Function Approximation: A Gradient Boosting Machine (page 9)
/// <summary>
/// Loss function for two-class logistic regression.
/// Targets are expected as labels 0 or 1 and are mapped internally to y in {-1, 1};
/// the loss of a single row is weight * log(1 + exp(-2 * y * pred)).
/// </summary>
public class LogisticRegressionLoss : ILossFunction {
  private const string LengthMismatchMessage = "target, pred and weight have different lengths";
  private const string InvalidLabelMessage = "labels must be 0 or 1 for logistic regression loss";

  /// <summary>
  /// Calculates the weighted sum of log(1 + exp(-2 * y * pred)) over all rows.
  /// </summary>
  /// <param name="target">Class labels (0 or 1), one per row.</param>
  /// <param name="pred">Current predictions, one per row.</param>
  /// <param name="weight">Row weights, one per row.</param>
  /// <returns>The summed weighted loss.</returns>
  /// <exception cref="ArgumentException">Thrown when the three sequences have different lengths.</exception>
  public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
    using (var targetEnum = target.GetEnumerator())
    using (var predEnum = pred.GetEnumerator())
    using (var weightEnum = weight.GetEnumerator()) {
      double s = 0;
      while (MoveNextAll(targetEnum, predEnum, weightEnum)) {
        Debug.Assert(targetEnum.Current.IsAlmost(0.0) || targetEnum.Current.IsAlmost(1.0), InvalidLabelMessage);

        var y = targetEnum.Current * 2 - 1; // y in {-1,1}
        s += weightEnum.Current * Math.Log(1 + Math.Exp(-2 * y * predEnum.Current));
      }
      return s;
    }
  }

  /// <summary>
  /// Lazily yields, for each row, weight * 2 * y / (1 + exp(2 * y * pred)).
  /// This is the negative derivative of the row's loss with respect to pred (the pseudo-response).
  /// </summary>
  /// <param name="target">Class labels (0 or 1), one per row.</param>
  /// <param name="pred">Current predictions, one per row.</param>
  /// <param name="weight">Row weights, one per row.</param>
  /// <returns>One value per row; the sequence is evaluated on enumeration.</returns>
  /// <exception cref="ArgumentException">
  /// Thrown during enumeration (after the last common row) when the three sequences have different lengths.
  /// </exception>
  public IEnumerable<double> GetLossGradient(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
    // the using blocks are released when the iterator completes or is disposed by the consumer
    using (var targetEnum = target.GetEnumerator())
    using (var predEnum = pred.GetEnumerator())
    using (var weightEnum = weight.GetEnumerator()) {
      while (MoveNextAll(targetEnum, predEnum, weightEnum)) {
        Debug.Assert(targetEnum.Current.IsAlmost(0.0) || targetEnum.Current.IsAlmost(1.0), InvalidLabelMessage);
        var y = targetEnum.Current * 2 - 1; // y in {-1,1}

        yield return weightEnum.Current * 2 * y / (1 + Math.Exp(2 * y * predEnum.Current));
      }
    }
  }

  /// <summary>
  /// Creates a function that calculates the step for a subset of rows
  /// using the "simple Newton-Raphson step" of eqn. 23.
  /// </summary>
  /// <param name="target">Class labels (0 or 1), one per row.</param>
  /// <param name="pred">Current predictions, one per row.</param>
  /// <param name="weight">Row weights, one per row.</param>
  /// <returns>
  /// A function (idx, startIdx, endIdx) that returns the step for the rows idx[startIdx] .. idx[endIdx] (both inclusive).
  /// </returns>
  /// <exception cref="ArgumentException">Thrown when the three sequences have different lengths.</exception>
  public LineSearchFunc GetLineSearchFunc(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
    // copy the inputs once; the returned function is called repeatedly with different row subsets
    var targetArr = target.ToArray();
    var predArr = pred.ToArray();
    var weightArr = weight.ToArray();

    if (targetArr.Length != predArr.Length || predArr.Length != weightArr.Length)
      throw new ArgumentException(LengthMismatchMessage);

    // "Simple Newton-Raphson step" of eqn. 23
    LineSearchFunc lineSearch = (idx, startIdx, endIdx) => {
      double sumY = 0.0;
      double sumDiff = 0.0;
      for (int i = startIdx; i <= endIdx; i++) {
        var row = idx[i];
        var y = targetArr[row] * 2 - 1; // y in {-1,1}
        var w = weightArr[row];
        // unweighted pseudo-response; the weight must not enter the curvature term quadratically
        var pseudoResponse = 2 * y / (1 + Math.Exp(2 * y * predArr[row]));
        var absResponse = Math.Abs(pseudoResponse);

        // weighted first derivative (numerator) and weighted second derivative (denominator);
        // for w = 1 this is exactly sum(y~) / sum(|y~| * (2 - |y~|))
        sumY += w * pseudoResponse;
        sumDiff += w * absResponse * (2 - absResponse);
      }
      // prevent divByZero
      sumDiff = Math.Max(1E-12, sumDiff);
      return sumY / sumDiff;
    };
    return lineSearch;
  }

  public override string ToString() {
    return "Logistic regression loss";
  }

  // Advances all three enumerators by one element.
  // Returns true when all of them have a current element and false when all are exhausted at the same time.
  // Each result is kept separately: otherwise a sequence that is longer by exactly one element would
  // consume its extra element here and the mismatch would go unnoticed.
  private static bool MoveNextAll(IEnumerator<double> targetEnum, IEnumerator<double> predEnum, IEnumerator<double> weightEnum) {
    bool hasTarget = targetEnum.MoveNext();
    bool hasPred = predEnum.MoveNext();
    bool hasWeight = weightEnum.MoveNext();

    if (hasTarget && hasPred && hasWeight) return true;
    if (hasTarget || hasPred || hasWeight)
      throw new ArgumentException(LengthMismatchMessage);
    return false;
  }
}
101}
Note: See TracBrowser for help on using the repository browser.