Free cookie consent management tool by TermsFeed Policy Generator

Changeset 12590


Ignore:
Timestamp:
07/04/15 16:03:36 (9 years ago)
Author:
gkronber
Message:

#2261: preparations for trunk integration (adapt to current trunk version, add license headers, add comments, improve code quality)

Location:
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees
Files:
10 edited

Legend:

Unmodified
Added
Removed
  • branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithm.cs

    r12373 r12590  
    2222
    2323using System;
    24 using System.Collections.Generic;
    25 using System.ComponentModel;
    26 using System.Diagnostics.Contracts;
    2724using System.Linq;
    2825using System.Threading;
    29 using GradientBoostedTrees;
    3026using HeuristicLab.Analysis;
    3127using HeuristicLab.Common;
     
    3733using HeuristicLab.PluginInfrastructure;
    3834using HeuristicLab.Problems.DataAnalysis;
    39 using HeuristicLab.Random;
    4035
    4136namespace HeuristicLab.Algorithms.DataAnalysis {
    42   [Item("Gradient Boosted Trees", "")]
     37  [Item("Gradient Boosted Trees", "Gradient boosted trees algorithm. Friedman, J. \"Greedy Function Approximation: A Gradient Boosting Machine\", IMS 1999 Reitz Lecture.")]
    4338  [StorableClass]
    44   [Creatable("Algorithms")]
     39  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 125)]
    4540  public class GradientBoostedTreesAlgorithm : BasicAlgorithm {
    4641    public override Type ProblemType {
     
    170165
    171166      var lossFunctionNames = ApplicationManager.Manager.GetInstances<ILossFunction>().Select(l => new StringValue(l.ToString()).AsReadOnly());
    172       var defaultLossFunction = lossFunctionNames.First(l => l.Value.Contains("Squared")); // squared error loss is the default
    173       Parameters.Add(new ConstrainedValueParameter<StringValue>(LossFunctionParameterName, "The loss function", new ItemSet<StringValue>(lossFunctionNames), defaultLossFunction));
     167      Parameters.Add(new ConstrainedValueParameter<StringValue>(LossFunctionParameterName, "The loss function", new ItemSet<StringValue>(lossFunctionNames)));
     168      LossFunctionParameter.ActualValue = LossFunctionParameter.ValidValues.First(l => l.Value.Contains("Squared")); // squared error loss is the default
    174169    }
    175170
     
    178173      // Set up the algorithm
    179174      if (SetSeedRandomly) Seed = new System.Random().Next();
    180       // random.Reset(Seed);
    181175
    182176      // Set up the results display
  • branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithmStatic.cs

    r12371 r12590  
    2525using System.Diagnostics.Contracts;
    2626using System.Linq;
    27 using GradientBoostedTrees;
    2827using HeuristicLab.Problems.DataAnalysis;
    2928using HeuristicLab.Random;
     
    3433
    3534    public interface IGbmState {
    36 
    3735      IRegressionModel GetModel();
    3836      double GetTrainLoss();
     
    4240
    4341    // created through factory method
     42    // GbmState details are private; API users can only use the methods from IGbmState
    4443    private class GbmState : IGbmState {
    4544      internal IRegressionProblemData problemData { get; set; }
    46       internal MersenneTwister random { get; set; }
     45      internal MersenneTwister random { get; private set; }
    4746      internal ILossFunction lossFunction { get; set; }
    4847      internal int maxDepth { get; set; }
     
    5049      internal double r { get; set; }
    5150      internal double m { get; set; }
    52       internal RegressionTreeBuilder treeBuilder;
     51      internal readonly RegressionTreeBuilder treeBuilder;
    5352
    5453
     
    156155    }
    157156
    158     // allow dynamic adaptation of maxDepth, nu and r
     157    // allow dynamic adaptation of maxDepth, nu and r (even though this is not used)
    159158    public static void MakeStep(IGbmState state, int maxDepth, double nu, double r, double m) {
    160159      var gbmState = state as GbmState;
  • branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesModel.cs

    r12589 r12590  
    1 using System;
     1#region License Information
     2/* HeuristicLab
     3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     4 * and the BEACON Center for the Study of Evolution in Action.
     5 *
     6 * This file is part of HeuristicLab.
     7 *
     8 * HeuristicLab is free software: you can redistribute it and/or modify
     9 * it under the terms of the GNU General Public License as published by
     10 * the Free Software Foundation, either version 3 of the License, or
     11 * (at your option) any later version.
     12 *
     13 * HeuristicLab is distributed in the hope that it will be useful,
     14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 * GNU General Public License for more details.
     17 *
     18 * You should have received a copy of the GNU General Public License
     19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
     20 */
     21#endregion
     22
     23using System;
    224using System.Collections.Generic;
    325using System.Linq;
     
    729using HeuristicLab.Problems.DataAnalysis;
    830
    9 namespace GradientBoostedTrees {
     31namespace HeuristicLab.Algorithms.DataAnalysis {
    1032  [StorableClass]
    1133  [Item("Gradient boosted tree model", "")]
     34  // this is essentially a collection of weighted regression models
    1235  public sealed class GradientBoostedTreesModel : NamedItem, IRegressionModel {
    13 
    1436    [Storable]
    1537    private readonly IList<IRegressionModel> models;
     
    4062
    4163    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    42       var tuples = (from idx in Enumerable.Range(0, models.Count)
    43                     let model = models[idx]
    44                     let weight = weights[idx]
    45                     select new { weight, enumerator = model.GetEstimatedValues(dataset, rows).GetEnumerator() }).ToArray();
    46 
    47 
    48       while (tuples.All(t => t.enumerator.MoveNext())) {
    49         yield return tuples.Sum(t => t.weight * t.enumerator.Current);
     64      // allocate the target array; go over all models and add up the weighted estimation for each row
     65      var res = new double[rows.Count()];
     66      for (int i = 0; i < models.Count; i++) {
     67        var w = weights[i];
     68        var m = models[i];
     69        int r = 0;
     70        foreach (var est in m.GetEstimatedValues(dataset, rows)) {
     71          res[r++] += w * est;
     72        }
    5073      }
     74      return res;
    5175    }
    5276
  • branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/AbsoluteErrorLoss.cs

    r12374 r12590  
    1 using System;
     1#region License Information
     2/* HeuristicLab
     3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     4 * and the BEACON Center for the Study of Evolution in Action.
     5 *
     6 * This file is part of HeuristicLab.
     7 *
     8 * HeuristicLab is free software: you can redistribute it and/or modify
     9 * it under the terms of the GNU General Public License as published by
     10 * the Free Software Foundation, either version 3 of the License, or
     11 * (at your option) any later version.
     12 *
     13 * HeuristicLab is distributed in the hope that it will be useful,
     14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 * GNU General Public License for more details.
     17 *
     18 * You should have received a copy of the GNU General Public License
     19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
     20 */
     21#endregion
     22
     23using System;
    224using System.Collections.Generic;
    325using System.Diagnostics;
    426using System.Linq;
    5 using System.Text;
    6 using System.Threading.Tasks;
    727using HeuristicLab.Common;
    8 using HeuristicLab.Core;
    928
    10 namespace GradientBoostedTrees {
     29namespace HeuristicLab.Algorithms.DataAnalysis {
     30  // loss function for the weighted absolute error
    1131  public class AbsoluteErrorLoss : ILossFunction {
    1232    public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
     
    5474      // line search for abs error
    5575      LineSearchFunc lineSearch = (idx, startIdx, endIdx) => {
     76        // Median() is allocating an array anyway
     77        // It would be possible to pre-allocate an array for the residuals if Median() allowed specification of a sub-range
    5678        int nRows = endIdx - startIdx + 1;
    57         var res = from offset in Enumerable.Range(0, nRows)
    58                   let i = startIdx + offset
    59                   let row = idx[i]
    60                   select (targetArr[row] - predArr[row]);
     79        var res = new double[nRows];
     80        for (int offset = 0; offset < nRows; offset++) {
     81          var i = startIdx + offset;
     82          var row = idx[i];
     83          res[offset] = targetArr[row] - predArr[row];
     84        }
    6185        return res.Median();
    62 
    63         // old code for weighted median calculation
    64         // int nRows = endIdx - startIdx + 1; // startIdx and endIdx are inclusive
    65         // if (nRows == 1) return targetArr[idx[startIdx]] - predArr[idx[startIdx]];
    66         // else if (nRows == 2) {
    67         //   if (weightArr[idx[startIdx]] > weightArr[idx[endIdx]]) {
    68         //     return targetArr[idx[startIdx]] - predArr[idx[startIdx]];
    69         //   } else if (weightArr[idx[startIdx]] < weightArr[idx[endIdx]]) {
    70         //     return targetArr[idx[endIdx]] - predArr[idx[endIdx]];
    71         //   } else {
    72         //     // same weight
    73         //     return ((targetArr[idx[startIdx]] - predArr[idx[startIdx]]) +
    74         //        (targetArr[idx[endIdx]] - predArr[idx[endIdx]])) / 2;
    75         //   }
    76         // } else {
    77         //   var ts = from offset in Enumerable.Range(0, nRows)
    78         //            let i = startIdx + offset
    79         //            select new { res = targetArr[idx[i]] - predArr[idx[i]], weight = weightArr[idx[i]] };
    80         //   ts = ts.OrderBy(t => t.res);
    81         //   var totalWeight = ts.Sum(t => t.weight);
    82         //   var tsEnumerator = ts.GetEnumerator();
    83         //   tsEnumerator.MoveNext();
    84         //
    85         //   double aggWeight = tsEnumerator.Current.weight; // weight of first
    86         //
    87         //   while (aggWeight < totalWeight / 2) {
    88         //     tsEnumerator.MoveNext();
    89         //     aggWeight += tsEnumerator.Current.weight;
    90         //   }
    91         //   return tsEnumerator.Current.res;
    92         // }
    9386      };
    9487      return lineSearch;
  • branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/ILossFunction.cs

    r12332 r12590  
    1 using System;
     1#region License Information
     2/* HeuristicLab
     3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     4 * and the BEACON Center for the Study of Evolution in Action.
     5 *
     6 * This file is part of HeuristicLab.
     7 *
     8 * HeuristicLab is free software: you can redistribute it and/or modify
     9 * it under the terms of the GNU General Public License as published by
     10 * the Free Software Foundation, either version 3 of the License, or
     11 * (at your option) any later version.
     12 *
     13 * HeuristicLab is distributed in the hope that it will be useful,
     14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 * GNU General Public License for more details.
     17 *
     18 * You should have received a copy of the GNU General Public License
     19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
     20 */
     21#endregion
     22
    223using System.Collections.Generic;
    3 using System.Linq;
    4 using System.Text;
    5 using System.Threading.Tasks;
    6 using HeuristicLab.Core;
    724
    8 namespace GradientBoostedTrees {
     25namespace HeuristicLab.Algorithms.DataAnalysis {
    926  public delegate double LineSearchFunc(int[] idx, int startIdx, int endIdx);
    1027
    1128  public interface ILossFunction {
     29    // returns the weighted loss
    1230    double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight);
     31
     32    // returns an enumerable of the weighted loss gradient for each row
    1333    IEnumerable<double> GetLossGradient(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight);
     34
     35    // returns a function that returns the optimal prediction value for a subset of rows from target and pred (see LineSearchFunc delegate above)
    1436    LineSearchFunc GetLineSearchFunc(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight);
    1537  }
  • branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/LogisticRegressionLoss.cs

    r12589 r12590  
    1 using System;
     1#region License Information
     2/* HeuristicLab
     3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     4 * and the BEACON Center for the Study of Evolution in Action.
     5 *
     6 * This file is part of HeuristicLab.
     7 *
     8 * HeuristicLab is free software: you can redistribute it and/or modify
     9 * it under the terms of the GNU General Public License as published by
     10 * the Free Software Foundation, either version 3 of the License, or
     11 * (at your option) any later version.
     12 *
     13 * HeuristicLab is distributed in the hope that it will be useful,
     14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 * GNU General Public License for more details.
     17 *
     18 * You should have received a copy of the GNU General Public License
     19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
     20 */
     21#endregion
     22
     23using System;
    224using System.Collections.Generic;
    325using System.Diagnostics;
    426using System.Linq;
    5 using System.Text;
    6 using System.Threading.Tasks;
    727using HeuristicLab.Common;
    8 using HeuristicLab.Core;
    928
    10 namespace GradientBoostedTrees {
     29namespace HeuristicLab.Algorithms.DataAnalysis {
    1130  public class LogisticRegressionLoss : ILossFunction {
    1231    public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
  • branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/RelativeErrorLoss.cs

    r12374 r12590  
    1 using System;
     1#region License Information
     2/* HeuristicLab
     3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     4 * and the BEACON Center for the Study of Evolution in Action.
     5 *
     6 * This file is part of HeuristicLab.
     7 *
     8 * HeuristicLab is free software: you can redistribute it and/or modify
     9 * it under the terms of the GNU General Public License as published by
     10 * the Free Software Foundation, either version 3 of the License, or
     11 * (at your option) any later version.
     12 *
     13 * HeuristicLab is distributed in the hope that it will be useful,
     14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 * GNU General Public License for more details.
     17 *
     18 * You should have received a copy of the GNU General Public License
     19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
     20 */
     21#endregion
     22
     23using System;
    224using System.Collections.Generic;
    325using System.Diagnostics;
    426using System.Linq;
    5 using System.Text;
    6 using System.Threading.Tasks;
    727using HeuristicLab.Common;
    8 using HeuristicLab.Core;
    928
    10 namespace GradientBoostedTrees {
     29namespace HeuristicLab.Algorithms.DataAnalysis {
     30  // relative error loss is a special case of weighted absolute error loss
    1131  public class RelativeErrorLoss : ILossFunction {
    1232    public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
     
    5070        throw new ArgumentException("target, pred and weight have differing lengths");
    5171
    52       // line search for abs error
     72      // line search for relative error
     73      // TODO: check and improve?
    5374      LineSearchFunc lineSearch = (idx, startIdx, endIdx) => {
    5475        // weighted median calculation
  • branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/SquaredErrorLoss.cs

    r12332 r12590  
    1 using System;
     1#region License Information
     2/* HeuristicLab
     3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     4 * and the BEACON Center for the Study of Evolution in Action.
     5 *
     6 * This file is part of HeuristicLab.
     7 *
     8 * HeuristicLab is free software: you can redistribute it and/or modify
     9 * it under the terms of the GNU General Public License as published by
     10 * the Free Software Foundation, either version 3 of the License, or
     11 * (at your option) any later version.
     12 *
     13 * HeuristicLab is distributed in the hope that it will be useful,
     14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 * GNU General Public License for more details.
     17 *
     18 * You should have received a copy of the GNU General Public License
     19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
     20 */
     21#endregion
     22
     23using System;
    224using System.Collections.Generic;
    325using System.Linq;
    4 using System.Text;
    5 using System.Threading.Tasks;
    6 using HeuristicLab.Common;
    7 using HeuristicLab.Core;
    826
    9 namespace GradientBoostedTrees {
     27namespace HeuristicLab.Algorithms.DataAnalysis {
    1028  public class SquaredErrorLoss : ILossFunction {
    1129    public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) {
     
    4462        throw new ArgumentException("target, pred and weight have differing lengths");
    4563
    46       // line search for
     64      // line search for squared error loss => return the average value
    4765      LineSearchFunc lineSearch = (idx, startIdx, endIdx) => {
    4866        double s = 0.0;
  • branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/RegressionTreeBuilder.cs

    r12372 r12590  
    1 using System;
     1#region License Information
     2/* HeuristicLab
     3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     4 * and the BEACON Center for the Study of Evolution in Action.
     5 *
     6 * This file is part of HeuristicLab.
     7 *
     8 * HeuristicLab is free software: you can redistribute it and/or modify
     9 * it under the terms of the GNU General Public License as published by
     10 * the Free Software Foundation, either version 3 of the License, or
     11 * (at your option) any later version.
     12 *
     13 * HeuristicLab is distributed in the hope that it will be useful,
     14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 * GNU General Public License for more details.
     17 *
     18 * You should have received a copy of the GNU General Public License
     19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
     20 */
     21#endregion
     22
     23using System;
    224using System.Collections.Generic;
    325using System.Diagnostics;
    426using System.Diagnostics.Contracts;
    527using System.Linq;
    6 using HeuristicLab.Common;
    728using HeuristicLab.Core;
    829using HeuristicLab.Problems.DataAnalysis;
    930
    10 namespace GradientBoostedTrees {
     31namespace HeuristicLab.Algorithms.DataAnalysis {
     32  // This class implements a greedy decision tree learner which selects splits with the maximum reduction in sum of squared errors.
     33  // The tree builder also tracks variable relevance metrics based on the splits and improvement after the split.
     34  // The implementation is tuned for gradient boosting where multiple trees have to be calculated for the same training data
     36  // each time with a different target vector. Vectors of idx to allow iteration of input variables in sorted order are
     36  // pre-calculated so that optimal thresholds for splits can be calculated in O(n) for each input variable.
     37  // After each split the row idx are partitioned into a left and a right part.
    1138  public class RegressionTreeBuilder {
    1239    private readonly IRandom random;
     
    1441
    1542    private readonly int nCols;
    16     private readonly double[][] x; // all training data (original order from problemData)
    17     private double[] y; // training labels (original order from problemData)
     43    private readonly double[][] x; // all training data (original order from problemData), x is constant
     44    private double[] y; // training labels (original order from problemData), y can be changed
    1845
    1946    private Dictionary<string, double> sumImprovements; // for variable relevance calculation
     
    76103    }
    77104
     105    // simple API produces a single regression tree optimizing sum of squared errors
     106    // this can be used if only a simple regression tree should be produced
     107    // for a set of trees use the method CreateRegressionTreeForGradientBoosting below
     108    //
    78109    // r and m work in the same way as for alglib random forest
    79110    // r is fraction of rows to use for training
     
    92123    }
    93124
    94     // specific interface that allows to specify the target labels and the training rows which is necessary when this functionality is called by the gradient boosting routine
     125    // specific interface that allows specifying the target labels and the training rows, which is necessary for gradient boosted trees
    95126    public IRegressionModel CreateRegressionTreeForGradientBoosting(double[] y, int maxDepth, int[] idx, LineSearchFunc lineSearch, double r = 0.5, double m = 0.5) {
    96127      Contract.Assert(maxDepth > 0);
     
    111142      HeuristicLab.Random.ListExtensions.ShuffleInPlace(allowedVariables, random);
    112143
     144      // only select a part of the rows and columns randomly
    113145      effectiveRows = (int)Math.Ceiling(nRows * r);
    114146      effectiveVars = (int)Math.Ceiling(nCols * m);
    115147
     148      // the which array is used for partitioning row idxs
    116149      Array.Clear(which, 0, which.Length);
    117150
    118151      // mark selected rows
    119152      for (int row = 0; row < effectiveRows; row++) {
    120         which[idx[row]] = 1;
     153        which[idx[row]] = 1; // we use the which vector as a temporary variable here
    121154        internalIdx[row] = idx[row];
    122155      }
     
    126159        for (int row = 0; row < nRows; row++) {
    127160          if (which[sortedIdxAll[col][row]] > 0) {
    128             Trace.Assert(i < effectiveRows);
     161            Debug.Assert(i < effectiveRows);
    129162            sortedIdx[col][i] = sortedIdxAll[col][row];
    130163            i++;
     
    135168      // prepare array for the tree nodes (a tree of maxDepth=1 has 1 node, a tree of maxDepth=d has 2^d - 1 nodes)     
    136169      int numNodes = (int)Math.Pow(2, maxDepth) - 1;
    137       //this.tree = new RegressionTreeModel.TreeNode[numNodes];
    138       this.tree = Enumerable.Range(0, numNodes).Select(_=>new RegressionTreeModel.TreeNode()).ToArray();
     170      this.tree = new RegressionTreeModel.TreeNode[numNodes];
    139171      this.curTreeNodeIdx = 0;
    140172
     
    144176    }
    145177
     178    // recursive routine for building the tree for the row idx stored in internalIdx between startIdx and endIdx
     179    // the lineSearch function calculates the optimal prediction value for tree leaf nodes
     180    // (in the case of squared errors it is the average of target values for the rows represented by the node)
    146181    // startIdx and endIdx are inclusive
    147182    private void CreateRegressionTreeForIdx(int maxDepth, int startIdx, int endIdx, LineSearchFunc lineSearch) {
     
    214249            else if (which[internalIdx[j]] > 0) j--;
    215250            else {
    216               Trace.Assert(which[internalIdx[i]] > 0);
    217               Trace.Assert(which[internalIdx[j]] < 0);
     251              Debug.Assert(which[internalIdx[i]] > 0);
     252              Debug.Assert(which[internalIdx[j]] < 0);
    218253              // swap
    219254              int tmp = internalIdx[i];
     
    283318
    284319      threshold = bestThreshold;
    285 
    286       // Contract.Assert(bestImprovement > 0);
    287       // Contract.Assert(bestImprovement < double.PositiveInfinity);
    288       // Contract.Assert(bestVar != string.Empty);
    289       // Contract.Assert(allowedVariables.Contains(bestVar));
    290320    }
    291321
     
    351381
    352382    public IEnumerable<KeyValuePair<string, double>> GetVariableRelevance() {
     383      // values are scaled: the most important variable has relevance = 100
    353384      double scaling = 100 / sumImprovements.Max(t => t.Value);
    354385      return
  • branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/RegressionTreeModel.cs

    r12589 r12590  
    1 using System;
     1#region License Information
     2/* HeuristicLab
     3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     4 * and the BEACON Center for the Study of Evolution in Action.
     5 *
     6 * This file is part of HeuristicLab.
     7 *
     8 * HeuristicLab is free software: you can redistribute it and/or modify
     9 * it under the terms of the GNU General Public License as published by
     10 * the Free Software Foundation, either version 3 of the License, or
     11 * (at your option) any later version.
     12 *
     13 * HeuristicLab is distributed in the hope that it will be useful,
     14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 * GNU General Public License for more details.
     17 *
     18 * You should have received a copy of the GNU General Public License
     19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
     20 */
     21#endregion
     22
    223using System.Collections.Generic;
    324using System.Linq;
     
    728using HeuristicLab.Problems.DataAnalysis;
    829
    9 namespace GradientBoostedTrees {
     30namespace HeuristicLab.Algorithms.DataAnalysis {
    1031  [StorableClass]
    1132  [Item("RegressionTreeModel", "Represents a decision tree for regression.")]
    12   // TODO: Implement a view for this
    1333  public class RegressionTreeModel : NamedItem, IRegressionModel {
    1434
    1535    // trees are represented as a flat array
    16     // object-graph-travesal has problems if this is defined as a struct. TODO investigate...
    17     //[StorableClass]
    1836    public struct TreeNode {
    1937      public readonly static string NO_VARIABLE = string.Empty;
    20       //[Storable]
    2138      public string varName; // name of the variable for splitting or -1 if terminal node
    22       //[Storable]
    2339      public double val; // threshold
    24       //[Storable]
    2540      public int leftIdx;
    26       //[Storable]
    2741      public int rightIdx;
    2842
    29       //public TreeNode() {
    30       //  varName = NO_VARIABLE;
    31       //  leftIdx = -1;
    32       //  rightIdx = -1;
    33       //}
    34       //[StorableConstructor]
    35       //private TreeNode(bool deserializing) { }
    36       public override int GetHashCode()
    37       {
    38         return (leftIdx * rightIdx) ^ val.GetHashCode();
     43      public override int GetHashCode() {
     44        return leftIdx ^ rightIdx ^ val.GetHashCode();
    3945      }
    4046    }
Note: See TracChangeset for help on using the changeset viewer.