Changeset 12590
- Timestamp:
- 07/04/15 16:03:36 (9 years ago)
- Location:
- branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees
- Files:
-
- 10 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithm.cs
r12373 r12590 22 22 23 23 using System; 24 using System.Collections.Generic;25 using System.ComponentModel;26 using System.Diagnostics.Contracts;27 24 using System.Linq; 28 25 using System.Threading; 29 using GradientBoostedTrees;30 26 using HeuristicLab.Analysis; 31 27 using HeuristicLab.Common; … … 37 33 using HeuristicLab.PluginInfrastructure; 38 34 using HeuristicLab.Problems.DataAnalysis; 39 using HeuristicLab.Random;40 35 41 36 namespace HeuristicLab.Algorithms.DataAnalysis { 42 [Item("Gradient Boosted Trees", " ")]37 [Item("Gradient Boosted Trees", "Gradient boosted trees algorithm. Friedman, J. \"Greedy Function Approximation: A Gradient Boosting Machine\", IMS 1999 Reitz Lecture.")] 43 38 [StorableClass] 44 [Creatable( "Algorithms")]39 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 125)] 45 40 public class GradientBoostedTreesAlgorithm : BasicAlgorithm { 46 41 public override Type ProblemType { … … 170 165 171 166 var lossFunctionNames = ApplicationManager.Manager.GetInstances<ILossFunction>().Select(l => new StringValue(l.ToString()).AsReadOnly()); 172 var defaultLossFunction = lossFunctionNames.First(l => l.Value.Contains("Squared")); // squared error loss is the default173 Parameters.Add(new ConstrainedValueParameter<StringValue>(LossFunctionParameterName, "The loss function", new ItemSet<StringValue>(lossFunctionNames), defaultLossFunction));167 Parameters.Add(new ConstrainedValueParameter<StringValue>(LossFunctionParameterName, "The loss function", new ItemSet<StringValue>(lossFunctionNames))); 168 LossFunctionParameter.ActualValue = LossFunctionParameter.ValidValues.First(l => l.Value.Contains("Squared")); // squared error loss is the default 174 169 } 175 170 … … 178 173 // Set up the algorithm 179 174 if (SetSeedRandomly) Seed = new System.Random().Next(); 180 // random.Reset(Seed);181 175 182 176 // Set up the results display -
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithmStatic.cs
r12371 r12590 25 25 using System.Diagnostics.Contracts; 26 26 using System.Linq; 27 using GradientBoostedTrees;28 27 using HeuristicLab.Problems.DataAnalysis; 29 28 using HeuristicLab.Random; … … 34 33 35 34 public interface IGbmState { 36 37 35 IRegressionModel GetModel(); 38 36 double GetTrainLoss(); … … 42 40 43 41 // created through factory method 42 // GbmState details are private; API users can only use methods from IGbmState 44 43 private class GbmState : IGbmState { 45 44 internal IRegressionProblemData problemData { get; set; } 46 internal MersenneTwister random { get; set; }45 internal MersenneTwister random { get; private set; } 47 46 internal ILossFunction lossFunction { get; set; } 48 47 internal int maxDepth { get; set; } … … 50 49 internal double r { get; set; } 51 50 internal double m { get; set; } 52 internal RegressionTreeBuilder treeBuilder;51 internal readonly RegressionTreeBuilder treeBuilder; 53 52 54 53 … … 156 155 } 157 156 158 // allow dynamic adaptation of maxDepth, nu and r 157 // allow dynamic adaptation of maxDepth, nu and r (even though this is not used) 159 158 public static void MakeStep(IGbmState state, int maxDepth, double nu, double r, double m) { 160 159 var gbmState = state as GbmState; -
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesModel.cs
r12589 r12590 1 using System; 1 #region License Information 2 /* HeuristicLab 3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 * and the BEACON Center for the Study of Evolution in Action. 5 * 6 * This file is part of HeuristicLab. 7 * 8 * HeuristicLab is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * HeuristicLab is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 #endregion 22 23 using System; 2 24 using System.Collections.Generic; 3 25 using System.Linq; … … 7 29 using HeuristicLab.Problems.DataAnalysis; 8 30 9 namespace GradientBoostedTrees {31 namespace HeuristicLab.Algorithms.DataAnalysis { 10 32 [StorableClass] 11 33 [Item("Gradient boosted tree model", "")] 34 // this is essentially a collection of weighted regression models 12 35 public sealed class GradientBoostedTreesModel : NamedItem, IRegressionModel { 13 14 36 [Storable] 15 37 private readonly IList<IRegressionModel> models; … … 40 62 41 63 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 42 var tuples = (from idx in Enumerable.Range(0, models.Count) 43 let model = models[idx] 44 let weight = weights[idx] 45 select new { weight, enumerator = model.GetEstimatedValues(dataset, rows).GetEnumerator() }).ToArray(); 46 47 48 while (tuples.All(t => t.enumerator.MoveNext())) { 49 yield return tuples.Sum(t => t.weight * t.enumerator.Current); 64 // allocate target array go over all models and add up 
weighted estimation for each row 65 var res = new double[rows.Count()]; 66 for (int i = 0; i < models.Count; i++) { 67 var w = weights[i]; 68 var m = models[i]; 69 int r = 0; 70 foreach (var est in m.GetEstimatedValues(dataset, rows)) { 71 res[r++] += w * est; 72 } 50 73 } 74 return res; 51 75 } 52 76 -
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/AbsoluteErrorLoss.cs
r12374 r12590 1 using System; 1 #region License Information 2 /* HeuristicLab 3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 * and the BEACON Center for the Study of Evolution in Action. 5 * 6 * This file is part of HeuristicLab. 7 * 8 * HeuristicLab is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * HeuristicLab is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 #endregion 22 23 using System; 2 24 using System.Collections.Generic; 3 25 using System.Diagnostics; 4 26 using System.Linq; 5 using System.Text;6 using System.Threading.Tasks;7 27 using HeuristicLab.Common; 8 using HeuristicLab.Core;9 28 10 namespace GradientBoostedTrees { 29 namespace HeuristicLab.Algorithms.DataAnalysis { 30 // loss function for the weighted absolute error 11 31 public class AbsoluteErrorLoss : ILossFunction { 12 32 public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) { … … 54 74 // line search for abs error 55 75 LineSearchFunc lineSearch = (idx, startIdx, endIdx) => { 76 // Median() is allocating an array anyway 77 // It would be possible to pre-allocate an array for the residuals if Median() would allow specification of a sub-range 56 78 int nRows = endIdx - startIdx + 1; 57 var res = from offset in Enumerable.Range(0, nRows) 58 let i = startIdx + offset 59 let row = idx[i] 60 select (targetArr[row] - predArr[row]); 79 var res = new double[nRows]; 80 for (int 
offset = 0; offset < nRows; offset++) { 81 var i = startIdx + offset; 82 var row = idx[i]; 83 res[offset] = targetArr[row] - predArr[row]; 84 } 61 85 return res.Median(); 62 63 // old code for weighted median calculation64 // int nRows = endIdx - startIdx + 1; // startIdx and endIdx are inclusive65 // if (nRows == 1) return targetArr[idx[startIdx]] - predArr[idx[startIdx]];66 // else if (nRows == 2) {67 // if (weightArr[idx[startIdx]] > weightArr[idx[endIdx]]) {68 // return targetArr[idx[startIdx]] - predArr[idx[startIdx]];69 // } else if (weightArr[idx[startIdx]] < weightArr[idx[endIdx]]) {70 // return targetArr[idx[endIdx]] - predArr[idx[endIdx]];71 // } else {72 // // same weight73 // return ((targetArr[idx[startIdx]] - predArr[idx[startIdx]]) +74 // (targetArr[idx[endIdx]] - predArr[idx[endIdx]])) / 2;75 // }76 // } else {77 // var ts = from offset in Enumerable.Range(0, nRows)78 // let i = startIdx + offset79 // select new { res = targetArr[idx[i]] - predArr[idx[i]], weight = weightArr[idx[i]] };80 // ts = ts.OrderBy(t => t.res);81 // var totalWeight = ts.Sum(t => t.weight);82 // var tsEnumerator = ts.GetEnumerator();83 // tsEnumerator.MoveNext();84 //85 // double aggWeight = tsEnumerator.Current.weight; // weight of first86 //87 // while (aggWeight < totalWeight / 2) {88 // tsEnumerator.MoveNext();89 // aggWeight += tsEnumerator.Current.weight;90 // }91 // return tsEnumerator.Current.res;92 // }93 86 }; 94 87 return lineSearch; -
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/ILossFunction.cs
r12332 r12590 1 using System; 1 #region License Information 2 /* HeuristicLab 3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 * and the BEACON Center for the Study of Evolution in Action. 5 * 6 * This file is part of HeuristicLab. 7 * 8 * HeuristicLab is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * HeuristicLab is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 #endregion 22 2 23 using System.Collections.Generic; 3 using System.Linq;4 using System.Text;5 using System.Threading.Tasks;6 using HeuristicLab.Core;7 24 8 namespace GradientBoostedTrees {25 namespace HeuristicLab.Algorithms.DataAnalysis { 9 26 public delegate double LineSearchFunc(int[] idx, int startIdx, int endIdx); 10 27 11 28 public interface ILossFunction { 29 // returns the weighted loss 12 30 double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight); 31 32 // returns an enumerable of the weighted loss gradient for each row 13 33 IEnumerable<double> GetLossGradient(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight); 34 35 // returns a function that returns the optimal prediction value for a subset of rows from target and pred (see LineSearchFunc delegate above) 14 36 LineSearchFunc GetLineSearchFunc(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight); 15 37 } -
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/LogisticRegressionLoss.cs
r12589 r12590 1 using System; 1 #region License Information 2 /* HeuristicLab 3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 * and the BEACON Center for the Study of Evolution in Action. 5 * 6 * This file is part of HeuristicLab. 7 * 8 * HeuristicLab is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * HeuristicLab is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 #endregion 22 23 using System; 2 24 using System.Collections.Generic; 3 25 using System.Diagnostics; 4 26 using System.Linq; 5 using System.Text;6 using System.Threading.Tasks;7 27 using HeuristicLab.Common; 8 using HeuristicLab.Core;9 28 10 namespace GradientBoostedTrees {29 namespace HeuristicLab.Algorithms.DataAnalysis { 11 30 public class LogisticRegressionLoss : ILossFunction { 12 31 public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) { -
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/RelativeErrorLoss.cs
r12374 r12590 1 using System; 1 #region License Information 2 /* HeuristicLab 3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 * and the BEACON Center for the Study of Evolution in Action. 5 * 6 * This file is part of HeuristicLab. 7 * 8 * HeuristicLab is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * HeuristicLab is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 #endregion 22 23 using System; 2 24 using System.Collections.Generic; 3 25 using System.Diagnostics; 4 26 using System.Linq; 5 using System.Text;6 using System.Threading.Tasks;7 27 using HeuristicLab.Common; 8 using HeuristicLab.Core;9 28 10 namespace GradientBoostedTrees { 29 namespace HeuristicLab.Algorithms.DataAnalysis { 30 // relative error loss is a special case of weighted absolute error loss 11 31 public class RelativeErrorLoss : ILossFunction { 12 32 public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) { … … 50 70 throw new ArgumentException("target, pred and weight have differing lengths"); 51 71 52 // line search for abs error 72 // line search for relative error 73 // TODO: check and improve? 53 74 LineSearchFunc lineSearch = (idx, startIdx, endIdx) => { 54 75 // weighted median calculation -
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/LossFunctions/SquaredErrorLoss.cs
r12332 r12590 1 using System; 1 #region License Information 2 /* HeuristicLab 3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 * and the BEACON Center for the Study of Evolution in Action. 5 * 6 * This file is part of HeuristicLab. 7 * 8 * HeuristicLab is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * HeuristicLab is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 #endregion 22 23 using System; 2 24 using System.Collections.Generic; 3 25 using System.Linq; 4 using System.Text;5 using System.Threading.Tasks;6 using HeuristicLab.Common;7 using HeuristicLab.Core;8 26 9 namespace GradientBoostedTrees {27 namespace HeuristicLab.Algorithms.DataAnalysis { 10 28 public class SquaredErrorLoss : ILossFunction { 11 29 public double GetLoss(IEnumerable<double> target, IEnumerable<double> pred, IEnumerable<double> weight) { … … 44 62 throw new ArgumentException("target, pred and weight have differing lengths"); 45 63 46 // line search for 64 // line search for squared error loss => return the average value 47 65 LineSearchFunc lineSearch = (idx, startIdx, endIdx) => { 48 66 double s = 0.0; -
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/RegressionTreeBuilder.cs
r12372 r12590 1 using System; 1 #region License Information 2 /* HeuristicLab 3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 * and the BEACON Center for the Study of Evolution in Action. 5 * 6 * This file is part of HeuristicLab. 7 * 8 * HeuristicLab is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * HeuristicLab is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 #endregion 22 23 using System; 2 24 using System.Collections.Generic; 3 25 using System.Diagnostics; 4 26 using System.Diagnostics.Contracts; 5 27 using System.Linq; 6 using HeuristicLab.Common;7 28 using HeuristicLab.Core; 8 29 using HeuristicLab.Problems.DataAnalysis; 9 30 10 namespace GradientBoostedTrees { 31 namespace HeuristicLab.Algorithms.DataAnalysis { 32 // This class implements a greedy decision tree learner which selects splits with the maximum reduction in sum of squared errors. 33 // The tree builder also tracks variable relevance metrics based on the splits and improvement after the split. 34 // The implementation is tuned for gradient boosting where multiple trees have to be calculated for the same training data 35 // each time with a different target vector. Vectors of idx to allow iteration of input variables in sorted order are 36 // pre-calculated so that optimal thresholds for splits can be calculated in O(n) for each input variable. 37 // After each split the row idx are partitioned in a left and a right part. 
11 38 public class RegressionTreeBuilder { 12 39 private readonly IRandom random; … … 14 41 15 42 private readonly int nCols; 16 private readonly double[][] x; // all training data (original order from problemData) 17 private double[] y; // training labels (original order from problemData) 43 private readonly double[][] x; // all training data (original order from problemData), x is constant 44 private double[] y; // training labels (original order from problemData), y can be changed 18 45 19 46 private Dictionary<string, double> sumImprovements; // for variable relevance calculation … … 76 103 } 77 104 105 // simple API produces a single regression tree optimizing sum of squared errors 106 // this can be used if only a simple regression tree should be produced 107 // for a set of trees use the method CreateRegressionTreeForGradientBoosting below 108 // 78 109 // r and m work in the same way as for alglib random forest 79 110 // r is fraction of rows to use for training … … 92 123 } 93 124 94 // specific interface that allows to specify the target labels and the training rows which is necessary when this functionality is called by the gradient boosting routine125 // specific interface that allows to specify the target labels and the training rows which is necessary for gradient boosted trees 95 126 public IRegressionModel CreateRegressionTreeForGradientBoosting(double[] y, int maxDepth, int[] idx, LineSearchFunc lineSearch, double r = 0.5, double m = 0.5) { 96 127 Contract.Assert(maxDepth > 0); … … 111 142 HeuristicLab.Random.ListExtensions.ShuffleInPlace(allowedVariables, random); 112 143 144 // only select a part of the rows and columns randomly 113 145 effectiveRows = (int)Math.Ceiling(nRows * r); 114 146 effectiveVars = (int)Math.Ceiling(nCols * m); 115 147 148 // the which array is used for partitioning row idxs 116 149 Array.Clear(which, 0, which.Length); 117 150 118 151 // mark selected rows 119 152 for (int row = 0; row < effectiveRows; row++) { 120 
which[idx[row]] = 1; 153 which[idx[row]] = 1; // we use the which vector as a temporary variable here 121 154 internalIdx[row] = idx[row]; 122 155 } … … 126 159 for (int row = 0; row < nRows; row++) { 127 160 if (which[sortedIdxAll[col][row]] > 0) { 128 Trace.Assert(i < effectiveRows);161 Debug.Assert(i < effectiveRows); 129 162 sortedIdx[col][i] = sortedIdxAll[col][row]; 130 163 i++; … … 135 168 // prepare array for the tree nodes (a tree of maxDepth=1 has 1 node, a tree of maxDepth=d has 2^d - 1 nodes) 136 169 int numNodes = (int)Math.Pow(2, maxDepth) - 1; 137 //this.tree = new RegressionTreeModel.TreeNode[numNodes]; 138 this.tree = Enumerable.Range(0, numNodes).Select(_=>new RegressionTreeModel.TreeNode()).ToArray(); 170 this.tree = new RegressionTreeModel.TreeNode[numNodes]; 139 171 this.curTreeNodeIdx = 0; 140 172 … … 144 176 } 145 177 178 // recursive routine for building the tree for the row idx stored in internalIdx between startIdx and endIdx 179 // the lineSearch function calculates the optimal prediction value for tree leaf nodes 180 // (in the case of squared errors it is the average of target values for the rows represented by the node) 146 181 // startIdx and endIdx are inclusive 147 182 private void CreateRegressionTreeForIdx(int maxDepth, int startIdx, int endIdx, LineSearchFunc lineSearch) { … … 214 249 else if (which[internalIdx[j]] > 0) j--; 215 250 else { 216 Trace.Assert(which[internalIdx[i]] > 0);217 Trace.Assert(which[internalIdx[j]] < 0);251 Debug.Assert(which[internalIdx[i]] > 0); 252 Debug.Assert(which[internalIdx[j]] < 0); 218 253 // swap 219 254 int tmp = internalIdx[i]; … … 283 318 284 319 threshold = bestThreshold; 285 286 // Contract.Assert(bestImprovement > 0);287 // Contract.Assert(bestImprovement < double.PositiveInfinity);288 // Contract.Assert(bestVar != string.Empty);289 // Contract.Assert(allowedVariables.Contains(bestVar));290 320 } 291 321 … … 351 381 352 382 public IEnumerable<KeyValuePair<string, double>> 
GetVariableRelevance() { 383 // values are scaled: the most important variable has relevance = 100 353 384 double scaling = 100 / sumImprovements.Max(t => t.Value); 354 385 return -
branches/GBT-trunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/RegressionTreeModel.cs
r12589 r12590 1 using System; 1 #region License Information 2 /* HeuristicLab 3 * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 * and the BEACON Center for the Study of Evolution in Action. 5 * 6 * This file is part of HeuristicLab. 7 * 8 * HeuristicLab is free software: you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation, either version 3 of the License, or 11 * (at your option) any later version. 12 * 13 * HeuristicLab is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 #endregion 22 2 23 using System.Collections.Generic; 3 24 using System.Linq; … … 7 28 using HeuristicLab.Problems.DataAnalysis; 8 29 9 namespace GradientBoostedTrees {30 namespace HeuristicLab.Algorithms.DataAnalysis { 10 31 [StorableClass] 11 32 [Item("RegressionTreeModel", "Represents a decision tree for regression.")] 12 // TODO: Implement a view for this13 33 public class RegressionTreeModel : NamedItem, IRegressionModel { 14 34 15 35 // trees are represented as a flat array 16 // object-graph-travesal has problems if this is defined as a struct. 
TODO investigate...17 //[StorableClass]18 36 public struct TreeNode { 19 37 public readonly static string NO_VARIABLE = string.Empty; 20 //[Storable]21 38 public string varName; // name of the variable for splitting or -1 if terminal node 22 //[Storable]23 39 public double val; // threshold 24 //[Storable]25 40 public int leftIdx; 26 //[Storable]27 41 public int rightIdx; 28 42 29 //public TreeNode() { 30 // varName = NO_VARIABLE; 31 // leftIdx = -1; 32 // rightIdx = -1; 33 //} 34 //[StorableConstructor] 35 //private TreeNode(bool deserializing) { } 36 public override int GetHashCode() 37 { 38 return (leftIdx * rightIdx) ^ val.GetHashCode(); 43 public override int GetHashCode() { 44 return leftIdx ^ rightIdx ^ val.GetHashCode(); 39 45 } 40 46 }
Note: See TracChangeset
for help on using the changeset viewer.