Changeset 12620
 Timestamp:
 07/06/15 20:38:56 (8 years ago)
 Location:
 branches/GBTtrunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4
 Files:

 1 added
 3 edited
Legend:
 Unmodified
 Added
 Removed

branches/GBTtrunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithm.cs
r12619 r12620 186 186 187 187 // init 188 var problemData = Problem.ProblemData;188 var problemData = (IRegressionProblemData)Problem.ProblemData.Clone(); 189 189 var lossFunction = ApplicationManager.Manager.GetInstances<ILossFunction>() 190 190 .Single(l => l.ToString() == LossFunctionParameter.Value.Value); … … 245 245 } else { 246 246 // otherwise we produce a regression solution 247 Results.Add(new Result("Solution", new RegressionSolution(state.GetModel(), (IRegressionProblemData)problemData.Clone())));247 Results.Add(new Result("Solution", new RegressionSolution(state.GetModel(), problemData))); 248 248 } 249 249 } 
branches/GBTtrunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/RegressionTreeBuilder.cs
r12619 r12620 22 22 23 23 using System; 24 using System.Collections; 24 25 using System.Collections.Generic; 25 26 using System.Diagnostics; … … 53 54 private readonly int[][] sortedIdx; // random selection from sortedIdxAll (for r < 1.0) 54 55 55 56 private int calls = 0; 56 57 57 58 // helper arrays which are allocated to maximal necessary size only once in the ctor … … 64 65 65 66 private class Partition { 66 public int TreeNodeIdx { get; set; }67 public int ParentNodeIdx { get; set; } 67 68 public int Depth { get; set; } 68 69 public int StartIdx { get; set; } … … 70 71 public bool Left { get; set; } 71 72 } 72 private readonly IList<Partition> queue;73 private readonly SortedList<double, Partition> queue; 73 74 74 75 // prepare and allocate buffer variables in ctor … … 94 95 outx = new double[rows]; 95 96 outSortedIdx = new int[rows]; 96 queue = new List<Partition>();97 queue = new SortedList<double, Partition>(); 97 98 98 99 x = new double[nCols][]; … … 179 180 180 181 // start and end idx are inclusive 181 queue.Add( new Partition() { TreeNodeIdx = 1, Depth = maxDepth, StartIdx = 0, EndIndex = effectiveRows  1 });182 queue.Add(calls++, new Partition() { ParentNodeIdx = 1, Depth = maxDepth, StartIdx = 0, EndIndex = effectiveRows  1 }); 182 183 CreateRegressionTreeForIdx(lineSearch); 183 184 … … 187 188 private void CreateRegressionTreeForIdx(LineSearchFunc lineSearch) { 188 189 while (queue.Any()) { 189 var f = queue [0]; // actually a stack190 var f = queue.First().Value; // actually a stack 190 191 queue.RemoveAt(0); 191 192 … … 204 205 if (depth <= 1  endIdx  startIdx == 0  !FindBestVariableAndThreshold(startIdx, endIdx, out threshold, out bestVariableName)) { 205 206 CreateLeafNode(startIdx, endIdx, lineSearch); 206 if (f. TreeNodeIdx >= 0) if (f.Left) {207 tree[f. TreeNodeIdx].leftIdx = curTreeNodeIdx;207 if (f.ParentNodeIdx >= 0) if (f.Left) { 208 tree[f.ParentNodeIdx].leftIdx = curTreeNodeIdx; 208 209 } else { 209 tree[f. TreeNodeIdx].rightIdx = curTreeNodeIdx;210 tree[f.ParentNodeIdx].rightIdx = curTreeNodeIdx; 210 211 } 211 212 curTreeNodeIdx++; … … 215 216 216 217 // connect to parent tree 217 if (f. TreeNodeIdx >= 0) if (f.Left) {218 tree[f. TreeNodeIdx].leftIdx = curTreeNodeIdx;218 if (f.ParentNodeIdx >= 0) if (f.Left) { 219 tree[f.ParentNodeIdx].leftIdx = curTreeNodeIdx; 219 220 } else { 220 tree[f. TreeNodeIdx].rightIdx = curTreeNodeIdx;221 tree[f.ParentNodeIdx].rightIdx = curTreeNodeIdx; 221 222 } 222 223 … … 224 225 Debug.Assert(startIdx <= splitIdx); 225 226 226 queue. Insert(0, new Partition() { TreeNodeIdx = curTreeNodeIdx, Left = false, Depth = depth  1, StartIdx = splitIdx + 1, EndIndex = endIdx });227 queue. Insert(0, new Partition() { TreeNodeIdx = curTreeNodeIdx, Left = true, Depth = depth  1, StartIdx = startIdx, EndIndex = splitIdx }); // left part before right part (stack organization)227 queue.Add(calls++, new Partition() { ParentNodeIdx = curTreeNodeIdx, Left = true, Depth = depth  1, StartIdx = startIdx, EndIndex = splitIdx }); // left part before right part (stack organization) 228 queue.Add(calls++, new Partition() { ParentNodeIdx = curTreeNodeIdx, Left = false, Depth = depth  1, StartIdx = splitIdx + 1, EndIndex = endIdx }); 228 229 curTreeNodeIdx++; 229 230 … … 318 319 } 319 320 320 double bestImprovement = 0.0;321 double bestImprovement = 1.0 / rows * sumY * sumY; 321 322 double bestThreshold = double.PositiveInfinity; 322 323 bestVar = RegressionTreeModel.TreeNode.NO_VARIABLE; … … 384 385 double nr = rows; 385 386 386 bestImprovement = 0.0;387 bestImprovement = 1.0 / rows * sumY * sumY; 387 388 bestThreshold = double.NegativeInfinity; 388 389 // for all thresholds … … 398 399 399 400 if (x[i] < x[i + 1]) { // don't try to split when two elements are equal 401 402 // goal is to find the split with leading to minimal total variance of left and right parts 403 // without partitioning the variance is var(y) = E(y²)  E(y)² 404 // = 1/n * sum(y²)  (1/n * sum(y))² 405 //  406 // if we split into right and left part the overall variance is the weigthed combination nl/n * var(y_l) + nr/n * var(y_r) 407 // = nl/n * (1/nl * sum(y_l²)  (1/nl * sum(y_l))²) + nr/n * (1/nr * sum(y_r²)  (1/nr * sum(y_r))²) 408 // = 1/n * sum(y_l²)  1/nl * 1/n * sum(y_l)² + 1/n * sum(y_r²)  1/nr * 1/n * sum(y_r)² 409 // = 1/n * (sum(y_l²) + sum(y_r²))  1/n * (sum(y_l)² / nl + sum(y_r)² / nr) 410 // = 1/n * sum(y²)  1/n * (sum(y_l)² / nl + sum(y_r)² / nr) 411 //  412 // not changed by split (and the same for total variance without partitioning) 413 // 414 // therefore we need to find the maximum value (sum(y_l)² / nl + sum(y_r)² / nr) (ignoring the factor 1/n) 415 // and this value must be larger than 1/n * sum(y)² to be an improvement over no split 416 400 417 double curQuality = sl * sl / nl + sr * sr / nr; 401 // curQuality = nl*nr / (nl+nr) * Sqr(sl / nl  sr / nr) // greedy function approximation page 12 eqn (35)402 418 403 419 if (curQuality > bestImprovement) { 
branches/GBTtrunkintegration/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis3.4.csproj
r12588 r12620 199 199 <Private>False</Private> 200 200 </Reference> 201 <Reference Include="Microsoft.VisualStudio.QualityTools.UnitTestFramework, Version=10.1.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL" /> 201 202 <Reference Include="System" /> 202 203 <Reference Include="System.Core"> … … 289 290 <Compile Include="GradientBoostedTrees\RegressionTreeBuilder.cs" /> 290 291 <Compile Include="GradientBoostedTrees\RegressionTreeModel.cs" /> 292 <Compile Include="GradientBoostedTrees\Test.cs" /> 291 293 <Compile Include="Interfaces\IGaussianProcessClassificationModelCreator.cs" /> 292 294 <Compile Include="Interfaces\IGaussianProcessRegressionModelCreator.cs" />
Note: See TracChangeset
for help on using the changeset viewer.