[15830] | 1 | #region License Information
|
---|
| 2 | /* HeuristicLab
|
---|
[17181] | 3 | * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
[15830] | 4 | *
|
---|
| 5 | * This file is part of HeuristicLab.
|
---|
| 6 | *
|
---|
| 7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
| 8 | * it under the terms of the GNU General Public License as published by
|
---|
| 9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 10 | * (at your option) any later version.
|
---|
| 11 | *
|
---|
| 12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 15 | * GNU General Public License for more details.
|
---|
| 16 | *
|
---|
| 17 | * You should have received a copy of the GNU General Public License
|
---|
| 18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | */
|
---|
| 20 | #endregion
|
---|
| 21 |
|
---|
| 22 | using System;
|
---|
| 23 | using System.Collections.Generic;
|
---|
| 24 | using System.Linq;
|
---|
| 25 | using System.Threading;
|
---|
| 26 | using HeuristicLab.Common;
|
---|
| 27 | using HeuristicLab.Core;
|
---|
| 28 | using HeuristicLab.Problems.DataAnalysis;
|
---|
[16847] | 29 | using HEAL.Attic;
|
---|
[15830] | 30 |
|
---|
| 31 | namespace HeuristicLab.Algorithms.DataAnalysis {
|
---|
/// <summary>
/// Abstract base class for splitters. Grows a <see cref="RegressionNodeTreeModel"/> breadth-first:
/// nodes waiting to be examined are kept in a FIFO queue (together with their training rows) inside a
/// <see cref="SplittingState"/> that lives in the algorithm's state scope, so an interrupted run can be resumed.
/// Subclasses supply the per-attribute split criterion via <see cref="AttributeSplit"/>.
/// </summary>
[StorableType("22DCCF28-8943-4622-BBD3-B2AB04F28C36")]
[Item("SplitterBase", "Abstract base class for splitters")]
public abstract class SplitterBase : ParameterizedNamedItem, ISplitter {
  /// <summary>Name of the scope variable under which the <see cref="SplittingState"/> is stored.</summary>
  public const string SplittingStateVariableName = "SplittingState";

  #region Constructors & Cloning
  // Deserialization constructor: chains to the base storable constructor so that the regular
  // default-construction logic of the base classes is not executed while loading persisted data.
  [StorableConstructor]
  protected SplitterBase(StorableConstructorFlag _) : base(_) { }
  protected SplitterBase(SplitterBase original, Cloner cloner) : base(original, cloner) { }
  public SplitterBase() { }
  #endregion

  #region ISplitter
  /// <summary>
  /// Registers a fresh <see cref="SplittingState"/> in <paramref name="states"/> under
  /// <see cref="SplittingStateVariableName"/>.
  /// </summary>
  /// <param name="states">Scope that receives the new state variable.</param>
  public void Initialize(IScope states) {
    states.Variables.Add(new Variable(SplittingStateVariableName, new SplittingState()));
  }

  /// <summary>
  /// Splits the nodes of <paramref name="tree"/> breadth-first, starting at its root, until no queued node
  /// can be split any further.
  /// </summary>
  /// <param name="tree">Tree whose root is enqueued when splitting has not started yet.</param>
  /// <param name="trainingRows">Row indices associated with the root node.</param>
  /// <param name="stateScope">
  /// Scope holding the <see cref="RegressionTreeParameters"/> and the <see cref="SplittingState"/> variables.
  /// </param>
  /// <param name="cancellationToken">Checked once per processed node.</param>
  /// <exception cref="OperationCanceledException">
  /// Thrown when cancellation is requested. The queues in the splitting state then still contain every
  /// node that has not been processed, so a later call continues where this one stopped.
  /// </exception>
  public void Split(RegressionNodeTreeModel tree, IReadOnlyList<int> trainingRows, IScope stateScope, CancellationToken cancellationToken) {
    var regressionTreeParams = (RegressionTreeParameters)stateScope.Variables[DecisionTreeRegression.RegressionTreeParameterVariableName].Value;
    var splittingState = (SplittingState)stateScope.Variables[SplittingStateVariableName].Value;
    var variables = regressionTreeParams.AllowedInputVariables.ToArray();
    var target = regressionTreeParams.TargetVariable;

    // Code <= 0 means nothing has been done yet: seed both queues with the root and its rows.
    if (splittingState.Code <= 0) {
      splittingState.nodeQueue.Enqueue(tree.Root);
      splittingState.trainingRowsQueue.Enqueue(trainingRows);
      splittingState.Code = 1;
    }

    while (splittingState.nodeQueue.Count != 0) {
      // Check before dequeuing: this covers leaf nodes as well and guarantees that both queues are
      // still in sync (nothing half-processed) when the exception leaves this method.
      cancellationToken.ThrowIfCancellationRequested();

      // The two queues are filled in lock-step, so the n-th node belongs to the n-th row list.
      var n = splittingState.nodeQueue.Dequeue();
      var rows = splittingState.trainingRowsQueue.Dequeue();

      string attr;
      double splitValue;
      var nodeData = new RegressionProblemData(RegressionTreeUtilities.ReduceDataset(regressionTreeParams.Data, rows, variables, target), variables, target);
      var isLeaf = !DecideSplit(nodeData, regressionTreeParams.MinLeafSize, out attr, out splitValue);
      if (isLeaf) continue;

      IReadOnlyList<int> leftRows, rightRows;
      RegressionTreeUtilities.SplitRows(rows, regressionTreeParams.Data, attr, splitValue, out leftRows, out rightRows);
      n.Split(regressionTreeParams, attr, splitValue, rows.Count);

      // Children are examined later, each with its own share of the rows.
      splittingState.nodeQueue.Enqueue(n.Left);
      splittingState.nodeQueue.Enqueue(n.Right);
      splittingState.trainingRowsQueue.Enqueue(leftRows);
      splittingState.trainingRowsQueue.Enqueue(rightRows);
    }
  }

  /// <summary>
  /// Evaluates every allowed input variable with <see cref="AttributeSplit"/> and keeps the candidate
  /// that reports the largest impurity value.
  /// </summary>
  /// <param name="splitData">Problem data restricted to the rows of the node under consideration.</param>
  /// <param name="minLeafSize">Minimum number of rows required on either side of a split.</param>
  /// <param name="splitAttr">Name of the chosen attribute; <see cref="string.Empty"/> if none was chosen.</param>
  /// <param name="splitValue">Split value reported for the chosen attribute; 0.0 if none was chosen.</param>
  /// <returns><c>true</c> if a suitable split exists; <c>false</c> if the node should stay a leaf.</returns>
  protected virtual bool DecideSplit(IRegressionProblemData splitData, int minLeafSize, out string splitAttr, out double splitValue) {
    var bestPos = -1;
    var bestImpurity = double.MinValue;
    var bestSplitValue = 0.0;
    var bestSplitAttr = string.Empty;
    splitAttr = bestSplitAttr;
    splitValue = bestSplitValue;
    if (splitData.Dataset.Rows < minLeafSize) return false;

    // The target column is identical for every candidate attribute, so it is read only once.
    var targetValues = splitData.Dataset.GetDoubleValues(splitData.TargetVariable).ToArray();

    // find best attribute for the splitter
    foreach (var attr in splitData.AllowedInputVariables) {
      int pos;
      double impurity, sValue;
      // Pair each attribute value with its target value and sort the pairs by attribute value.
      var sortedData = splitData.Dataset.GetDoubleValues(attr).Zip(targetValues, Tuple.Create).OrderBy(x => x.Item1).ToArray();
      AttributeSplit(sortedData.Select(x => x.Item1).ToArray(), sortedData.Select(x => x.Item2).ToArray(), minLeafSize, out pos, out impurity, out sValue);
      // Written as a negated '<' so that a NaN impurity never replaces the current best.
      if (!(bestImpurity < impurity)) continue;
      bestImpurity = impurity;
      bestPos = pos;
      bestSplitValue = sValue;
      bestSplitAttr = attr;
    }

    splitAttr = bestSplitAttr;
    splitValue = bestSplitValue;
    //if no suitable split exists => leafNode
    // bestPos < 0 means no candidate was accepted (or a negative position was reported); without this
    // guard a non-positive minLeafSize would let such a result pass as a split with an empty attribute.
    return bestPos >= 0 && bestPos + 1 >= minLeafSize && bestPos <= splitData.Dataset.Rows - minLeafSize;
  }

  /// <summary>
  /// Determines the best split for a single attribute.
  /// </summary>
  /// <param name="attValues">Attribute values, sorted ascending by the caller.</param>
  /// <param name="targetValues">Target values in the same order as <paramref name="attValues"/>.</param>
  /// <param name="minLeafSize">Minimum number of rows required on either side of a split.</param>
  /// <param name="position">
  /// Position of the split within the sorted values. NOTE(review): presumably the index of the last row
  /// that goes to the left side - confirm against the concrete implementations.
  /// </param>
  /// <param name="maxImpurity">Quality of the split; the caller prefers larger values.</param>
  /// <param name="splitValue">Threshold value associated with the split.</param>
  protected abstract void AttributeSplit(IReadOnlyList<double> attValues, IReadOnlyList<double> targetValues, int minLeafSize, out int position, out double maxImpurity, out double splitValue);
  #endregion

  /// <summary>
  /// Persistable progress of a splitting run: the nodes that still have to be examined and, in the
  /// same order, the training rows that belong to each of them.
  /// </summary>
  [StorableType("BC1149FD-370E-4F3A-92F5-6E519736D09A")]
  public class SplittingState : Item {
    /// <summary>Nodes waiting to be examined (FIFO).</summary>
    public Queue<RegressionNodeModel> nodeQueue;
    // Queues are persisted through array-valued proxy properties.
    [Storable]
    private RegressionNodeModel[] storableNodeQueue {
      get { return nodeQueue.ToArray(); }
      set { nodeQueue = new Queue<RegressionNodeModel>(value); }
    }

    /// <summary>Training rows of the queued nodes; kept in lock-step with <see cref="nodeQueue"/>.</summary>
    public Queue<IReadOnlyList<int>> trainingRowsQueue;
    [Storable]
    private IReadOnlyList<int>[] storableTrainingRowsQueue {
      get { return trainingRowsQueue.ToArray(); }
      set { trainingRowsQueue = new Queue<IReadOnlyList<int>>(value); }
    }

    //State.Code values denote the current action (for pausing)
    //0...nothing has been done;
    //1...splitting nodes;
    [Storable]
    public int Code = 0;

    #region HLConstructors & Cloning
    [StorableConstructor]
    protected SplittingState(StorableConstructorFlag _) : base(_) { }
    protected SplittingState(SplittingState original, Cloner cloner) : base(original, cloner) {
      // Nodes are cloned through the cloner; row lists are copied into new arrays.
      nodeQueue = new Queue<RegressionNodeModel>(original.nodeQueue.Select(cloner.Clone));
      trainingRowsQueue = new Queue<IReadOnlyList<int>>(original.trainingRowsQueue.Select(x => (IReadOnlyList<int>)x.ToArray()));
      Code = original.Code;
    }
    public SplittingState() : base() {
      nodeQueue = new Queue<RegressionNodeModel>();
      trainingRowsQueue = new Queue<IReadOnlyList<int>>();
    }
    public override IDeepCloneable Clone(Cloner cloner) {
      return new SplittingState(this, cloner);
    }
    #endregion
  }
}
|
---|
| 153 | } |
---|