source: branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Spliting/M5Splitter.cs @ 15830

Last change on this file since 15830 was 15830, checked in by bwerth, 18 months ago

#2847 adapted project to new rep structure; major changes to interfaces; restructures splitting and pruning

File size: 3.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using HeuristicLab.Common;
25using HeuristicLab.Core;
26using HeuristicLab.Data;
27using HeuristicLab.Parameters;
28using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
29
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Splitter that, for a single attribute, picks the split position with the largest impurity
  /// value reported by <see cref="OrderImpurityCalculator"/> for the configured <see cref="Order"/>.
  /// </summary>
  [StorableClass]
  [Item("M5Splitter", "A split selector that uses the ratio between Variances^(1/Order) to determine good splits")]
  public class M5Splitter : SplitterBase {
    public const string OrderParameterName = "Order";

    // Value handed back through maxImpurity when no split candidate was accepted.
    private const double NoSplitImpurity = -1E20;

    /// <summary>The parameter holding the order that is passed to the impurity calculator.</summary>
    public IFixedValueParameter<DoubleValue> OrderParameter {
      get { return (IFixedValueParameter<DoubleValue>)Parameters[OrderParameterName]; }
    }

    /// <summary>The current value of <see cref="OrderParameter"/>.</summary>
    public double Order {
      get { return OrderParameter.Value.Value; }
    }

    #region Constructors & Cloning
    // Chains to the storable constructor of the base class so that deserialization does not
    // run the regular default-constructor initialization of the base classes.
    [StorableConstructor]
    private M5Splitter(bool deserializing) : base(deserializing) { }

    // Copy constructor used by Clone.
    private M5Splitter(M5Splitter original, Cloner cloner) : base(original, cloner) { }

    /// <summary>Creates a splitter whose order parameter defaults to 5.</summary>
    public M5Splitter() {
      Parameters.Add(new FixedValueParameter<DoubleValue>(OrderParameterName, "The exponent in the split calculation sum (x_i - x_avg)^Order.", new DoubleValue(5)));
    }

    /// <summary>Creates a deep clone of this splitter using the given cloner.</summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new M5Splitter(this, cloner);
    }
    #endregion

    /// <summary>
    /// Searches the best split position for one attribute (logic adapted from Weka, per the original comment).
    /// </summary>
    /// <param name="attValues">Attribute values; presumably sorted ascending and aligned with
    /// <paramref name="targetValues"/> - NOTE(review): confirm against the caller in SplitterBase.</param>
    /// <param name="targetValues">Target values in the same order as <paramref name="attValues"/>.</param>
    /// <param name="minLeafSize">Minimum leaf size; <c>minLeafSize - 1</c> is the lower bound for the margin
    /// that is kept free of splits at both ends of the data.</param>
    /// <param name="position">Index after which the split is made; 0 if no split was found.</param>
    /// <param name="maxImpurity">Impurity of the chosen split; -1E20 if no split was found.</param>
    /// <param name="splitValue">Midpoint between the two attribute values around the split; 0.0 if no split was found.</param>
    protected override void AttributeSplit(IReadOnlyList<double> attValues, IReadOnlyList<double> targetValues, int minLeafSize, out int position, out double maxImpurity, out double splitValue) {
      position = 0;
      maxImpurity = NoSplitImpurity;
      splitValue = 0.0;

      var count = targetValues.Count;
      // fewer than 4 samples are never split
      if (count < 4) return;

      var high = count - 1;
      // margin at both ends: at least minLeafSize - 1, otherwise a fifth of the samples (1 for exactly 4 samples)
      var len = Math.Max(minLeafSize - 1, count < 5 ? 1 : count / 5);

      // No candidate position lies between the two margins. Return before building the impurity
      // calculator, which would otherwise be set up for nothing (and possibly with more values
      // than targetValues contains when minLeafSize is large).
      if (len >= high - len) return;

      // The calculator is constructed with len (= part + 1 in the Weka formulation) - presumably the
      // number of leading target values that start on the left side. NOTE(review): confirm in OrderImpurityCalculator.
      var imp = new OrderImpurityCalculator(len, targetValues, Order);

      for (var i = len; i < high - len; i++) {
        // shift targetValues[i] to the left side before evaluating a split after index i
        imp.Increment(targetValues[i], OrderImpurityCalculator.IncrementType.Left);

        // a split cannot be made between two (almost) equal attribute values
        if (attValues[i].IsAlmost(attValues[i + 1])) continue;

        var impurity = imp.Impurity;
        // only strictly smaller values are skipped, so ties go to the later position
        if (impurity < maxImpurity) continue;

        maxImpurity = impurity;
        splitValue = (attValues[i] + attValues[i + 1]) / 2;
        position = i;
      }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.