source: branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/Spliting/OrderImpurityCalculator.cs @ 16847

Last change on this file since 16847 was 16847, checked in by gkronber, 4 months ago

#2847: made some minor changes while reviewing

File size: 4.0 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26
27namespace HeuristicLab.Algorithms.DataAnalysis {
28  /// <summary>
29  /// Helper class for incremental split calculation.
30  /// Used while moving a potential splitter along the ordered training instances
31  /// </summary>
32  internal class OrderImpurityCalculator {
33    internal enum IncrementType {
34      Left,
35      Right,
36      None
37    }
38
39    #region Properties
40    private double SqSumLeft { get; set; }
41    private double SqSumRight { get; set; }
42    private double VarLeft { get; set; }
43    private double VarRight { get; set; }
44    private double Order { get; set; }
45    private double VarTotal { get; set; }
46    private int NoInstances { get; set; }
47
48    private double NoLeft { get; set; }
49    private double NoRight { get; set; }
50    private double SumLeft { get; set; }
51    private double SumRight { get; set; }
52    public double Impurity { get; private set; }
53    #endregion
54
55    #region Constructors
56    public OrderImpurityCalculator(int partition, IReadOnlyCollection<double> data, double order) {
57      var values = data;
58      NoInstances = data.Count;
59      VarTotal = values.VariancePop();
60
61      values = data.Take(partition).ToArray();
62      NoLeft = partition;
63      SumLeft = values.Sum();
64      SqSumLeft = values.Sum(x => x * x);
65
66      values = data.Skip(partition).ToArray();
67      NoRight = NoInstances - NoLeft;
68      SumRight = values.Sum();
69      SqSumRight = values.Sum(x => x * x);
70
71      Order = order;
72      Increment(0.0, IncrementType.None);
73    }
74    #endregion
75
76    #region IImpurityCalculator
77    public void Increment(double value, IncrementType type) {
78      double y, yl, yr;
79      var valSq = value * value;
80
81      switch (type) {
82        case IncrementType.Left:
83          NoLeft++;
84          NoRight--;
85          SumLeft += value;
86          SumRight -= value;
87          SqSumLeft += valSq;
88          SqSumRight -= valSq;
89          break;
90        case IncrementType.Right:
91          NoLeft--;
92          NoRight++;
93          SumLeft -= value;
94          SumRight += value;
95          SqSumLeft -= valSq;
96          SqSumRight += valSq;
97          break;
98        case IncrementType.None:
99          break;
100        default:
101          throw new ArgumentOutOfRangeException(type.ToString(), type, null);
102      }
103
104      VarLeft = NoLeft <= 0 ? 0 : Math.Abs(NoLeft * SqSumLeft - SumLeft * SumLeft) / (NoLeft * NoLeft);
105      VarRight = NoRight <= 0 ? 0 : Math.Abs(NoRight * SqSumRight - SumRight * SumRight) / (NoRight * NoRight);
106
107      if (Order <= 0) throw new ArgumentException("Splitter order must be larger than 0");
108      if (Order.IsAlmost(1)) {
109        y = VarTotal;
110        yl = VarLeft;
111        yr = VarRight;
112      }
113      else {
114        y = Math.Pow(VarTotal, 1.0 / Order);
115        yl = Math.Pow(VarLeft, 1.0 / Order);
116        yr = Math.Pow(VarRight, 1.0 / Order);
117      }
118      if (NoLeft <= 0.0 || NoRight <= 0.0) Impurity = double.MinValue; //Splitter = 0;
119      else Impurity = y - (NoLeft * yl + NoRight * yr) / (NoRight + NoLeft);
120    }
121    #endregion
122  }
123}
Note: See TracBrowser for help on using the repository browser.