Free cookie consent management tool by TermsFeed Policy Generator

source: branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/MetaModels/M5RuleSetModel.cs @ 15470

Last change on this file since 15470 was 15430, checked in by bwerth, 6 years ago

#2847 first implementation of M5'-regression

File size: 6.6 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading;
26using HeuristicLab.Common;
27using HeuristicLab.Data;
28using HeuristicLab.Optimization;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Problems.DataAnalysis;
31
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Regression model consisting of an ordered list of <see cref="M5RuleModel"/> instances.
  /// A row is predicted by the first rule in <see cref="Rules"/> that covers it.
  /// </summary>
  [StorableClass]
  public class M5RuleSetModel : RegressionModel, IM5MetaModel {
    private const string NoRulesResultName = "Number of Rules";
    private const string CoveredInstancesResultName = "Covered Instances";
    private const string NotBuiltMessage = "The classifier has not been built yet";
    private const string NotCoveredMessage = "Instance is not covered by any rule";

    #region Properties
    /// <summary>
    /// The rules in the order in which they were built. The target-variable constructor does not
    /// assign this property; it is created by <c>IM5MetaModel.BuildClassifier</c> (or copied by the
    /// cloning constructor), so it may be null on a model that has not been built.
    /// </summary>
    [Storable]
    internal List<M5RuleModel> Rules { get; private set; }
    #endregion

    #region HLConstructors & Cloning
    [StorableConstructor]
    protected M5RuleSetModel(bool deserializing) : base(deserializing) { }
    /// <summary>Cloning constructor; clones every rule of <paramref name="original"/> through <paramref name="cloner"/>.</summary>
    protected M5RuleSetModel(M5RuleSetModel original, Cloner cloner) : base(original, cloner) {
      if (original.Rules != null) Rules = original.Rules.Select(cloner.Clone).ToList();
    }
    /// <summary>Creates an empty (not yet built) rule set for the given target variable.</summary>
    protected M5RuleSetModel(string targetVariable) : base(targetVariable) { }
    public override IDeepCloneable Clone(Cloner cloner) {
      return new M5RuleSetModel(this, cloner);
    }
    #endregion

    /// <summary>
    /// Factory for rule set models. Returns a model that also implements
    /// <see cref="IConfidenceRegressionModel"/> when the configured leaf type is an
    /// <c>ILeafType&lt;IConfidenceRegressionModel&gt;</c>, otherwise a plain <see cref="M5RuleSetModel"/>.
    /// </summary>
    /// <param name="targetAttr">Name of the target variable passed to the model constructor.</param>
    /// <param name="m5CreationParams">Creation parameters; only <c>LeafType</c> is inspected here.</param>
    internal static M5RuleSetModel CreateRuleModel(string targetAttr, M5CreationParameters m5CreationParams) {
      if (m5CreationParams.LeafType is ILeafType<IConfidenceRegressionModel>) return new ConfidenceM5RuleSetModel(targetAttr);
      return new M5RuleSetModel(targetAttr);
    }

    #region RegressionModel
    /// <summary>
    /// Variables reported by the first rule, or an empty list when there is no rule
    /// (including a model that has not been built yet) or the first rule reports null.
    /// </summary>
    public override IEnumerable<string> VariablesUsedForPrediction {
      get {
        // Rules is null before the model is built; report "no variables" instead of letting
        // Enumerable.FirstOrDefault throw an ArgumentNullException for a null source.
        // NOTE(review): only the first rule is consulted; later rules might use other variables - confirm this is intended.
        var firstRule = Rules != null ? Rules.FirstOrDefault() : null;
        if (firstRule == null) return new List<string>();
        return firstRule.VariablesUsedForPrediction ?? new List<string>();
      }
    }

    /// <summary>
    /// Lazily estimates the target value for each of the given rows.
    /// </summary>
    /// <exception cref="NotSupportedException">Thrown immediately if the model has not been built.</exception>
    /// <exception cref="ArgumentException">Thrown during enumeration for a row that no rule covers.</exception>
    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
      if (Rules == null) throw new NotSupportedException(NotBuiltMessage);
      return rows.Select(row => GetEstimatedValue(dataset, row));
    }

    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
      return new RegressionSolution(this, problemData);
    }
    #endregion

    #region IM5MetaModel
    /// <summary>
    /// Builds the rule list: repeatedly creates and trains a rule on the rows that are still
    /// uncovered, then removes the rows covered by that rule from both the training and the
    /// holdout set, until no training rows remain. At least one rule is always built.
    /// The results "Number of Rules" and "Covered Instances" in <c>m5CreationParams.Results</c>
    /// are created on demand and updated after every rule.
    /// </summary>
    /// <param name="trainingRows">Rows used for training; rules are added until all of them are covered.</param>
    /// <param name="holdoutRows">Holdout rows handed to each rule; filtered the same way as the training rows.</param>
    /// <param name="m5CreationParams">Supplies the target variable, the data set and the result collection.</param>
    /// <param name="cancellation">Checked once per rule and passed on to the rule builder.</param>
    void IM5MetaModel.BuildClassifier(IReadOnlyList<int> trainingRows, IReadOnlyList<int> holdoutRows, M5CreationParameters m5CreationParams, CancellationToken cancellation) {
      Rules = new List<M5RuleModel>();
      var remainingTraining = trainingRows;
      var remainingHoldout = holdoutRows;
      do {
        var rule = M5RuleModel.CreateRuleModel(m5CreationParams.TargetVariable, m5CreationParams);
        cancellation.ThrowIfCancellationRequested();

        // The result entries may already exist (e.g. from a previous rule); only add them when missing.
        if (!m5CreationParams.Results.ContainsKey(NoRulesResultName)) m5CreationParams.Results.Add(new Result(NoRulesResultName, new IntValue(0)));
        if (!m5CreationParams.Results.ContainsKey(CoveredInstancesResultName)) m5CreationParams.Results.Add(new Result(CoveredInstancesResultName, new IntValue(0)));

        var trainingCountBefore = remainingTraining.Count;
        rule.BuildClassifier(remainingTraining, remainingHoldout, m5CreationParams, cancellation);

        // Keep only the rows the new rule does not cover; they are left for the following rules.
        // NOTE(review): if a rule covers none of the remaining training rows the set does not shrink
        // and the loop only ends through cancellation - confirm M5RuleModel always covers at least one row.
        remainingTraining = remainingTraining.Where(i => !rule.Covers(m5CreationParams.Data, i)).ToArray();
        remainingHoldout = remainingHoldout.Where(i => !rule.Covers(m5CreationParams.Data, i)).ToArray();
        Rules.Add(rule);

        ((IntValue) m5CreationParams.Results[NoRulesResultName].Value).Value++;
        ((IntValue) m5CreationParams.Results[CoveredInstancesResultName].Value).Value += trainingCountBefore - remainingTraining.Count;
      }
      while (remainingTraining.Count > 0);
    }

    /// <summary>
    /// Forwards the update to every rule, passing the same rows and parameters to each of them.
    /// </summary>
    /// <exception cref="NotSupportedException">Thrown if the model has not been built.</exception>
    void IM5MetaModel.UpdateModel(IReadOnlyList<int> rows, M5UpdateParameters m5UpdateParameters, CancellationToken cancellation) {
      // Same guard as the estimation methods, instead of failing with a NullReferenceException.
      if (Rules == null) throw new NotSupportedException(NotBuiltMessage);
      foreach (var rule in Rules) rule.UpdateModel(rows, m5UpdateParameters, cancellation);
    }
    #endregion

    #region Helpers
    /// <summary>
    /// Returns the first rule (in build order) that covers the given row.
    /// </summary>
    /// <exception cref="ArgumentException">Thrown if no rule covers the row.</exception>
    private M5RuleModel GetCoveringRule(IDataset dataset, int row) {
      var coveringRule = Rules.FirstOrDefault(rule => rule.Covers(dataset, row));
      if (coveringRule == null) throw new ArgumentException(NotCoveredMessage);
      return coveringRule;
    }

    /// <summary>
    /// Estimates a single row using the first covering rule. Only that rule is evaluated;
    /// rules that do not cover the row are skipped without computing a prediction.
    /// </summary>
    private double GetEstimatedValue(IDataset dataset, int row) {
      return GetCoveringRule(dataset, row).GetEstimatedValues(dataset, row.ToEnumerable()).Single();
    }
    #endregion

    /// <summary>
    /// Rule set model that additionally provides variance estimates by delegating to its rules,
    /// which are cast to <see cref="IConfidenceRegressionModel"/>.
    /// </summary>
    [StorableClass]
    private class ConfidenceM5RuleSetModel : M5RuleSetModel, IConfidenceRegressionModel {
      #region HLConstructors & Cloning
      [StorableConstructor]
      protected ConfidenceM5RuleSetModel(bool deserializing) : base(deserializing) { }
      private ConfidenceM5RuleSetModel(ConfidenceM5RuleSetModel original, Cloner cloner) : base(original, cloner) { }
      public ConfidenceM5RuleSetModel(string targetVariable) : base(targetVariable) { }
      public override IDeepCloneable Clone(Cloner cloner) {
        return new ConfidenceM5RuleSetModel(this, cloner);
      }
      #endregion

      #region IConfidenceRegressionModel
      /// <summary>
      /// Lazily estimates the variance for each of the given rows.
      /// </summary>
      /// <exception cref="NotSupportedException">Thrown immediately if the model has not been built.</exception>
      /// <exception cref="ArgumentException">Thrown during enumeration for a row that no rule covers.</exception>
      public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
        if (Rules == null) throw new NotSupportedException(NotBuiltMessage);
        return rows.Select(row => GetEstimatedVariance(dataset, row));
      }

      public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
        return new ConfidenceRegressionSolution(this, problemData);
      }

      /// <summary>
      /// Estimates the variance of a single row using the first covering rule; only that rule is evaluated.
      /// </summary>
      private double GetEstimatedVariance(IDataset dataset, int row) {
        var coveringRule = (IConfidenceRegressionModel) GetCoveringRule(dataset, row);
        return coveringRule.GetEstimatedVariances(dataset, row.ToEnumerable()).Single();
      }
      #endregion
    }
  }
}
Note: See TracBrowser for help on using the repository browser.