Free cookie consent management tool by TermsFeed Policy Generator

source: branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/MetaModels/M5RuleSetModel.cs @ 15614

Last change on this file since 15614 was 15614, checked in by bwerth, 6 years ago

#2847 made changes to M5 according to review comments

File size: 6.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using System.Threading;
26using HeuristicLab.Common;
27using HeuristicLab.Data;
28using HeuristicLab.Optimization;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30using HeuristicLab.Problems.DataAnalysis;
31
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Regression model consisting of an ordered list of M5 rules. A row is predicted by the
  /// first rule in <see cref="Rules"/> that covers it.
  /// </summary>
  [StorableClass]
  internal class M5RuleSetModel : RegressionModel, IM5Model {
    private const string NumRulesResultName = "Number of rules";
    private const string CoveredInstancesResultName = "Covered instances";

    #region Properties
    /// <summary>
    /// The rules in the order they were built. Stays <c>null</c> until <see cref="Build"/> has run
    /// (or until the value is restored by cloning or deserialization).
    /// </summary>
    [Storable]
    internal List<M5RuleModel> Rules { get; private set; }
    #endregion

    #region HLConstructors & Cloning
    [StorableConstructor]
    protected M5RuleSetModel(bool deserializing) : base(deserializing) { }
    protected M5RuleSetModel(M5RuleSetModel original, Cloner cloner) : base(original, cloner) {
      // Deep-clone every rule; an unbuilt original (Rules == null) yields an unbuilt clone.
      if (original.Rules != null) Rules = original.Rules.Select(cloner.Clone).ToList();
    }
    protected M5RuleSetModel(string targetVariable) : base(targetVariable) { }
    public override IDeepCloneable Clone(Cloner cloner) {
      return new M5RuleSetModel(this, cloner);
    }
    #endregion

    /// <summary>
    /// Creates an empty (unbuilt) rule set model for the given target. The confidence-aware
    /// variant is returned when the configured leaf model provides confidence estimates.
    /// </summary>
    /// <param name="targetAttr">Name of the target variable.</param>
    /// <param name="m5Params">Algorithm parameters; only <c>LeafModel.ProvidesConfidence</c> is read here.</param>
    internal static M5RuleSetModel CreateRuleModel(string targetAttr, M5Parameters m5Params) {
      return m5Params.LeafModel.ProvidesConfidence ? new ConfidenceM5RuleSetModel(targetAttr) : new M5RuleSetModel(targetAttr);
    }

    #region RegressionModel
    /// <summary>
    /// The distinct variables reported by the rules, in first-occurrence order.
    /// Empty when the model has not been built or contains no rules.
    /// </summary>
    public override IEnumerable<string> VariablesUsedForPrediction {
      get {
        // An unbuilt model uses no variables; enumerating a null rule list would throw.
        if (Rules == null) return Enumerable.Empty<string>();
        // Take the union over all rules rather than relying on the first rule being
        // representative. If every rule reports the same list, the result equals that list.
        return Rules
          .SelectMany(rule => rule.VariablesUsedForPrediction ?? Enumerable.Empty<string>())
          .Distinct()
          .ToList();
      }
    }

    /// <summary>
    /// Lazily yields one estimate per requested row, each taken from the first covering rule.
    /// </summary>
    /// <exception cref="NotSupportedException">The model has not been built yet.</exception>
    /// <exception cref="ArgumentException">Raised during enumeration for a row that no rule covers.</exception>
    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
      if (Rules == null) throw new NotSupportedException("The model has not been built yet");
      return rows.Select(row => GetEstimatedValue(dataset, row));
    }
    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
      return new RegressionSolution(this, problemData);
    }
    #endregion

    #region IM5Model
    /// <summary>
    /// Builds the rule list: a rule is built on the remaining training/pruning rows, the rows it
    /// covers are removed from both sets, and this repeats until no training rows remain.
    /// The "Number of rules" and "Covered instances" results in <paramref name="m5Params"/> are
    /// created if missing and incremented as rules are added.
    /// </summary>
    /// <param name="trainingRows">Row indices used for building the rules.</param>
    /// <param name="pruningRows">Row indices handed to each rule's build step as pruning rows.</param>
    /// <param name="m5Params">Algorithm parameters (data, target variable, result collection).</param>
    /// <param name="cancellationToken">Checked before each rule is built and passed on to the rule.</param>
    public void Build(IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, M5Parameters m5Params, CancellationToken cancellationToken) {
      Rules = new List<M5RuleModel>();
      cancellationToken.ThrowIfCancellationRequested();

      // Register the counters once up front; existing entries are kept and accumulated into.
      if (!m5Params.Results.ContainsKey(NumRulesResultName)) m5Params.Results.Add(new Result(NumRulesResultName, new IntValue(0)));
      if (!m5Params.Results.ContainsKey(CoveredInstancesResultName)) m5Params.Results.Add(new Result(CoveredInstancesResultName, new IntValue(0)));

      var remainingTraining = trainingRows;
      var remainingPruning = pruningRows;
      // NOTE(review): termination relies on each rule covering at least one remaining training
      // row; the loop itself does not detect a rule that makes no progress.
      do {
        cancellationToken.ThrowIfCancellationRequested();
        var rule = M5RuleModel.CreateRuleModel(m5Params.TargetVariable, m5Params);

        var trainingCountBefore = remainingTraining.Count;
        rule.Build(remainingTraining, remainingPruning, m5Params, cancellationToken);

        // Only rows not explained by this rule are passed on to the next one.
        remainingTraining = remainingTraining.Where(i => !rule.Covers(m5Params.Data, i)).ToArray();
        remainingPruning = remainingPruning.Where(i => !rule.Covers(m5Params.Data, i)).ToArray();
        Rules.Add(rule);

        ((IntValue)m5Params.Results[NumRulesResultName].Value).Value++;
        ((IntValue)m5Params.Results[CoveredInstancesResultName].Value).Value += trainingCountBefore - remainingTraining.Count;
      }
      while (remainingTraining.Count > 0);
    }

    /// <summary>
    /// Forwards the update to every rule, in rule order.
    /// </summary>
    /// <exception cref="NotSupportedException">The model has not been built yet.</exception>
    public void Update(IReadOnlyList<int> rows, M5Parameters m5Parameters, CancellationToken cancellationToken) {
      // Same failure mode as GetEstimatedValues instead of a NullReferenceException.
      if (Rules == null) throw new NotSupportedException("The model has not been built yet");
      foreach (var rule in Rules) rule.Update(rows, m5Parameters, cancellationToken);
    }
    #endregion

    #region Helpers
    /// <summary>
    /// Returns the estimate of the first rule covering <paramref name="row"/>.
    /// </summary>
    /// <exception cref="ArgumentException">No rule covers the row.</exception>
    private double GetEstimatedValue(IDataset dataset, int row) {
      foreach (var rule in Rules) {
        if (rule.Covers(dataset, row))
          return rule.GetEstimatedValues(dataset, row.ToEnumerable()).Single();
      }
      throw new ArgumentException("Instance is not covered by any rule");
    }
    #endregion

    /// <summary>
    /// Rule set model that additionally exposes variance estimates. Each rule is cast to
    /// <see cref="IConfidenceRegressionModel"/> when a variance is requested.
    /// </summary>
    [StorableClass]
    private class ConfidenceM5RuleSetModel : M5RuleSetModel, IConfidenceRegressionModel {
      #region HLConstructors & Cloning
      [StorableConstructor]
      protected ConfidenceM5RuleSetModel(bool deserializing) : base(deserializing) { }
      private ConfidenceM5RuleSetModel(ConfidenceM5RuleSetModel original, Cloner cloner) : base(original, cloner) { }
      public ConfidenceM5RuleSetModel(string targetVariable) : base(targetVariable) { }
      public override IDeepCloneable Clone(Cloner cloner) {
        return new ConfidenceM5RuleSetModel(this, cloner);
      }
      #endregion

      #region IConfidenceRegressionModel
      /// <summary>
      /// Lazily yields one variance estimate per requested row, each taken from the first covering rule.
      /// </summary>
      /// <exception cref="NotSupportedException">The model has not been built yet.</exception>
      /// <exception cref="ArgumentException">Raised during enumeration for a row that no rule covers.</exception>
      public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
        if (Rules == null) throw new NotSupportedException("The model has not been built yet");
        return rows.Select(row => GetEstimatedVariance(dataset, row));
      }
      public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
        return new ConfidenceRegressionSolution(this, problemData);
      }
      /// <summary>
      /// Returns the variance estimate of the first rule covering <paramref name="row"/>.
      /// </summary>
      /// <exception cref="ArgumentException">No rule covers the row.</exception>
      private double GetEstimatedVariance(IDataset dataset, int row) {
        foreach (var rule in Rules) {
          if (rule.Covers(dataset, row)) return ((IConfidenceRegressionModel)rule).GetEstimatedVariances(dataset, row.ToEnumerable()).Single();
        }
        throw new ArgumentException("Instance is not covered by any rule");
      }
      #endregion
    }
  }
}
Note: See TracBrowser for help on using the repository browser.