Free cookie consent management tool by TermsFeed Policy Generator

source: branches/2847_M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/M5Utilities/M5Analyzer.cs @ 16847

Last change on this file since 16847 was 15614, checked in by bwerth, 7 years ago

#2847 made changes to M5 according to review comments

File size: 6.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System.Collections.Generic;
23using System.Linq;
24using HeuristicLab.Analysis;
25using HeuristicLab.Data;
26using HeuristicLab.Optimization;
27using HeuristicLab.Problems.DataAnalysis;
28
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Static helpers that turn M5 tree and rule-set models into result objects:
  /// variable usage counts, a leaf-depth histogram, per-rule results and a coverage diagram.
  /// </summary>
  internal static class M5Analyzer {
    private const string ConditionResultName = "Condition";
    private const string CoverResultName = "Covered Instances";
    private const string CoverageDiagramResultName = "Coverage";
    private const string RuleModelResultName = "RuleModel";

    /// <summary>
    /// Counts how often each variable appears among the split attributes of the rules of a rule set.
    /// </summary>
    /// <param name="ruleSetModel">The rule set whose rules are inspected.</param>
    /// <returns>
    /// A dictionary keyed by the entries of <c>ruleSetModel.VariablesUsedForPrediction</c> (all starting at 0),
    /// incremented once for every occurrence of the variable in a rule's <c>SplitAttributes</c>.
    /// </returns>
    /// <remarks>
    /// The dictionary indexer is used for counting, so a split attribute that is not contained in
    /// <c>VariablesUsedForPrediction</c> results in a <see cref="KeyNotFoundException"/>.
    /// </remarks>
    public static Dictionary<string, int> GetRuleVariableFrequences(M5RuleSetModel ruleSetModel) {
      var res = ruleSetModel.VariablesUsedForPrediction.ToDictionary(x => x, x => 0);
      foreach (var rule in ruleSetModel.Rules) {
        foreach (var att in rule.SplitAttributes) {
          res[att]++;
        }
      }
      return res;
    }

    /// <summary>
    /// Counts how often each variable is used as split attribute by the non-leaf nodes of a tree.
    /// </summary>
    /// <param name="treeModel">The tree whose nodes are enumerated starting at <c>Root</c>.</param>
    /// <returns>
    /// A dictionary keyed by the entries of <c>treeModel.VariablesUsedForPrediction</c> (all starting at 0),
    /// incremented once for every node with <c>IsLeaf == false</c> that splits on the variable.
    /// </returns>
    public static Dictionary<string, int> GetTreeVariableFrequences(M5TreeModel treeModel) {
      var res = treeModel.VariablesUsedForPrediction.ToDictionary(x => x, x => 0);
      var root = treeModel.Root;
      foreach (var cur in root.EnumerateNodes().Where(x => !x.IsLeaf)) {
        res[cur.SplitAttribute]++;
      }
      return res;
    }

    /// <summary>
    /// Builds a histogram of the depths of all leaves of a tree (the root has depth 0).
    /// </summary>
    /// <param name="treeModel">The tree to analyze.</param>
    /// <returns>A result named "LeafDepths" wrapping a data table with a single histogram row "Depths".</returns>
    public static Result CreateLeafDepthHistogram(M5TreeModel treeModel) {
      var list = new List<int>();
      GetLeafDepths(treeModel.Root, 0, list);
      var row = new DataRow("Depths", "", list.Select(x => (double) x)) {
        VisualProperties = {ChartType = DataRowVisualProperties.DataRowChartType.Histogram}
      };
      var hist = new DataTable("LeafDepths");
      hist.Rows.Add(row);
      return new Result(hist.Name, hist);
    }

    /// <summary>
    /// Creates one result entry ("Rule0", "Rule1", ...) per rule of the rule set.
    /// </summary>
    /// <param name="ruleSetModel">The rule set whose rules are reported in the order of <c>Rules</c>.</param>
    /// <param name="pd">The problem data the first rule is evaluated on.</param>
    /// <param name="resultName">The name of the returned result.</param>
    /// <param name="displayModels">If true, each rule entry additionally contains a regression solution.</param>
    /// <returns>A result wrapping a <c>ResultCollection</c> with one entry per rule.</returns>
    /// <remarks>
    /// Every rule is evaluated only on the rows that were not covered by any of the preceding rules.
    /// </remarks>
    public static Result CreateRulesResult(M5RuleSetModel ruleSetModel, IRegressionProblemData pd, string resultName, bool displayModels) {
      var res = new ResultCollection();
      var remaining = pd; // rows not yet claimed by an earlier rule
      var i = 0;
      foreach (var rule in ruleSetModel.Rules) {
        IRegressionProblemData notCovered;
        res.Add(new Result("Rule" + i++, CreateRulesResult(rule, remaining, displayModels, out notCovered)));
        remaining = notCovered;
      }
      return new Result(resultName, res);
    }

    /// <summary>
    /// Creates a column chart showing how many training and test rows are covered by each rule.
    /// </summary>
    /// <param name="setModel">The rule set to analyze.</param>
    /// <param name="problemData">Supplies the dataset as well as the training and test indices.</param>
    /// <returns>A result named "Coverage" wrapping a data table with the rows "Training" and "Test".</returns>
    public static IResult CreateCoverageDiagram(M5RuleSetModel setModel, IRegressionProblemData problemData) {
      var res = new DataTable(CoverageDiagramResultName);
      var training = CountCoverage(setModel, problemData.Dataset, problemData.TrainingIndices);
      var test = CountCoverage(setModel, problemData.Dataset, problemData.TestIndices);
      res.Rows.Add(new DataRow("Training", "", training));
      res.Rows.Add(new DataRow("Test", "", test));

      foreach (var row in res.Rows) {
        row.VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Columns;
      }
      // Fix the x axis to [0, number of rules + 1].
      res.VisualProperties.XAxisMaximumFixedValue = training.Count + 1;
      res.VisualProperties.XAxisMaximumAuto = false;
      res.VisualProperties.XAxisMinimumFixedValue = 0;
      res.VisualProperties.XAxisMinimumAuto = false;
      res.VisualProperties.XAxisTitle = "Rule";
      res.VisualProperties.YAxisTitle = "Covered Instances";

      return new Result(CoverageDiagramResultName, res);
    }

    /// <summary>
    /// Recursively collects the depth of every leaf below (and including) <paramref name="n"/>.
    /// </summary>
    /// <param name="n">The current node; <c>null</c> is ignored.</param>
    /// <param name="depth">The depth of <paramref name="n"/>.</param>
    /// <param name="res">The collection the leaf depths are appended to.</param>
    /// <remarks>A node counts as leaf when both <c>Left</c> and <c>Right</c> are <c>null</c>.</remarks>
    private static void GetLeafDepths(M5NodeModel n, int depth, ICollection<int> res) {
      if (n == null) return;
      if (n.Left == null && n.Right == null) res.Add(depth);
      else {
        GetLeafDepths(n.Left, depth + 1, res);
        GetLeafDepths(n.Right, depth + 1, res);
      }
    }

    /// <summary>
    /// Creates the result entries for a single rule and splits off the rows the rule does not cover.
    /// </summary>
    /// <param name="m5RuleModel">The rule to report.</param>
    /// <param name="pd">The problem data the rule is evaluated on.</param>
    /// <param name="displayModels">If true, a regression solution on the covered rows is added to the result.</param>
    /// <param name="notCovered">
    /// Receives new problem data containing only the training and test rows of <paramref name="pd"/>
    /// that are not covered by the rule.
    /// </param>
    /// <returns>
    /// A collection with the rule's condition string, the number of covered training rows and,
    /// if requested, the regression solution.
    /// </returns>
    private static ResultCollection CreateRulesResult(M5RuleModel m5RuleModel, IRegressionProblemData pd, bool displayModels, out IRegressionProblemData notCovered) {
      // Evaluate the rule exactly once per row; ToLookup keeps the original row order within each group
      // and yields an empty sequence for a key that never occurred.
      var trainingByCover = pd.TrainingIndices.ToLookup(x => m5RuleModel.Covers(pd.Dataset, x));
      var testByCover = pd.TestIndices.ToLookup(x => m5RuleModel.Covers(pd.Dataset, x));

      notCovered = CreateSubProblemData(pd, trainingByCover[false].ToArray(), testByCover[false].ToArray());

      var coveredTraining = trainingByCover[true].ToArray();
      var res = new ResultCollection {
        new Result(ConditionResultName, new StringValue(m5RuleModel.ToCompactString())),
        new Result(CoverResultName, new IntValue(coveredTraining.Length))
      };
      if (displayModels) {
        // The covered subset is only needed for the solution, so it is built on demand.
        var covered = CreateSubProblemData(pd, coveredTraining, testByCover[true].ToArray());
        res.Add(new Result(RuleModelResultName, m5RuleModel.CreateRegressionSolution(covered)));
      }
      return res;
    }

    /// <summary>
    /// Copies the given rows of all double variables into a new dataset and wraps it in problem data
    /// whose training partition holds the <paramref name="training"/> rows followed by a test partition
    /// holding the <paramref name="test"/> rows.
    /// </summary>
    /// <param name="pd">Source of the values, the allowed input variables and the target variable.</param>
    /// <param name="training">Row indices (into <c>pd.Dataset</c>) that form the new training partition.</param>
    /// <param name="test">Row indices (into <c>pd.Dataset</c>) that form the new test partition.</param>
    /// <returns>The newly created problem data.</returns>
    private static RegressionProblemData CreateSubProblemData(IRegressionProblemData pd, int[] training, int[] test) {
      var rows = training.Concat(test).ToArray();
      var data = new Dataset(pd.Dataset.DoubleVariables, pd.Dataset.DoubleVariables.Select(v => pd.Dataset.GetDoubleValues(v, rows).ToArray()));
      var res = new RegressionProblemData(data, pd.AllowedInputVariables, pd.TargetVariable);
      res.TestPartition.Start = res.TrainingPartition.End = training.Length;
      res.TestPartition.End = training.Length + test.Length;
      return res;
    }

    /// <summary>
    /// Counts, for every rule, the rows for which it is the first covering rule.
    /// </summary>
    /// <param name="setModel">The rule set; rules are tried in the order of <c>Rules</c>.</param>
    /// <param name="data">The dataset the rows refer to.</param>
    /// <param name="rows">The row indices to classify.</param>
    /// <returns>
    /// One count per rule (same order as <c>Rules</c>). A row is attributed only to the first rule
    /// that covers it; rows covered by no rule are not counted.
    /// </returns>
    private static IReadOnlyList<double> CountCoverage(M5RuleSetModel setModel, IDataset data, IEnumerable<int> rows) {
      var rules = setModel.Rules.ToArray();
      var res = new double[rules.Length];
      foreach (int row in rows) {
        for (int i = 0; i < rules.Length; i++) {
          if (rules[i].Covers(data, row)) {
            res[i]++;
            break;
          }
        }
      }
      return res;
    }
  }
}
Note: See TracBrowser for help on using the repository browser.