Free cookie consent management tool by TermsFeed Policy Generator

source: branches/DataPreprocessing/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs @ 10204

Last change on this file since 10204 was 9456, checked in by swagner, 12 years ago

Updated copyright year and added some missing license headers (#1889)

File size: 7.1 KB
Line 
#region License Information
/* HeuristicLab
 * Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
 *
 * This file is part of HeuristicLab.
 *
 * HeuristicLab is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * HeuristicLab is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
 */
#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
28using HeuristicLab.Problems.DataAnalysis;
29
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Represents a random forest model for regression and classification.
  /// </summary>
  /// <remarks>
  /// The wrapped <c>alglib.decisionforest</c> field carries no [Storable] attribute itself; the
  /// fields of its inner object (bufsize, nclasses, ntrees, nvars, trees) are exposed through
  /// the private [Storable] properties in the persistence region instead.
  /// </remarks>
  [StorableClass]
  [Item("RandomForestModel", "Represents a random forest for regression and classification.")]
  public sealed class RandomForestModel : NamedItem, IRandomForestModel {

    private alglib.decisionforest randomForest;
    /// <summary>
    /// Gets or sets the wrapped ALGLIB decision forest. Assigning a different instance
    /// raises <see cref="Changed"/>; re-assigning the current instance does nothing.
    /// </summary>
    /// <exception cref="ArgumentNullException">Thrown when the assigned value is null.</exception>
    public alglib.decisionforest RandomForest {
      get { return randomForest; }
      set {
        // Reject null up front so the exception names the offending argument.
        if (value == null) throw new ArgumentNullException("value");
        if (value != randomForest) {
          randomForest = value;
          OnChanged(EventArgs.Empty);
        }
      }
    }

    [Storable]
    private string targetVariable;
    [Storable]
    private string[] allowedInputVariables;
    // Only set when class values were passed to the public constructor; null otherwise.
    [Storable]
    private double[] classValues;

    /// <summary>
    /// Constructor used by the persistence framework.
    /// </summary>
    [StorableConstructor]
    private RandomForestModel(bool deserializing)
      : base(deserializing) {
      // The [Storable] property setters below write into randomForest.innerobj,
      // so an empty forest has to exist before they can be invoked.
      if (deserializing)
        randomForest = new alglib.decisionforest();
    }

    /// <summary>
    /// Cloning constructor: copies the inner forest fields and the arrays of
    /// <paramref name="original"/> so that the clone does not share them with the original.
    /// </summary>
    private RandomForestModel(RandomForestModel original, Cloner cloner)
      : base(original, cloner) {
      randomForest = new alglib.decisionforest();
      randomForest.innerobj.bufsize = original.randomForest.innerobj.bufsize;
      randomForest.innerobj.nclasses = original.randomForest.innerobj.nclasses;
      randomForest.innerobj.ntrees = original.randomForest.innerobj.ntrees;
      randomForest.innerobj.nvars = original.randomForest.innerobj.nvars;
      randomForest.innerobj.trees = (double[])original.randomForest.innerobj.trees.Clone();
      targetVariable = original.targetVariable;
      allowedInputVariables = (string[])original.allowedInputVariables.Clone();
      if (original.classValues != null)
        this.classValues = (double[])original.classValues.Clone();
    }

    /// <summary>
    /// Creates a model around an existing decision forest.
    /// </summary>
    /// <param name="randomForest">The forest to wrap; stored by reference.</param>
    /// <param name="targetVariable">Name of the target variable.</param>
    /// <param name="allowedInputVariables">Names of the input variables; copied into an array.</param>
    /// <param name="classValues">
    /// Optional class values (copied). Required for <see cref="GetEstimatedClassValues"/>,
    /// which indexes into this array.
    /// </param>
    /// <exception cref="ArgumentNullException">
    /// Thrown when <paramref name="randomForest"/> or <paramref name="allowedInputVariables"/> is null.
    /// </exception>
    public RandomForestModel(alglib.decisionforest randomForest, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null)
      : base() {
      // A null forest would only surface later as a NullReferenceException in cloning,
      // prediction or persistence, so it is rejected here.
      if (randomForest == null) throw new ArgumentNullException("randomForest");
      if (allowedInputVariables == null) throw new ArgumentNullException("allowedInputVariables");
      this.name = ItemName;
      this.description = ItemDescription;
      this.randomForest = randomForest;
      this.targetVariable = targetVariable;
      this.allowedInputVariables = allowedInputVariables.ToArray();
      if (classValues != null)
        this.classValues = (double[])classValues.Clone();
    }

    /// <summary>
    /// Creates a copy of this model via the cloning constructor.
    /// </summary>
    public override IDeepCloneable Clone(Cloner cloner) {
      return new RandomForestModel(this, cloner);
    }

    /// <summary>
    /// Lazily yields one estimated value per requested row: the first element of the
    /// output vector that <c>alglib.dfprocess</c> produces for that row.
    /// </summary>
    /// <param name="dataset">Dataset to read the allowed input variables from.</param>
    /// <param name="rows">Rows of <paramref name="dataset"/> to evaluate.</param>
    /// <returns>A deferred sequence; no work is done until it is enumerated.</returns>
    public IEnumerable<double> GetEstimatedValues(Dataset dataset, IEnumerable<int> rows) {
      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);

      int n = inputData.GetLength(0);
      double[] x = new double[inputData.GetLength(1)];
      double[] y = new double[1];

      for (int row = 0; row < n; row++) {
        CopyRow(inputData, row, x);
        alglib.dfprocess(randomForest, x, ref y);
        yield return y[0];
      }
    }

    /// <summary>
    /// Lazily yields one estimated class value per requested row: the entry of the stored
    /// class values whose index has the largest value in the forest's output vector.
    /// </summary>
    /// <param name="dataset">Dataset to read the allowed input variables from.</param>
    /// <param name="rows">Rows of <paramref name="dataset"/> to evaluate.</param>
    /// <returns>A deferred sequence; no work is done until it is enumerated.</returns>
    /// <exception cref="InvalidOperationException">
    /// Thrown on enumeration when the model was created without class values.
    /// </exception>
    public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
      if (classValues == null)
        throw new InvalidOperationException("The random forest model has no class values and cannot estimate class values.");

      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);

      int n = inputData.GetLength(0);
      double[] x = new double[inputData.GetLength(1)];
      double[] y = new double[randomForest.innerobj.nclasses];

      for (int row = 0; row < n; row++) {
        CopyRow(inputData, row, x);
        alglib.dfprocess(randomForest, x, ref y);
        // find the class with the largest probability value
        yield return classValues[IndexOfMaximum(y)];
      }
    }

    // Copies one row of the input matrix into the reusable input vector (target.Length columns).
    private static void CopyRow(double[,] matrix, int row, double[] target) {
      for (int column = 0; column < target.Length; column++) {
        target[column] = matrix[row, column];
      }
    }

    // Returns the index of the largest element; the first occurrence wins on ties.
    private static int IndexOfMaximum(double[] values) {
      int maxIndex = 0;
      double max = values[0];
      for (int i = 1; i < values.Length; i++) {
        if (max < values[i]) {
          max = values[i];
          maxIndex = i;
        }
      }
      return maxIndex;
    }

    /// <summary>
    /// Creates a regression solution for this model on a copy of <paramref name="problemData"/>.
    /// </summary>
    public IRandomForestRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
      return new RandomForestRegressionSolution(new RegressionProblemData(problemData), this);
    }
    IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
      return CreateRegressionSolution(problemData);
    }
    /// <summary>
    /// Creates a classification solution for this model on a copy of <paramref name="problemData"/>.
    /// </summary>
    public IRandomForestClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
      return new RandomForestClassificationSolution(new ClassificationProblemData(problemData), this);
    }
    IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) {
      return CreateClassificationSolution(problemData);
    }

    #region events
    /// <summary>
    /// Raised when a different forest instance is assigned to <see cref="RandomForest"/>.
    /// </summary>
    public event EventHandler Changed;
    private void OnChanged(EventArgs e) {
      // Copy to a local so the null check and the invocation see the same delegate.
      var handlers = Changed;
      if (handlers != null)
        handlers(this, e);
    }
    #endregion

    #region persistence
    // The properties below expose the fields of randomForest.innerobj one by one so that
    // they can be marked [Storable]. Their names are kept unchanged on purpose.
    [Storable]
    private int RandomForestBufSize {
      get {
        return randomForest.innerobj.bufsize;
      }
      set {
        randomForest.innerobj.bufsize = value;
      }
    }
    [Storable]
    private int RandomForestNClasses {
      get {
        return randomForest.innerobj.nclasses;
      }
      set {
        randomForest.innerobj.nclasses = value;
      }
    }
    [Storable]
    private int RandomForestNTrees {
      get {
        return randomForest.innerobj.ntrees;
      }
      set {
        randomForest.innerobj.ntrees = value;
      }
    }
    [Storable]
    private int RandomForestNVars {
      get {
        return randomForest.innerobj.nvars;
      }
      set {
        randomForest.innerobj.nvars = value;
      }
    }
    [Storable]
    private double[] RandomForestTrees {
      get {
        return randomForest.innerobj.trees;
      }
      set {
        randomForest.innerobj.trees = value;
      }
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.