#region License Information
/* HeuristicLab
* Copyright (C) 2002-2013 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Core;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
using HeuristicLab.Problems.DataAnalysis;
namespace HeuristicLab.Algorithms.DataAnalysis {
///
/// Represents a random forest model for regression and classification
///
[StorableClass]
[Item("RandomForestModel", "Represents a random forest for regression and classification.")]
public sealed class RandomForestModel : NamedItem, IRandomForestModel {
private alglib.decisionforest randomForest;
public alglib.decisionforest RandomForest {
get { return randomForest; }
set {
if (value != randomForest) {
if (value == null) throw new ArgumentNullException();
randomForest = value;
OnChanged(EventArgs.Empty);
}
}
}
[Storable]
private string targetVariable;
[Storable]
private string[] allowedInputVariables;
[Storable]
private double[] classValues;
[StorableConstructor]
private RandomForestModel(bool deserializing)
: base(deserializing) {
if (deserializing)
randomForest = new alglib.decisionforest();
}
private RandomForestModel(RandomForestModel original, Cloner cloner)
: base(original, cloner) {
randomForest = new alglib.decisionforest();
randomForest.innerobj.bufsize = original.randomForest.innerobj.bufsize;
randomForest.innerobj.nclasses = original.randomForest.innerobj.nclasses;
randomForest.innerobj.ntrees = original.randomForest.innerobj.ntrees;
randomForest.innerobj.nvars = original.randomForest.innerobj.nvars;
randomForest.innerobj.trees = (double[])original.randomForest.innerobj.trees.Clone();
targetVariable = original.targetVariable;
allowedInputVariables = (string[])original.allowedInputVariables.Clone();
if (original.classValues != null)
this.classValues = (double[])original.classValues.Clone();
}
public RandomForestModel(alglib.decisionforest randomForest, string targetVariable, IEnumerable allowedInputVariables, double[] classValues = null)
: base() {
this.name = ItemName;
this.description = ItemDescription;
this.randomForest = randomForest;
this.targetVariable = targetVariable;
this.allowedInputVariables = allowedInputVariables.ToArray();
if (classValues != null)
this.classValues = (double[])classValues.Clone();
}
public override IDeepCloneable Clone(Cloner cloner) {
return new RandomForestModel(this, cloner);
}
public IEnumerable GetEstimatedValues(Dataset dataset, IEnumerable rows) {
double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
int n = inputData.GetLength(0);
int columns = inputData.GetLength(1);
double[] x = new double[columns];
double[] y = new double[1];
for (int row = 0; row < n; row++) {
for (int column = 0; column < columns; column++) {
x[column] = inputData[row, column];
}
alglib.dfprocess(randomForest, x, ref y);
yield return y[0];
}
}
public IEnumerable GetEstimatedClassValues(Dataset dataset, IEnumerable rows) {
double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
int n = inputData.GetLength(0);
int columns = inputData.GetLength(1);
double[] x = new double[columns];
double[] y = new double[randomForest.innerobj.nclasses];
for (int row = 0; row < n; row++) {
for (int column = 0; column < columns; column++) {
x[column] = inputData[row, column];
}
alglib.dfprocess(randomForest, x, ref y);
// find class for with the largest probability value
int maxProbClassIndex = 0;
double maxProb = y[0];
for (int i = 1; i < y.Length; i++) {
if (maxProb < y[i]) {
maxProb = y[i];
maxProbClassIndex = i;
}
}
yield return classValues[maxProbClassIndex];
}
}
public IRandomForestRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
return new RandomForestRegressionSolution(new RegressionProblemData(problemData), this);
}
IRegressionSolution IRegressionModel.CreateRegressionSolution(IRegressionProblemData problemData) {
return CreateRegressionSolution(problemData);
}
public IRandomForestClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData) {
return new RandomForestClassificationSolution(new ClassificationProblemData(problemData), this);
}
IClassificationSolution IClassificationModel.CreateClassificationSolution(IClassificationProblemData problemData) {
return CreateClassificationSolution(problemData);
}
#region events
public event EventHandler Changed;
private void OnChanged(EventArgs e) {
var handlers = Changed;
if (handlers != null)
handlers(this, e);
}
#endregion
#region persistence
[Storable]
private int RandomForestBufSize {
get {
return randomForest.innerobj.bufsize;
}
set {
randomForest.innerobj.bufsize = value;
}
}
[Storable]
private int RandomForestNClasses {
get {
return randomForest.innerobj.nclasses;
}
set {
randomForest.innerobj.nclasses = value;
}
}
[Storable]
private int RandomForestNTrees {
get {
return randomForest.innerobj.ntrees;
}
set {
randomForest.innerobj.ntrees = value;
}
}
[Storable]
private int RandomForestNVars {
get {
return randomForest.innerobj.nvars;
}
set {
randomForest.innerobj.nvars = value;
}
}
[Storable]
private double[] RandomForestTrees {
get {
return randomForest.innerobj.trees;
}
set {
randomForest.innerobj.trees = value;
}
}
#endregion
}
}