#region License Information
/* HeuristicLab
* Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
*
* This file is part of HeuristicLab.
*
* HeuristicLab is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* HeuristicLab is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with HeuristicLab. If not, see .
*/
#endregion
using System;
using System.Collections.Generic;
using System.Linq;
using HeuristicLab.Common;
using HeuristicLab.Problems.DataAnalysis;
using HEAL.Attic;
namespace HeuristicLab.Algorithms.DataAnalysis {
// multidimensional extension of http://www2.stat.duke.edu/~tjl13/s101/slides/unit6lec3H.pdf
[StorableType("15F2295C-28C1-48C3-8DCB-9470823C6734")]
internal sealed class PreconstructedLinearModel : RegressionModel {
[Storable]
public Dictionary Coefficients { get; private set; }
[Storable]
public double Intercept { get; private set; }
public override IEnumerable VariablesUsedForPrediction {
get { return Coefficients.Keys; }
}
#region HLConstructors
[StorableConstructor]
private PreconstructedLinearModel(StorableConstructorFlag _) : base(_) { }
private PreconstructedLinearModel(PreconstructedLinearModel original, Cloner cloner) : base(original, cloner) {
if (original.Coefficients != null) Coefficients = original.Coefficients.ToDictionary(x => x.Key, x => x.Value);
Intercept = original.Intercept;
}
public PreconstructedLinearModel(Dictionary coefficients, double intercept, string targetvariable) : base(targetvariable) {
Coefficients = new Dictionary(coefficients);
Intercept = intercept;
}
public PreconstructedLinearModel(double intercept, string targetvariable) : base(targetvariable) {
Coefficients = new Dictionary();
Intercept = intercept;
}
public override IDeepCloneable Clone(Cloner cloner) {
return new PreconstructedLinearModel(this, cloner);
}
#endregion
public static PreconstructedLinearModel CreateLinearModel(IRegressionProblemData pd, out double rmse) {
return AlternativeCalculation(pd, out rmse);
}
private static PreconstructedLinearModel ClassicCalculation(IRegressionProblemData pd) {
var inputMatrix = pd.Dataset.ToArray(pd.AllowedInputVariables.Concat(new[] {
pd.TargetVariable
}), pd.AllIndices);
var nFeatures = inputMatrix.GetLength(1) - 1;
double[] coefficients;
alglib.linearmodel lm;
alglib.lrreport ar;
int retVal;
alglib.lrbuild(inputMatrix, inputMatrix.GetLength(0), nFeatures, out retVal, out lm, out ar);
if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution");
alglib.lrunpack(lm, out coefficients, out nFeatures);
var coeffs = pd.AllowedInputVariables.Zip(coefficients, (s, d) => new {s, d}).ToDictionary(x => x.s, x => x.d);
var res = new PreconstructedLinearModel(coeffs, coefficients[nFeatures], pd.TargetVariable);
return res;
}
private static PreconstructedLinearModel AlternativeCalculation(IRegressionProblemData pd, out double rmse) {
var variables = pd.AllowedInputVariables.ToList();
var n = variables.Count;
var m = pd.TrainingIndices.Count();
//Set up X^T
var inTr = new double[n + 1, m];
for (var i = 0; i < n; i++) {
var vdata = pd.Dataset.GetDoubleValues(variables[i], pd.TrainingIndices).ToArray();
for (var j = 0; j < m; j++) inTr[i, j] = vdata[j];
}
for (var i = 0; i < m; i++) inTr[n, i] = 1;
//Set up y
var y = new double[m, 1];
var ydata = pd.TargetVariableTrainingValues.ToArray();
for (var i = 0; i < m; i++) y[i, 0] = ydata[i];
//Perform linear regression
var aTy = new double[n + 1, 1];
var aTa = new double[n + 1, n + 1];
var aTyVector = new double[n + 1];
int info;
alglib.densesolverreport report;
double[] coefficients;
//Perform linear regression
alglib.rmatrixgemm(n + 1, 1, m, 1, inTr, 0, 0, 0, y, 0, 0, 0, 0, ref aTy, 0, 0); //aTy = inTr * y;
alglib.rmatrixgemm(n + 1, n + 1, m, 1, inTr, 0, 0, 0, inTr, 0, 0, 1, 0, ref aTa, 0, 0); //aTa = inTr * t(inTr) +aTa //
alglib.spdmatrixcholesky(ref aTa, n + 1, true);
for (var i = 0; i < n + 1; i++) aTyVector[i] = aTy[i, 0];
alglib.spdmatrixcholeskysolve(aTa, n + 1, true, aTyVector, out info, out report, out coefficients);
//if Cholesky calculation fails fall back to classic linear regresseion
if (info != 1) {
alglib.linearmodel lm;
alglib.lrreport ar;
int retVal;
var inputMatrix = pd.Dataset.ToArray(pd.AllowedInputVariables.Concat(new[] {
pd.TargetVariable
}), pd.AllIndices);
alglib.lrbuild(inputMatrix, inputMatrix.GetLength(0), n, out retVal, out lm, out ar);
if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution");
alglib.lrunpack(lm, out coefficients, out n);
}
var coeffs = Enumerable.Range(0, n).ToDictionary(i => variables[i], i => coefficients[i]);
var model = new PreconstructedLinearModel(coeffs, coefficients[n], pd.TargetVariable);
rmse = pd.TrainingIndices.Select(i => pd.Dataset.GetDoubleValue(pd.TargetVariable, i) - model.GetEstimatedValue(pd.Dataset, i)).Sum(r => r * r) / m;
rmse = Math.Sqrt(rmse);
return model;
}
public override IEnumerable GetEstimatedValues(IDataset dataset, IEnumerable rows) {
return rows.Select(row => GetEstimatedValue(dataset, row));
}
public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
return new RegressionSolution(this, problemData);
}
#region helpers
private double GetEstimatedValue(IDataset dataset, int row) {
return Intercept + (Coefficients.Count == 0 ? 0 : Coefficients.Sum(s => s.Value * dataset.GetDoubleValue(s.Key, row)));
}
#endregion
}
}