Free cookie consent management tool by TermsFeed Policy Generator

source: branches/M5Regression/HeuristicLab.Algorithms.DataAnalysis/3.4/M5Regression/MetaModels/PreconstructedLinearModel.cs @ 15470

Last change on this file since 15470 was 15470, checked in by bwerth, 5 years ago

#2847 worked on M5Regression

File size: 5.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
27using HeuristicLab.Problems.DataAnalysis;
28
namespace HeuristicLab.Algorithms.DataAnalysis {
  /// <summary>
  /// Linear regression model that is defined by explicitly stored per-variable
  /// coefficients and an intercept, and that can additionally report a variance
  /// for each prediction.
  /// </summary>
  /// <remarks>
  /// Multidimensional extension of http://www2.stat.duke.edu/~tjl13/s101/slides/unit6lec3H.pdf
  /// </remarks>
  [StorableClass]
  internal sealed class PreconstructedLinearModel : RegressionModel, IConfidenceRegressionModel {
    /// <summary>Coefficient for each input variable, keyed by variable name.</summary>
    [Storable]
    public Dictionary<string, double> Coefficients { get; private set; }

    /// <summary>Constant term added to every prediction.</summary>
    [Storable]
    public double Intercept { get; private set; }

    /// <summary>
    /// Per-variable value that is subtracted from the input before the variance term is computed.
    /// NOTE(review): presumably the per-variable mean of the fitting data; nothing in this file fills it - confirm.
    /// </summary>
    [Storable]
    private Dictionary<string, double> Center { get; set; }

    /// <summary>Per-variable divisor applied to the squared deviation from <see cref="Center"/>.</summary>
    [Storable]
    private Dictionary<string, double> Variances { get; set; }

    /// <summary>Factor that scales the estimated variance of a prediction.</summary>
    [Storable]
    private double ResidualVariance { get; set; }

    /// <summary>Sample size used in the variance formula; 0 means that no variance information is available.</summary>
    [Storable]
    private int SampleSize { get; set; }

    /// <summary>Names of all variables that have a coefficient in this model.</summary>
    public override IEnumerable<string> VariablesUsedForPrediction {
      get { return Coefficients.Keys; }
    }

    #region HLConstructors
    [StorableConstructor]
    private PreconstructedLinearModel(bool deserializing) : base(deserializing) { }

    // Cloning constructor: every dictionary is copied so that the clone shares no mutable state with the original.
    private PreconstructedLinearModel(PreconstructedLinearModel original, Cloner cloner) : base(original, cloner) {
      if (original.Coefficients != null) Coefficients = original.Coefficients.ToDictionary(x => x.Key, x => x.Value);
      Intercept = original.Intercept;
      if (original.Center != null) Center = original.Center.ToDictionary(x => x.Key, x => x.Value);
      if (original.Variances != null) Variances = original.Variances.ToDictionary(x => x.Key, x => x.Value);
      ResidualVariance = original.ResidualVariance;
      SampleSize = original.SampleSize;
    }

    // Creates a model from given coefficients (copied) without any variance information.
    private PreconstructedLinearModel(Dictionary<string, double> coefficients, double intercept, string targetvariable) : base(targetvariable) {
      Coefficients = coefficients.ToDictionary(x => x.Key, x => x.Value);
      Intercept = intercept;
      // Center is initialized together with Variances so that both lookups are always usable.
      Center = new Dictionary<string, double>();
      Variances = new Dictionary<string, double>();
      ResidualVariance = 0;
      SampleSize = 0;
    }

    /// <summary>
    /// Creates a constant model that predicts <paramref name="intercept"/> for every row
    /// and carries no variance information.
    /// </summary>
    /// <param name="intercept">The constant prediction.</param>
    /// <param name="targetvariable">Name of the target variable, passed on to the base class.</param>
    public PreconstructedLinearModel(double intercept, string targetvariable) : base(targetvariable) {
      Coefficients = new Dictionary<string, double>();
      Intercept = intercept;
      // Center is initialized together with Variances so that both lookups are always usable.
      Center = new Dictionary<string, double>();
      Variances = new Dictionary<string, double>();
      ResidualVariance = 0;
      SampleSize = 0;
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new PreconstructedLinearModel(this, cloner);
    }
    #endregion

    /// <summary>
    /// Fits a linear model with alglib on all rows (<c>pd.AllIndices</c>) of the given problem data,
    /// using the allowed input variables as features and the target variable as the last matrix column.
    /// </summary>
    /// <param name="pd">Problem data that supplies the dataset, the input variables and the target variable.</param>
    /// <param name="rmse">Receives the RMS error reported by alglib.</param>
    /// <param name="cvRmse">Receives the cross-validation RMS error reported by alglib.</param>
    /// <returns>
    /// A model holding the fitted coefficients and intercept. The returned model is created without
    /// variance information, so its estimated variances are 0.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="pd"/> is null.</exception>
    /// <exception cref="ArgumentException">alglib did not report success for the regression.</exception>
    public static PreconstructedLinearModel CreateConfidenceLinearModel(IRegressionProblemData pd, out double rmse, out double cvRmse) {
      if (pd == null) throw new ArgumentNullException("pd");

      // Materialize once so the matrix columns and the coefficient names are guaranteed to be in the same order.
      var inputVariables = pd.AllowedInputVariables.ToArray();
      var inputMatrix = pd.Dataset.ToArray(inputVariables.Concat(new[] { pd.TargetVariable }), pd.AllIndices);
      var nFeatures = inputMatrix.GetLength(1) - 1; // the last column holds the target values

      alglib.linearmodel lm;
      alglib.lrreport ar;
      int retVal;
      alglib.lrbuild(inputMatrix, inputMatrix.GetLength(0), nFeatures, out retVal, out lm, out ar);
      if (retVal != 1)
        throw new ArgumentException("Error in calculation of linear regression solution");
      rmse = ar.rmserror;
      cvRmse = ar.cvrmserror;

      double[] coefficients; // last coefficient is for the constant
      alglib.lrunpack(lm, out coefficients, out nFeatures);

      var namedCoefficients = inputVariables
        .Zip(coefficients, (name, value) => new { name, value })
        .ToDictionary(x => x.name, x => x.value);
      return new PreconstructedLinearModel(namedCoefficients, coefficients[nFeatures], pd.TargetVariable);
    }

    /// <summary>Lazily yields the prediction for each of the given rows.</summary>
    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
      return rows.Select(row => GetEstimatedValue(dataset, row));
    }

    /// <summary>Wraps this model and the given problem data in a <see cref="RegressionSolution"/>.</summary>
    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
      return new RegressionSolution(this, problemData);
    }

    /// <summary>Lazily yields the estimated prediction variance for each of the given rows.</summary>
    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
      return rows.Select(row => GetEstimatedVariance(dataset, row));
    }

    #region helpers
    // Intercept plus the coefficient-weighted sum of the row's variable values.
    // Sum() of an empty sequence is 0, so a model without coefficients returns the intercept.
    private double GetEstimatedValue(IDataset dataset, int row) {
      return Intercept + Coefficients.Sum(c => c.Value * dataset.GetDoubleValue(c.Key, row));
    }

    // ResidualVariance * (1/n + sum_i((x_i - center_i)^2 / variance_i) / (n - 1)), or 0 when no sample size is known.
    // NOTE(review): for SampleSize == 1 the divisor (SampleSize - 1) is zero and the result is not finite.
    private double GetEstimatedVariance(IDataset dataset, int row) {
      if (SampleSize == 0) return 0.0;

      var scaledSquaredDeviations = 0.0;
      foreach (var variance in Variances) {
        var deviation = dataset.GetDoubleValue(variance.Key, row) - Center[variance.Key];
        scaledSquaredDeviations += deviation * deviation / variance.Value;
      }
      return ResidualVariance * (1.0 / SampleSize + scaledSquaredDeviations / (SampleSize - 1));
    }
    #endregion
  }
}
Note: See TracBrowser for help on using the repository browser.