1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 2002-2017 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Collections.Generic;


24  using System.Linq;


25  using HeuristicLab.Common;


26  using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;


27  using HeuristicLab.Problems.DataAnalysis;


28 


29  namespace HeuristicLab.Algorithms.DataAnalysis {


30  //multidimensional extension of http://www2.stat.duke.edu/~tjl13/s101/slides/unit6lec3H.pdf


/// <summary>
/// Linear regression model whose prediction is
/// <c>Intercept + sum(Coefficients[v] * value(v))</c> over all variables <c>v</c> in
/// <see cref="Coefficients"/>. It additionally exposes a per-row variance estimate
/// computed from the stored centers, variances, residual variance and sample size.
/// </summary>
[StorableClass]
internal sealed class PreconstructedLinearModel : RegressionModel, IConfidenceRegressionModel {
  /// <summary>Weight per input variable, keyed by variable name.</summary>
  [Storable]
  public Dictionary<string, double> Coefficients { get; private set; }

  /// <summary>Constant term added to every prediction.</summary>
  [Storable]
  public double Intercept { get; private set; }

  /// <summary>Per-variable value subtracted from the input before the variance estimate is computed.</summary>
  [Storable]
  private Dictionary<string, double> Center { get; set; }

  /// <summary>Per-variable divisor applied to the squared, centered input in the variance estimate.</summary>
  [Storable]
  private Dictionary<string, double> Variances { get; set; }

  /// <summary>Scale factor of the variance estimate.</summary>
  [Storable]
  private double ResidualVariance { get; set; }

  /// <summary>Number of samples used in the variance estimate; 0 disables the estimate.</summary>
  [Storable]
  private int SampleSize { get; set; }

  /// <summary>Names of all variables that have a coefficient in this model.</summary>
  public override IEnumerable<string> VariablesUsedForPrediction {
    get { return Coefficients.Keys; }
  }

  #region HLConstructors
  /// <summary>Constructor used by the persistence framework.</summary>
  [StorableConstructor]
  private PreconstructedLinearModel(bool deserializing) : base(deserializing) { }

  /// <summary>
  /// Cloning constructor. Each dictionary is copied into a new instance so that the clone
  /// does not share mutable state with <paramref name="original"/>; dictionaries that are
  /// <c>null</c> in the original stay <c>null</c> in the clone.
  /// </summary>
  private PreconstructedLinearModel(PreconstructedLinearModel original, Cloner cloner) : base(original, cloner) {
    if (original.Coefficients != null) Coefficients = original.Coefficients.ToDictionary(x => x.Key, x => x.Value);
    Intercept = original.Intercept;
    if (original.Center != null) Center = original.Center.ToDictionary(x => x.Key, x => x.Value);
    if (original.Variances != null) Variances = original.Variances.ToDictionary(x => x.Key, x => x.Value);
    ResidualVariance = original.ResidualVariance;
    SampleSize = original.SampleSize;
  }

  /// <summary>
  /// Creates a model from the given coefficients (copied) and intercept. No variance
  /// information is stored, so the variance estimate of such a model is always 0.
  /// </summary>
  private PreconstructedLinearModel(Dictionary<string, double> coefficients, double intercept, string targetvariable) : base(targetvariable) {
    Coefficients = coefficients.ToDictionary(x => x.Key, x => x.Value);
    Intercept = intercept;
    // Initialized alongside Variances so that neither dictionary is ever null on a freshly built model.
    Center = new Dictionary<string, double>();
    Variances = new Dictionary<string, double>();
    ResidualVariance = 0;
    SampleSize = 0;
  }

  /// <summary>
  /// Creates a constant model that predicts <paramref name="intercept"/> for every row.
  /// </summary>
  public PreconstructedLinearModel(double intercept, string targetvariable) : base(targetvariable) {
    Coefficients = new Dictionary<string, double>();
    Intercept = intercept;
    // Initialized alongside Variances so that neither dictionary is ever null on a freshly built model.
    Center = new Dictionary<string, double>();
    Variances = new Dictionary<string, double>();
    ResidualVariance = 0;
    SampleSize = 0;
  }

  /// <summary>Returns a copy of this model created via the cloning constructor.</summary>
  public override IDeepCloneable Clone(Cloner cloner) {
    return new PreconstructedLinearModel(this, cloner);
  }
  #endregion

  /// <summary>
  /// Fits a linear model on all rows of <paramref name="pd"/> using <c>alglib.lrbuild</c>.
  /// </summary>
  /// <param name="pd">Problem data providing the dataset, the allowed input variables and the target variable.</param>
  /// <param name="rmse">Receives the <c>rmserror</c> value of the alglib report.</param>
  /// <param name="cvRmse">Receives the <c>cvrmserror</c> value of the alglib report.</param>
  /// <returns>A model with one coefficient per allowed input variable plus an intercept.</returns>
  /// <exception cref="ArgumentException">Thrown when <c>alglib.lrbuild</c> does not report the return code 1.</exception>
  public static PreconstructedLinearModel CreateConfidenceLinearModel(IRegressionProblemData pd, out double rmse, out double cvRmse) {
    // Materialize once so that the matrix columns and the coefficient names are guaranteed to line up.
    var inputVariables = pd.AllowedInputVariables.ToList();

    // The target variable is appended as the last column of the matrix.
    var inputMatrix = pd.Dataset.ToArray(inputVariables.Concat(new[] { pd.TargetVariable }), pd.AllIndices);
    var nFeatures = inputMatrix.GetLength(1) - 1; // all columns except the target

    alglib.linearmodel lm;
    alglib.lrreport ar;
    int retVal;
    alglib.lrbuild(inputMatrix, inputMatrix.GetLength(0), nFeatures, out retVal, out lm, out ar);
    if (retVal != 1)
      throw new ArgumentException("Error in calculation of linear regression solution");
    rmse = ar.rmserror;
    cvRmse = ar.cvrmserror;

    double[] coefficients; // last coefficient is for the constant
    alglib.lrunpack(lm, out coefficients, out nFeatures);

    // Zip stops at the shorter sequence, so the trailing constant is not assigned to a variable.
    var weights = inputVariables
      .Zip(coefficients, (name, weight) => new { name, weight })
      .ToDictionary(x => x.name, x => x.weight);
    return new PreconstructedLinearModel(weights, coefficients[nFeatures], pd.TargetVariable);
  }

  /// <summary>Lazily computes the model prediction for each of the given rows.</summary>
  public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    return rows.Select(row => GetEstimatedValue(dataset, row));
  }

  /// <summary>Wraps this model and the given problem data in a <c>RegressionSolution</c>.</summary>
  public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
    return new RegressionSolution(this, problemData);
  }

  /// <summary>Lazily computes the variance estimate for each of the given rows.</summary>
  public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
    return rows.Select(i => GetEstimatedVariance(dataset, i));
  }

  #region helpers
  /// <summary>
  /// Returns the intercept plus the coefficient-weighted sum of the row's variable values.
  /// </summary>
  private double GetEstimatedValue(IDataset dataset, int row) {
    // Sum over an empty dictionary yields 0, so a model without coefficients predicts the intercept.
    return Intercept + Coefficients.Sum(s => s.Value * dataset.GetDoubleValue(s.Key, row));
  }

  /// <summary>
  /// Returns <c>ResidualVariance * (1 / n + sum_v((x_v - Center[v])^2 / Variances[v]) / (n - 1))</c>
  /// with <c>n = SampleSize</c>, or 0 when no sample size is stored.
  /// </summary>
  private double GetEstimatedVariance(IDataset dataset, int row) {
    if (SampleSize == 0) return 0.0;
    var sum = Variances.Sum(v => {
      var deviation = dataset.GetDoubleValue(v.Key, row) - Center[v.Key];
      return deviation * deviation / v.Value;
    });
    return ResidualVariance * (1.0 / SampleSize + sum / (SampleSize - 1));
  }
  #endregion
}


120  } 
