Changeset 13940
- Timestamp: 06/28/16 12:53:54
- Files: 1 edited
branches/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/ElasticNetLinearRegression.cs
r13930 → r13940

  public sealed class ElasticNetLinearRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> {
    private const string PenalityParameterName = "Penality";
-   private const string LambdaParameterName = "Lambda";
+   private const string LogLambdaParameterName = "Log10(Lambda)";
    #region parameters
    public IFixedValueParameter<DoubleValue> PenalityParameter {
      get { return (IFixedValueParameter<DoubleValue>)Parameters[PenalityParameterName]; }
    }
-   public IValueParameter<DoubleValue> LambdaParameter {
-     get { return (IValueParameter<DoubleValue>)Parameters[LambdaParameterName]; }
+   public IValueParameter<DoubleValue> LogLambdaParameter {
+     get { return (IValueParameter<DoubleValue>)Parameters[LogLambdaParameterName]; }
    }
    #endregion
…
      set { PenalityParameter.Value.Value = value; }
    }
-   public DoubleValue Lambda {
-     get { return LambdaParameter.Value; }
-     set { LambdaParameter.Value = value; }
+   public DoubleValue LogLambda {
+     get { return LogLambdaParameter.Value; }
+     set { LogLambdaParameter.Value = value; }
    }
    #endregion
…
      : base(original, cloner) {
    }
-   public ElasticNetLinearRegression() : base() {
+   public ElasticNetLinearRegression()
+     : base() {
      Problem = new RegressionProblem();
      Parameters.Add(new FixedValueParameter<DoubleValue>(PenalityParameterName, "Penalty factor for balancing between ridge (0.0) and lasso (1.0) regression", new DoubleValue(0.5)));
-     Parameters.Add(new OptionalValueParameter<DoubleValue>(LambdaParameterName, "Optional: the value of lambda for which to calculate an elastic-net solution. lambda == null => calculate the whole path of all lambdas"));
+     Parameters.Add(new OptionalValueParameter<DoubleValue>(LogLambdaParameterName, "Optional: the value of lambda for which to calculate an elastic-net solution. lambda == null => calculate the whole path of all lambdas"));
    }
…
    protected override void Run() {
-     if (Lambda == null) {
+     if (LogLambda == null) {
        CreateSolutionPath();
      } else {
-       CreateSolution(Lambda.Value);
+       CreateSolution(LogLambda.Value);
      }
    }

-   private void CreateSolution(double lambda) {
+   private void CreateSolution(double logLambda) {
      double trainRsq;
      double testRsq;
-     var coeff = CreateElasticNetLinearRegressionSolution(Problem.ProblemData, Penality, lambda, out trainRsq, out testRsq);
+     var coeff = CreateElasticNetLinearRegressionSolution(Problem.ProblemData, Penality, Math.Pow(10, logLambda), out trainRsq, out testRsq);
      Results.Add(new Result("R² (train)", new DoubleValue(trainRsq)));
      Results.Add(new Result("R² (test)", new DoubleValue(testRsq)));
…
      RunElasticNetLinearRegression(Problem.ProblemData, Penality, out lambda, out trainRsq, out testRsq, out coeff, out intercept);

-     var coeffTable = new DataTable("Coefficient Paths", "The paths of coefficient values over different lambda values");
+     var coeffTable = new DataTable("Coefficient Paths", "The paths of standarized coefficient values over different lambda values");
      var nLambdas = lambda.Length;
      var nCoeff = coeff.GetLength(1);
…
      for (int i = 0; i < nCoeff; i++) {
        var coeffId = allowedVars[i];
-       var path = Enumerable.Range(0, nLambdas).Select(r => coeff[r, i]).ToArray();
+       double sigma = Problem.ProblemData.Dataset.GetDoubleValues(coeffId).StandardDeviation();
+       var path = Enumerable.Range(0, nLambdas).Select(r => coeff[r, i] * sigma).ToArray();
        dataRows[i] = new DataRow(coeffId, coeffId, path);
        coeffTable.Rows.Add(dataRows[i]);
…
      Results.Add(new Result(coeffTable.Name, coeffTable.Description, coeffTable));

-     var rsqTable = new DataTable("R-Squared", "Path of R² values over different lambda values");
-     rsqTable.Rows.Add(new DataRow("R² (train)", "Path of R² values over different lambda values", trainRsq));
-     rsqTable.Rows.Add(new DataRow("R² (test)", "Path of R² values over different lambda values", testRsq));
-     rsqTable.Rows.Add(new DataRow("Lambda", "The lambda values along the path", lambda));
-     rsqTable.Rows["Lambda"].VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
-     rsqTable.Rows["Lambda"].VisualProperties.SecondYAxis = true;
-     rsqTable.VisualProperties.SecondYAxisMinimumFixedValue = 1E-7;
-     rsqTable.VisualProperties.SecondYAxisLogScale = true;
-     Results.Add(new Result(rsqTable.Name, rsqTable.Description, rsqTable));
+     var rsqPlot = new ScatterPlot("R-Squared", "Path of R² values over different lambda values");
+     rsqPlot.VisualProperties.YAxisMaximumAuto = false;
+     rsqPlot.VisualProperties.YAxisMinimumAuto = false;
+     rsqPlot.VisualProperties.XAxisMaximumAuto = false;
+     rsqPlot.VisualProperties.XAxisMinimumAuto = false;
+
+     rsqPlot.VisualProperties.YAxisMinimumFixedValue = 0;
+     rsqPlot.VisualProperties.YAxisMaximumFixedValue = 1.0;
+     rsqPlot.VisualProperties.XAxisTitle = "Log10(Lambda)";
+     rsqPlot.VisualProperties.YAxisTitle = "R²";
+     rsqPlot.Rows.Add(new ScatterPlotDataRow("R² (train)", "Path of R² values over different lambda values", lambda.Zip(trainRsq, (l, r) => new Point2D<double>(Math.Log10(l), r))));
+     rsqPlot.Rows.Add(new ScatterPlotDataRow("R² (test)", "Path of R² values over different lambda values", lambda.Zip(testRsq, (l, r) => new Point2D<double>(Math.Log10(l), r))));
+     if (lambda.Length > 2) {
+       rsqPlot.VisualProperties.XAxisMinimumFixedValue = Math.Floor(Math.Log10(lambda.Last()));
+       rsqPlot.VisualProperties.XAxisMaximumFixedValue = Math.Ceiling(Math.Log10(lambda.Skip(1).First()));
+     }
+     rsqPlot.Rows["R² (train)"].VisualProperties.PointSize = 5;
+     rsqPlot.Rows["R² (test)"].VisualProperties.PointSize = 5;
+
+     Results.Add(new Result(rsqPlot.Name, rsqPlot.Description, rsqPlot));
    }
…
      int nx = numVars;
      double thr = 1.0e-5; // default value as recommended in glmnet
-     int isd = 0; // => regression on original predictor variables
+     int isd = 1; // => regression on standardized predictor variables
      int intr = 1; // => do include intercept in model
      int maxit = 100000; // default value as recommended in glmnet
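The two functional changes in this revision are that the lambda parameter is now specified on a log10 scale and converted back with Math.Pow(10, logLambda) before being handed to the glmnet routines (which now run on standardized predictors, isd = 1), and that the plotted coefficient paths are multiplied by each predictor's standard deviation so the curves are comparable across variables. The following stand-alone C# sketch illustrates both transformations outside of HeuristicLab; the class name, method names, and sample data (ElasticNetChangeSketch, LambdaFromLog10, StandardizedPath) are hypothetical, and the population standard deviation used here may differ from HeuristicLab's StandardDeviation() extension method.

using System;
using System.Linq;

// Stand-alone sketch; not part of changeset r13940. All names and data are hypothetical.
public static class ElasticNetChangeSketch {
  // r13940 stores the user-facing parameter as Log10(Lambda) and converts it back
  // to the raw lambda before it is passed on to the glmnet wrapper.
  public static double LambdaFromLog10(double log10Lambda) {
    return Math.Pow(10, log10Lambda);
  }

  // r13940 multiplies each raw coefficient path by the predictor's standard deviation,
  // so the "Coefficient Paths" chart shows standardized coefficients.
  // The population standard deviation is used here for simplicity.
  public static double[] StandardizedPath(double[] rawPath, double[] predictorValues) {
    double mean = predictorValues.Average();
    double sigma = Math.Sqrt(predictorValues.Select(v => (v - mean) * (v - mean)).Average());
    return rawPath.Select(c => c * sigma).ToArray();
  }

  public static void Main() {
    Console.WriteLine(LambdaFromLog10(-2.0));                     // prints 0.01
    var rawPath = new[] { 0.0, 0.5, 1.2 };                        // hypothetical coefficients along the lambda path
    var predictor = new[] { 1.0, 2.0, 3.0, 4.0 };                 // hypothetical predictor column
    Console.WriteLine(string.Join(", ", StandardizedPath(rawPath, predictor)));
  }
}

Plotting R² against Log10(Lambda), as the new ScatterPlot does, matches how glmnet's regularization path is usually visualized, since the lambda sequence is typically generated on a log-spaced grid.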