Changeset 17074 for stable/HeuristicLab.Algorithms.DataAnalysis
- Timestamp:
- 07/04/19 16:11:29 (5 years ago)
- Location:
- stable
- Files:
-
- 5 edited
- 1 copied
Legend:
- Unmodified
- Added
- Removed
-
stable
- Property svn:mergeinfo changed
/branches/2892_LR-prediction-intervals (added) merged: 15743-15744,16388 /trunk merged: 16389,16415,16448-16449
- Property svn:mergeinfo changed
-
stable/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
/branches/2892_LR-prediction-intervals/HeuristicLab.Algorithms.DataAnalysis (added) merged: 15744,16388 /trunk/HeuristicLab.Algorithms.DataAnalysis merged: 16389,16415,16448-16449
- Property svn:mergeinfo changed
-
stable/HeuristicLab.Algorithms.DataAnalysis/3.4
- Property svn:mergeinfo changed
/branches/2892_LR-prediction-intervals/HeuristicLab.Algorithms.DataAnalysis/3.4 (added) merged: 15744,16388 /trunk/HeuristicLab.Algorithms.DataAnalysis/3.4 merged: 16389,16415,16448-16449
- Property svn:mergeinfo changed
-
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r15788 r17074 263 263 <Compile Include="Linear\MultinomialLogitClassification.cs" /> 264 264 <Compile Include="Linear\MultinomialLogitClassificationSolution.cs" /> 265 <Compile Include="Linear\LinearRegressionModel.cs" /> 265 266 <Compile Include="Linear\MultinomialLogitModel.cs" /> 266 267 <Compile Include="Linear\Scaling.cs" /> -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
r15788 r17074 41 41 [StorableClass] 42 42 public sealed class LinearRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> { 43 private const string LinearRegressionModelResultName = "Linear regression solution"; 43 private const string SolutionResultName = "Linear regression solution"; 44 private const string ConfidenceSolutionResultName = "Solution with prediction intervals"; 44 45 45 46 [StorableConstructor] … … 62 63 protected override void Run(CancellationToken cancellationToken) { 63 64 double rmsError, cvRmsError; 64 var solution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError); 65 Results.Add(new Result(LinearRegressionModelResultName, "The linear regression solution.", solution)); 65 // produce both solutions, to allow symbolic manipulation of LR solutions as well 66 // as the calculation of prediction intervals. 67 // There is no clean way to implement the new model class for LR as a symbolic model. 68 var solution = CreateSolution(Problem.ProblemData, out rmsError, out cvRmsError); 69 #pragma warning disable 168, 3021 70 var symbolicSolution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError); 71 #pragma warning restore 168, 3021 72 Results.Add(new Result(SolutionResultName, "The linear regression solution.", symbolicSolution)); 73 Results.Add(new Result(ConfidenceSolutionResultName, "Linear regression solution with parameter covariance matrix " + 74 "and calculation of prediction intervals", solution)); 66 75 Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the linear regression solution on the training set.", new DoubleValue(rmsError))); 67 76 Results.Add(new Result("Estimated root mean square error (cross-validation)", "The estimated root of the mean of squared errors of the linear regression solution via cross validation.", new DoubleValue(cvRmsError))); 68 77 } 69 78 79 [Obsolete("Use CreateSolution() instead")] 70 80 public static 
ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) { 71 var dataset = problemData.Dataset; 72 string targetVariable = problemData.TargetVariable; 73 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 74 IEnumerable<int> rows = problemData.TrainingIndices; 75 var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>); 76 var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>); 77 var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows); 78 double[,] binaryMatrix = dataset.ToArray(factorVariables, rows); 79 double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows); 80 var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix); 81 82 if (inputMatrix.ContainsNanOrInfinity()) 83 throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset."); 81 IEnumerable<string> doubleVariables; 82 IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables; 83 double[,] inputMatrix; 84 PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables); 84 85 85 86 alglib.linearmodel lm = new alglib.linearmodel(); … … 87 88 int nRows = inputMatrix.GetLength(0); 88 89 int nFeatures = inputMatrix.GetLength(1) - 1; 89 double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant90 90 91 91 int retVal = 1; … … 95 95 cvRmsError = ar.cvrmserror; 96 96 97 double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant 97 98 alglib.lrunpack(lm, out coefficients, out nFeatures); 98 99 99 int nFactorCoeff = binaryMatrix.GetLength(1);100 int nFactorCoeff = factorVariables.Sum(kvp => kvp.Value.Count()); 100 101 int nVarCoeff = doubleVariables.Count(); 101 102 var tree = LinearModelToTreeConverter.CreateTree(factorVariables, 
coefficients.Take(nFactorCoeff).ToArray(), … … 108 109 return solution; 109 110 } 111 112 public static IRegressionSolution CreateSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) { 113 IEnumerable<string> doubleVariables; 114 IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables; 115 double[,] inputMatrix; 116 PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables); 117 118 alglib.linearmodel lm = new alglib.linearmodel(); 119 alglib.lrreport ar = new alglib.lrreport(); 120 int nRows = inputMatrix.GetLength(0); 121 int nFeatures = inputMatrix.GetLength(1) - 1; 122 123 int retVal = 1; 124 alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar); 125 if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution"); 126 rmsError = ar.rmserror; 127 cvRmsError = ar.cvrmserror; 128 129 // get parameters of the model 130 double[] w; 131 int nVars; 132 alglib.lrunpack(lm, out w, out nVars); 133 134 // ar.c is the covariation matrix, array[0..NVars,0..NVars]. 
135 // C[i, j] = Cov(A[i], A[j]) 136 137 var solution = new LinearRegressionModel(w, ar.c, cvRmsError, problemData.TargetVariable, doubleVariables, factorVariables) 138 .CreateRegressionSolution((IRegressionProblemData)problemData.Clone()); 139 solution.Name = "Linear Regression Solution"; 140 return solution; 141 } 142 143 private static void PrepareData(IRegressionProblemData problemData, 144 out double[,] inputMatrix, 145 out IEnumerable<string> doubleVariables, 146 out IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables) { 147 var dataset = problemData.Dataset; 148 string targetVariable = problemData.TargetVariable; 149 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 150 IEnumerable<int> rows = problemData.TrainingIndices; 151 doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>); 152 var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>); 153 factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows); 154 double[,] binaryMatrix = dataset.ToArray(factorVariables, rows); 155 double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows); 156 inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix); 157 158 if (inputMatrix.ContainsNanOrInfinity()) 159 throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset."); 160 } 110 161 #endregion 111 162 } -
stable/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegressionModel.cs
r16389 r17074 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Drawing; 24 25 using System.Linq; 25 26 using HeuristicLab.Common; … … 35 36 [Item("Linear Regression Model", "Represents a linear regression model.")] 36 37 public sealed class LinearRegressionModel : RegressionModel, IConfidenceRegressionModel { 38 public static new Image StaticItemImage { 39 get { return HeuristicLab.Common.Resources.VSImageLibrary.Function; } 40 } 37 41 38 42 [Storable] … … 49 53 get; private set; 50 54 } 51 55 52 56 public override IEnumerable<string> VariablesUsedForPrediction { 53 get { return allowedInputVariables; }57 get { return doubleVariables.Union(factorVariables.Select(f => f.Key)); } 54 58 } 55 59 56 60 [Storable] 57 private string[] allowedInputVariables;61 private string[] doubleVariables; 58 62 [Storable] 59 63 private List<KeyValuePair<string, IEnumerable<string>>> factorVariables; 64 65 /// <summary> 66 /// Enumerable of variable names used by the model including one-hot-encoded of factor variables. 
67 /// </summary> 68 public IEnumerable<string> ParameterNames { 69 get { 70 return factorVariables.SelectMany(kvp => kvp.Value.Select(factorVal => $"{kvp.Key}={factorVal}")) 71 .Concat(doubleVariables) 72 .Concat(new[] { "<const>" }); 73 } 74 } 60 75 61 76 [StorableConstructor] … … 69 84 this.NoiseSigma = original.NoiseSigma; 70 85 71 allowedInputVariables = (string[])original.allowedInputVariables.Clone();86 doubleVariables = (string[])original.doubleVariables.Clone(); 72 87 this.factorVariables = original.factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList(); 73 88 } … … 78 93 this.W = new double[w.Length]; 79 94 Array.Copy(w, W, w.Length); 80 this.C = new double[covariance.GetLength(0), covariance.GetLength(1)];95 this.C = new double[covariance.GetLength(0), covariance.GetLength(1)]; 81 96 Array.Copy(covariance, C, covariance.Length); 82 97 this.NoiseSigma = noiseSigma; 83 this.allowedInputVariables = doubleInputVariables.ToArray(); 98 this.doubleVariables = doubleInputVariables.ToArray(); 99 // clone 84 100 this.factorVariables = factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList(); 85 101 } … … 94 110 95 111 public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 96 double[,] inputData = dataset.ToArray( allowedInputVariables, rows);112 double[,] inputData = dataset.ToArray(doubleVariables, rows); 97 113 double[,] factorData = dataset.ToArray(factorVariables, rows); 98 114 … … 113 129 114 130 public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) { 115 double[,] inputData = dataset.ToArray( allowedInputVariables, rows);131 double[,] inputData = dataset.ToArray(doubleVariables, rows); 116 132 double[,] factorData = dataset.ToArray(factorVariables, rows); 117 133 … … 122 138 123 139 double[] d = new double[C.GetLength(0)]; 124 140 125 141 for (int 
row = 0; row < n; row++) { 126 142 for (int column = 0; column < columns; column++) { 127 d[column] = inputData[row, column];143 d[column] = inputData[row, column]; 128 144 } 129 145 d[columns] = 1; 130 146 131 147 double var = 0.0; 132 for (int i=0;i<d.Length;i++) {133 for (int j = 0;j<d.Length;j++) {148 for (int i = 0; i < d.Length; i++) { 149 for (int j = 0; j < d.Length; j++) { 134 150 var += d[i] * C[i, j] * d[j]; 135 151 } 136 152 } 137 yield return var + NoiseSigma *NoiseSigma;153 yield return var + NoiseSigma * NoiseSigma; 138 154 } 139 155 } 140 141 156 142 157 public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
Note: See TracChangeset for help on using the changeset viewer.