Changeset 16489
- Timestamp: 01/03/19 15:06:27 (6 years ago)
- Location: branches/2942_KNNRegressionClassification/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files:
  - 1 added
  - 2 edited
Legend:
- Unmodified
- Added
- Removed
branches/2942_KNNRegressionClassification/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r16408 r16489 375 375 <SubType>Code</SubType> 376 376 </Compile> 377 <Compile Include="Linear\LinearRegressionModel.cs" /> 377 378 <Compile Include="Linear\MultinomialLogitClassification.cs" /> 378 379 <Compile Include="Linear\MultinomialLogitClassificationSolution.cs" /> -
branches/2942_KNNRegressionClassification/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
r15786 r16489 41 41 [StorableClass] 42 42 public sealed class LinearRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> { 43 private const string LinearRegressionModelResultName = "Linear regression solution"; 43 private const string SolutionResultName = "Linear regression solution"; 44 private const string ConfidenceSolutionResultName = "Solution with prediction intervals"; 44 45 45 46 [StorableConstructor] … … 62 63 protected override void Run(CancellationToken cancellationToken) { 63 64 double rmsError, cvRmsError; 64 var solution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError); 65 Results.Add(new Result(LinearRegressionModelResultName, "The linear regression solution.", solution)); 65 // produce both solutions, to allow symbolic manipulation of LR solutions as well 66 // as the calculation of prediction intervals. 67 // There is no clean way to implement the new model class for LR as a symbolic model. 68 var solution = CreateSolution(Problem.ProblemData, out rmsError, out cvRmsError); 69 #pragma warning disable 168, 3021 70 var symbolicSolution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError); 71 #pragma warning restore 168, 3021 72 Results.Add(new Result(SolutionResultName, "The linear regression solution.", symbolicSolution)); 73 Results.Add(new Result(ConfidenceSolutionResultName, "Linear regression solution with parameter covariance matrix " + 74 "and calculation of prediction intervals", solution)); 66 75 Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the linear regression solution on the training set.", new DoubleValue(rmsError))); 67 76 Results.Add(new Result("Estimated root mean square error (cross-validation)", "The estimated root of the mean of squared errors of the linear regression solution via cross validation.", new DoubleValue(cvRmsError))); 68 77 } 69 78 79 [Obsolete("Use CreateSolution() instead")] 70 80 public static 
ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) { 71 var dataset = problemData.Dataset; 72 string targetVariable = problemData.TargetVariable; 73 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 74 IEnumerable<int> rows = problemData.TrainingIndices; 75 var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>); 76 var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>); 77 var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows); 78 double[,] binaryMatrix = dataset.ToArray(factorVariables, rows); 79 double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows); 80 var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix); 81 82 if (inputMatrix.ContainsNanOrInfinity()) 83 throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset."); 81 IEnumerable<string> doubleVariables; 82 IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables; 83 double[,] inputMatrix; 84 PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables); 84 85 85 86 alglib.linearmodel lm = new alglib.linearmodel(); … … 87 88 int nRows = inputMatrix.GetLength(0); 88 89 int nFeatures = inputMatrix.GetLength(1) - 1; 89 double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant90 90 91 91 int retVal = 1; … … 95 95 cvRmsError = ar.cvrmserror; 96 96 97 double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant 97 98 alglib.lrunpack(lm, out coefficients, out nFeatures); 98 99 99 int nFactorCoeff = binaryMatrix.GetLength(1);100 int nFactorCoeff = factorVariables.Sum(kvp => kvp.Value.Count()); 100 101 int nVarCoeff = doubleVariables.Count(); 101 102 var tree = LinearModelToTreeConverter.CreateTree(factorVariables, 
coefficients.Take(nFactorCoeff).ToArray(), … … 108 109 return solution; 109 110 } 111 112 public static IRegressionSolution CreateSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) { 113 IEnumerable<string> doubleVariables; 114 IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables; 115 double[,] inputMatrix; 116 PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables); 117 118 alglib.linearmodel lm = new alglib.linearmodel(); 119 alglib.lrreport ar = new alglib.lrreport(); 120 int nRows = inputMatrix.GetLength(0); 121 int nFeatures = inputMatrix.GetLength(1) - 1; 122 123 int retVal = 1; 124 alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar); 125 if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution"); 126 rmsError = ar.rmserror; 127 cvRmsError = ar.cvrmserror; 128 129 // get parameters of the model 130 double[] w; 131 int nVars; 132 alglib.lrunpack(lm, out w, out nVars); 133 134 // ar.c is the covariation matrix, array[0..NVars,0..NVars]. 
135 // C[i, j] = Cov(A[i], A[j]) 136 137 var solution = new LinearRegressionModel(w, ar.c, cvRmsError, problemData.TargetVariable, doubleVariables, factorVariables) 138 .CreateRegressionSolution((IRegressionProblemData)problemData.Clone()); 139 solution.Name = "Linear Regression Solution"; 140 return solution; 141 } 142 143 private static void PrepareData(IRegressionProblemData problemData, 144 out double[,] inputMatrix, 145 out IEnumerable<string> doubleVariables, 146 out IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables) { 147 var dataset = problemData.Dataset; 148 string targetVariable = problemData.TargetVariable; 149 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 150 IEnumerable<int> rows = problemData.TrainingIndices; 151 doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>); 152 var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>); 153 factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows); 154 double[,] binaryMatrix = dataset.ToArray(factorVariables, rows); 155 double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows); 156 inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix); 157 158 if (inputMatrix.ContainsNanOrInfinity()) 159 throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset."); 160 } 110 161 #endregion 111 162 }
Note: See TracChangeset for help on using the changeset viewer.