Changeset 15744 for branches/2892_LR-prediction-intervals
- Timestamp: 02/09/18 10:51:27 (7 years ago)
- Location: branches/2892_LR-prediction-intervals/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files: 1 added, 2 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/2892_LR-prediction-intervals/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r15532 r15744 262 262 <Compile Include="Linear\MultinomialLogitClassification.cs" /> 263 263 <Compile Include="Linear\MultinomialLogitClassificationSolution.cs" /> 264 <Compile Include="Linear\LinearRegressionModel.cs" /> 264 265 <Compile Include="Linear\MultinomialLogitModel.cs" /> 265 266 <Compile Include="Linear\Scaling.cs" /> … … 320 321 <Compile Include="TSNE\Distances\IndexedItemDistance.cs" /> 321 322 <Compile Include="TSNE\Distances\ManhattanDistance.cs" /> 322 323 <Compile Include="TSNE\Distances\WeightedEuclideanDistance.cs" /> 323 324 <Compile Include="TSNE\Distances\IDistance.cs" /> 324 325 <Compile Include="TSNE\PriorityQueue.cs" /> -
branches/2892_LR-prediction-intervals/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
r15583 r15744 62 62 protected override void Run(CancellationToken cancellationToken) { 63 63 double rmsError, cvRmsError; 64 var solution = Create LinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError);64 var solution = CreateSolution(Problem.ProblemData, out rmsError, out cvRmsError); 65 65 Results.Add(new Result(LinearRegressionModelResultName, "The linear regression solution.", solution)); 66 66 Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the linear regression solution on the training set.", new DoubleValue(rmsError))); … … 68 68 } 69 69 70 [Obsolete("Use CreateSolution() instead")] 70 71 public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) { 71 var dataset = problemData.Dataset; 72 string targetVariable = problemData.TargetVariable; 73 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 74 IEnumerable<int> rows = problemData.TrainingIndices; 75 var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>); 76 var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>); 77 var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows); 78 double[,] binaryMatrix = dataset.ToArray(factorVariables, rows); 79 double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows); 80 var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix); 81 82 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 83 throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset."); 72 IEnumerable<string> doubleVariables; 73 IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables; 74 double[,] inputMatrix; 75 PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables); 84 76 
85 77 alglib.linearmodel lm = new alglib.linearmodel(); … … 87 79 int nRows = inputMatrix.GetLength(0); 88 80 int nFeatures = inputMatrix.GetLength(1) - 1; 89 double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant90 81 91 82 int retVal = 1; … … 95 86 cvRmsError = ar.cvrmserror; 96 87 88 double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant 97 89 alglib.lrunpack(lm, out coefficients, out nFeatures); 98 99 int nFactorCoeff = binaryMatrix.GetLength(1);90 91 int nFactorCoeff = factorVariables.Sum(kvp=>kvp.Value.Count()); 100 92 int nVarCoeff = doubleVariables.Count(); 101 93 var tree = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(), … … 108 100 return solution; 109 101 } 102 103 public static IRegressionSolution CreateSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) { 104 IEnumerable<string> doubleVariables; 105 IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables; 106 double[,] inputMatrix; 107 PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables); 108 109 alglib.linearmodel lm = new alglib.linearmodel(); 110 alglib.lrreport ar = new alglib.lrreport(); 111 int nRows = inputMatrix.GetLength(0); 112 int nFeatures = inputMatrix.GetLength(1) - 1; 113 114 int retVal = 1; 115 alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar); 116 if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution"); 117 rmsError = ar.rmserror; 118 cvRmsError = ar.cvrmserror; 119 120 // get parameters of the model 121 double[] w; 122 int nVars; 123 alglib.lrunpack(lm, out w, out nVars); 124 125 // ar.c is the covariation matrix, array[0..NVars,0..NVars]. 
126 // C[i, j] = Cov(A[i], A[j]) 127 128 var solution = new LinearRegressionModel(w, ar.c, cvRmsError, problemData.TargetVariable, doubleVariables, factorVariables) 129 .CreateRegressionSolution((IRegressionProblemData)problemData.Clone()); 130 solution.Name = "Linear Regression Solution"; 131 return solution; 132 } 133 134 private static void PrepareData(IRegressionProblemData problemData, 135 out double[,] inputMatrix, 136 out IEnumerable<string> doubleVariables, 137 out IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables) { 138 var dataset = problemData.Dataset; 139 string targetVariable = problemData.TargetVariable; 140 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 141 IEnumerable<int> rows = problemData.TrainingIndices; 142 doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>); 143 var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>); 144 factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows); 145 double[,] binaryMatrix = dataset.ToArray(factorVariables, rows); 146 double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows); 147 inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix); 148 149 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 150 throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset."); 151 } 110 152 #endregion 111 153 }
Note: See TracChangeset for help on using the changeset viewer.