Changeset 17991
- Timestamp:
- 06/16/21 21:35:37 (4 years ago)
- Location:
- branches
- Files:
-
- 1 added
- 16 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/3087_Ceres_Integration/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/SymbolicRegressionSolutionErrorCharacteristicsCurveView.cs
r17180 r17991 26 26 using HeuristicLab.Algorithms.DataAnalysis; 27 27 using HeuristicLab.MainForm; 28 using HeuristicLab.PluginInfrastructure; 28 29 using HeuristicLab.Problems.DataAnalysis.Views; 29 30 … … 43 44 private IRegressionSolution CreateLinearRegressionSolution() { 44 45 if (Content == null) throw new InvalidOperationException(); 45 double rmse, cvRmsError;46 46 var problemData = (IRegressionProblemData)ProblemData.Clone(); 47 47 if (!problemData.TrainingIndices.Any()) return null; // don't create an LR model if the problem does not have a training set (e.g. loaded into an existing model) … … 87 87 newProblemData.TestPartition.End = problemData.TestPartition.End; 88 88 89 var solution = LinearRegression.CreateLinearRegressionSolution(newProblemData, out rmse, out cvRmsError); 90 solution.Name = "Baseline (linear subset)"; 91 return solution; 89 try { 90 var solution = LinearRegression.CreateSolution(newProblemData, out _, out _, out _); 91 solution.Name = "Baseline (linear subset)"; 92 return solution; 93 } catch (NotSupportedException e) { 94 ErrorHandling.ShowErrorDialog("Could not create a linear regression solution.", e); 95 } catch (ArgumentException e) { 96 ErrorHandling.ShowErrorDialog("Could not create a linear regression solution.", e); 97 } 98 return null; 92 99 } 93 100 … … 99 106 if (Content.Model.SymbolicExpressionTree.IterateNodesPrefix().OfType<LaggedVariableTreeNode>().Any()) yield break; 100 107 101 yield return CreateLinearRegressionSolution(); 108 var linearRegressionSolution = CreateLinearRegressionSolution(); 109 if (linearRegressionSolution != null) yield return linearRegressionSolution; 102 110 } 103 111 } -
branches/3087_Ceres_Integration/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionErrorCharacteristicsCurveView.cs
r17180 r17991 29 29 using HeuristicLab.MainForm; 30 30 using HeuristicLab.Optimization; 31 using HeuristicLab.PluginInfrastructure; 31 32 32 33 namespace HeuristicLab.Problems.DataAnalysis.Views { … … 74 75 } 75 76 76 private readonly IList<IRegressionSolution> solutions = new List<IRegressionSolution>();77 private readonly List<IRegressionSolution> solutions = new List<IRegressionSolution>(); 77 78 public IEnumerable<IRegressionSolution> Solutions { 78 79 get { return solutions.AsEnumerable(); } … … 103 104 solutions.Clear(); // remove all 104 105 solutions.Add(Content); // re-add the first solution 105 // and recalculate all other solutions 106 foreach (var sol in CreateBaselineSolutions()) { 107 solutions.Add(sol); 108 } 106 solutions.AddRange(CreateBaselineSolutions()); 109 107 UpdateChart(); 110 108 } … … 116 114 solutions.Clear(); // remove all 117 115 solutions.Add(Content); // re-add the first solution 118 // and recalculate all other solutions 119 foreach (var sol in CreateBaselineSolutions()) { 120 solutions.Add(sol); 121 } 116 solutions.AddRange(CreateBaselineSolutions()); 122 117 UpdateChart(); 123 118 } … … 249 244 protected virtual List<double> GetResiduals(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues) { 250 245 switch (residualComboBox.SelectedItem.ToString()) { 251 case "Absolute error": return originalValues.Zip(estimatedValues, (x, y) => Math.Abs(x - y)) 246 case "Absolute error": 247 return originalValues.Zip(estimatedValues, (x, y) => Math.Abs(x - y)) 252 248 .Where(r => !double.IsNaN(r) && !double.IsInfinity(r)).ToList(); 253 case "Squared error": return originalValues.Zip(estimatedValues, (x, y) => (x - y) * (x - y)) 249 case "Squared error": 250 return originalValues.Zip(estimatedValues, (x, y) => (x - y) * (x - y)) 254 251 .Where(r => !double.IsNaN(r) && !double.IsInfinity(r)).ToList(); 255 252 case "Relative error": … … 289 286 290 287 protected virtual IEnumerable<IRegressionSolution> CreateBaselineSolutions() { 291 yield return CreateConstantSolution(); 292 yield return CreateLinearSolution(); 288 var constantSolution = CreateConstantSolution(); 289 if (constantSolution != null) yield return CreateConstantSolution(); 290 291 var linearRegressionSolution = CreateLinearSolution(); 292 if (linearRegressionSolution != null) yield return CreateLinearSolution(); 293 293 } 294 294 295 295 private IRegressionSolution CreateConstantSolution() { 296 if (!ProblemData.TrainingIndices.Any()) return null; 297 296 298 double averageTrainingTarget = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices).Average(); 297 299 var model = new ConstantModel(averageTrainingTarget, ProblemData.TargetVariable); … … 301 303 } 302 304 private IRegressionSolution CreateLinearSolution() { 303 double rmsError, cvRmsError; 304 var solution = LinearRegression.CreateLinearRegressionSolution((IRegressionProblemData)ProblemData.Clone(), out rmsError, out cvRmsError); 305 solution.Name = "Baseline (linear)"; 306 return solution; 305 try { 306 var solution = LinearRegression.CreateSolution((IRegressionProblemData)ProblemData.Clone(), out _, out _, out _); 307 solution.Name = "Baseline (linear)"; 308 return solution; 309 } catch (NotSupportedException e) { 310 ErrorHandling.ShowErrorDialog("Could not create a linear regression solution.", e); 311 } catch (ArgumentException e) { 312 ErrorHandling.ShowErrorDialog("Could not create a linear regression solution.", e); 313 } 314 return null; 307 315 } 308 316 -
branches/3128_Prediction_Intervals/HeuristicLab.Algorithms.DataAnalysis.DecisionTrees/3.4/LeafTypes/LinearLeaf.cs
r17180 r17991 50 50 double rmse, cvRmse; 51 51 numberOfParameters = pd.AllowedInputVariables.Count() + 1; 52 var res = LinearRegression.CreateSolution(pd, out rmse, out cvRmse );52 var res = LinearRegression.CreateSolution(pd, out rmse, out cvRmse, out _); 53 53 return res.Model; 54 54 } -
branches/3128_Prediction_Intervals/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/ElasticNetLinearRegression.cs
r17180 r17991 34 34 using HeuristicLab.Problems.DataAnalysis.Symbolic; 35 35 using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression; 36 using HeuristicLab.Analysis.Statistics; 36 37 37 38 namespace HeuristicLab.Algorithms.DataAnalysis.Glmnet { … … 88 89 } 89 90 90 private void CreateSolution(double lambda) {91 private void CreateSolution(double lambda) { 91 92 double trainNMSE; 92 93 double testNMSE; … … 97 98 var solution = CreateSymbolicSolution(coeff, Problem.ProblemData); 98 99 Results.Add(new Result(solution.Name, solution.Description, solution)); 100 101 var xy = Problem.ProblemData.Dataset.ToArray(Problem.ProblemData.AllowedInputVariables.Concat(new[] { Problem.ProblemData.TargetVariable }), Problem.ProblemData.TrainingIndices); 102 // prepare xy for calculation of parameter statistics 103 // the last coefficient is the offset 104 var resid = new double[xy.GetLength(0)]; 105 for (int r = 0; r < xy.GetLength(0); r++) { 106 resid[r] = xy[r, coeff.Length - 1] - coeff[coeff.Length - 1]; 107 xy[r, coeff.Length - 1] = 1.0; 108 } 109 var statistics = Statistics.CalculateParameterStatistics(xy, coeff, resid); 110 Results.AddOrUpdateResult("Statistics", statistics.AsResultCollection(Problem.ProblemData.AllowedInputVariables.Concat(new string[] { "<const>" }))); 99 111 } 100 112 -
branches/3128_Prediction_Intervals/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
r17180 r17991 32 32 using HeuristicLab.Problems.DataAnalysis.Symbolic; 33 33 using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression; 34 using HeuristicLab.Analysis; 35 using HeuristicLab.Analysis.Statistics; 34 36 35 37 namespace HeuristicLab.Algorithms.DataAnalysis { … … 66 68 // as the calculation of prediction intervals. 67 69 // There is no clean way to implement the new model class for LR as a symbolic model. 68 var solution = CreateSolution(Problem.ProblemData, out rmsError, out cvRmsError );70 var solution = CreateSolution(Problem.ProblemData, out rmsError, out cvRmsError, out _); 69 71 #pragma warning disable 168, 3021 70 var symbolicSolution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError );72 var symbolicSolution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError, out var statistics); 71 73 #pragma warning restore 168, 3021 72 74 Results.Add(new Result(SolutionResultName, "The linear regression solution.", symbolicSolution)); … … 75 77 Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the linear regression solution on the training set.", new DoubleValue(rmsError))); 76 78 Results.Add(new Result("Estimated root mean square error (cross-validation)", "The estimated root of the mean of squared errors of the linear regression solution via cross validation.", new DoubleValue(cvRmsError))); 79 80 var predictorNames = Problem.ProblemData.AllowedInputVariables.Concat(new string[] { "<const>" }).ToArray(); 81 Results.AddOrUpdateResult("Statistics", statistics.AsResultCollection(predictorNames)); 82 77 83 } 78 84 79 85 [Obsolete("Use CreateSolution() instead")] 80 public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError ) {86 public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError, out Statistics statistics) { 81 87 IEnumerable<string> doubleVariables; 82 88 IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables; … … 84 90 PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables); 85 91 86 alglib.linearmodel lm = new alglib.linearmodel();87 alglib.lrreport ar = new alglib.lrreport();88 92 int nRows = inputMatrix.GetLength(0); 89 93 int nFeatures = inputMatrix.GetLength(1) - 1; 90 94 91 int retVal = 1; 92 alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar); 95 alglib.lrbuild(inputMatrix, nRows, nFeatures, out int retVal, out var lm, out var ar); 93 96 if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution"); 94 97 rmsError = ar.rmserror; … … 98 101 alglib.lrunpack(lm, out coefficients, out nFeatures); 99 102 103 // prepare inputmatrix (which has y as last column) for calculation of parameter statistics 104 // the last coefficient is the offset 105 var resid = new double[nRows]; 106 for (int r = 0; r < nRows; r++) { 107 resid[r] = inputMatrix[r, nFeatures] - coefficients[nFeatures]; 108 inputMatrix[r, nFeatures] = 1.0; 109 } 110 statistics = Statistics.CalculateParameterStatistics(inputMatrix, coefficients, resid); 111 100 112 int nFactorCoeff = factorVariables.Sum(kvp => kvp.Value.Count()); 101 113 int nVarCoeff = doubleVariables.Count(); … … 104 116 @const: coefficients[nFeatures]); 105 117 106 SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone()); 118 SymbolicRegressionSolution solution = new SymbolicRegressionSolution( 119 new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter(), parameterCovariance: statistics.CovMx), 120 (IRegressionProblemData)problemData.Clone()); 107 121 solution.Model.Name = "Linear Regression Model"; 108 122 solution.Name = "Linear Regression Solution"; … … 110 124 } 111 125 112 public static IRegressionSolution CreateSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError ) {126 public static IRegressionSolution CreateSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError, out Statistics statistics) { 113 127 IEnumerable<string> doubleVariables; 114 128 IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables; … … 116 130 PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables); 117 131 118 alglib.linearmodel lm = new alglib.linearmodel();119 alglib.lrreport ar = new alglib.lrreport();120 132 int nRows = inputMatrix.GetLength(0); 121 133 int nFeatures = inputMatrix.GetLength(1) - 1; 122 134 123 int retVal = 1; 124 alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar); 135 alglib.lrbuild(inputMatrix, nRows, nFeatures, out int retVal, out var lm, out var ar); 125 136 if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution"); 126 137 rmsError = ar.rmserror; … … 129 140 // get parameters of the model 130 141 double[] w; 131 int nVars; 132 alglib.lrunpack(lm, out w, out nVars); 142 alglib.lrunpack(lm, out w, out _); 143 144 // prepare inputmatrix (which has y as last column) for calculation of parameter statistics 145 // the last coefficient is the offset 146 var resid = new double[nRows]; 147 for (int r = 0; r < nRows; r++) { 148 resid[r] = inputMatrix[r, nFeatures] - w[nFeatures]; 149 inputMatrix[r, nFeatures] = 1.0; 150 } 151 statistics = Statistics.CalculateParameterStatistics(inputMatrix, w, resid); 133 152 134 153 // ar.c is the covariation matrix, array[0..NVars,0..NVars]. 135 // C[i, j] = Cov(A[i], A[j])136 137 154 var solution = new LinearRegressionModel(w, ar.c, cvRmsError, problemData.TargetVariable, doubleVariables, factorVariables) 138 155 .CreateRegressionSolution((IRegressionProblemData)problemData.Clone()); -
branches/3128_Prediction_Intervals/HeuristicLab.Analysis/3.3/HeuristicLab.Analysis-3.3.csproj
r17931 r17991 173 173 <Compile Include="Statistics\Fitting\ExpFitting.cs" /> 174 174 <Compile Include="Statistics\Fitting\IFitting.cs" /> 175 <Compile Include="Statistics\Fitting\Statistics.cs" /> 175 176 <Compile Include="Statistics\KruskalWallisTest.cs" /> 176 177 <Compile Include="Statistics\Fitting\LinearLeastSquaresFitting.cs" /> -
branches/3128_Prediction_Intervals/HeuristicLab.Common/3.3/MatrixExtensions.cs
r17180 r17991 96 96 return result; 97 97 } 98 99 /// <summary> 100 /// Concatenates matrices horizontally. 101 /// A B 102 /// 1 2 9 8 103 /// 3 4 7 6 104 /// 105 /// HorzCat(A, B) 106 /// 1 2 9 8 107 /// 3 4 7 6 108 /// </summary> 109 /// <typeparam name="T"></typeparam> 110 /// <param name="a"></param> 111 /// <param name="b"></param> 112 /// <returns>A new matrix with the number of columns = a.GetLength(1) + b.GetLength(1)</returns> 113 public static T[,] HorzCat<T>(this T[,] a, T[] b) { 114 Contract.Assert(a.GetLength(0) == b.Length); 115 var aLen = a.GetLength(1); 116 var bLen = 1; 117 var result = new T[a.GetLength(0), aLen + bLen]; 118 for (int i = 0; i < a.GetLength(0); i++) 119 for (int j = 0; j < aLen; j++) 120 result[i, j] = a[i, j]; 121 for (int i = 0; i < a.GetLength(0); i++) 122 result[i, aLen] = b[i]; 123 return result; 124 } 98 125 } 99 126 } -
branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views-3.4.csproj
r16658 r17991 164 164 <Private>False</Private> 165 165 </ProjectReference> 166 <ProjectReference Include="..\..\HeuristicLab.Analysis\3.3\HeuristicLab.Analysis-3.3.csproj"> 167 <Project>{887425B4-4348-49ED-A457-B7D2C26DDBF9}</Project> 168 <Name>HeuristicLab.Analysis-3.3</Name> 169 </ProjectReference> 166 170 <ProjectReference Include="..\..\HeuristicLab.Collections\3.3\HeuristicLab.Collections-3.3.csproj"> 167 171 <Project>{958B43BC-CC5C-4FA2-8628-2B3B01D890B6}</Project> -
branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/Plugin.cs.frame
r17184 r17991 29 29 [PluginFile("HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views-3.4.dll",PluginFileType.Assembly)] 30 30 [PluginDependency("HeuristicLab.Algorithms.DataAnalysis", "3.4")] 31 [PluginDependency("HeuristicLab.Analysis", "3.3")] 31 32 [PluginDependency("HeuristicLab.Common", "3.3")] 32 33 [PluginDependency("HeuristicLab.Common.Resources", "3.3")] -
branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/Interfaces/ISymbolicRegressionModel.cs
r17579 r17991 24 24 namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression { 25 25 [StorableType("a411e2b5-f926-41a6-b55b-1aef862db2fb")] 26 public interface ISymbolicRegressionModel : IRegressionModel, ISymbolicDataAnalysisModel {26 public interface ISymbolicRegressionModel : IRegressionModel, ISymbolicDataAnalysisModel, IConfidenceRegressionModel { 27 27 void Scale(IRegressionProblemData problemData); 28 28 } -
branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/Interfaces/ISymbolicRegressionSolution.cs
r17579 r17991 25 25 namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression { 26 26 [StorableType("dff1a450-e958-454f-bf8e-6b763fdcaff3")] 27 public interface ISymbolicRegressionSolution : IRegressionSolution, ISymbolicDataAnalysisSolution {27 public interface ISymbolicRegressionSolution : IRegressionSolution, ISymbolicDataAnalysisSolution, IConfidenceRegressionSolution { 28 28 new ISymbolicRegressionModel Model { get; } 29 29 } -
branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/SymbolicRegressionSingleObjectiveTrainingBestSolutionAnalyzer.cs
r17180 r17991 25 25 using HeuristicLab.Parameters; 26 26 using HEAL.Attic; 27 using HeuristicLab.Data; 28 using System.Collections.Generic; 29 using System; 30 using System.Linq; 31 using HeuristicLab.Analysis.Statistics; 27 32 28 33 namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression { … … 63 68 64 69 protected override ISymbolicRegressionSolution CreateSolution(ISymbolicExpressionTree bestTree, double bestQuality) { 65 var model = new SymbolicRegressionModel(ProblemDataParameter.ActualValue.TargetVariable, (ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper); 70 71 // HACK: create model first for scaling, then calculate statistics and create a new model with prediction intervals 72 var model = new SymbolicRegressionModel(ProblemDataParameter.ActualValue.TargetVariable, 73 (ISymbolicExpressionTree)bestTree.Clone(), 74 SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, 75 EstimationLimitsParameter.ActualValue.Lower, 76 EstimationLimitsParameter.ActualValue.Upper); 66 77 if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue); 67 return new SymbolicRegressionSolution(model, (IRegressionProblemData)ProblemDataParameter.ActualValue.Clone()); 78 79 // use scaled tree 80 CalculateParameterCovariance(model.SymbolicExpressionTree, ProblemDataParameter.ActualValue, SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, out var cov, out var sigma); 81 var predIntervalModel = new SymbolicRegressionModel(ProblemDataParameter.ActualValue.TargetVariable, 82 (ISymbolicExpressionTree)model.SymbolicExpressionTree.Clone(), 83 SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, 84 EstimationLimitsParameter.ActualValue.Lower, 85 EstimationLimitsParameter.ActualValue.Upper, parameterCovariance: cov, sigma: sigma); 86 87 return new SymbolicRegressionSolution(predIntervalModel, (IRegressionProblemData)ProblemDataParameter.ActualValue.Clone()); 88 } 89 90 private void CalculateParameterCovariance(ISymbolicExpressionTree tree, IRegressionProblemData problemData, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, out double[,] cov, out double sigma) { 91 var y_pred = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices).ToArray(); 92 var residuals = problemData.TargetVariableTrainingValues.Zip(y_pred, (yi, y_pred_i) => yi - y_pred_i).ToArray(); 93 94 var paramNodes = new List<ISymbolicExpressionTreeNode>(); 95 var coeffList = new List<double>(); 96 foreach (var node in tree.IterateNodesPostfix()) { 97 if (node is ConstantTreeNode constNode) { 98 paramNodes.Add(constNode); 99 coeffList.Add(constNode.Value); 100 } else if (node is VariableTreeNode varNode) { 101 paramNodes.Add(varNode); 102 coeffList.Add(varNode.Weight); 103 } 104 } 105 var coeff = coeffList.ToArray(); 106 var numParams = coeff.Length; 107 108 var rows = problemData.TrainingIndices.ToArray(); 109 var dcoeff = new double[rows.Length, numParams]; 110 TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, makeVariableWeightsVariable: true, addLinearScalingTerms: false, 111 out var parameters, out var initialConstants, out var func, out var func_grad); 112 if (initialConstants.Zip(coeff, (ici, coi) => ici != coi).Any(t => t)) throw new InvalidProgramException(); 113 var ds = problemData.Dataset; 114 var x_r = new double[parameters.Count]; 115 for (int r = 0; r < rows.Length; r++) { 116 // copy row 117 for (int c = 0; c < parameters.Count; c++) { 118 x_r[c] = ds.GetDoubleValue(parameters[c].variableName, rows[r]); 119 } 120 var tup = func_grad(coeff, x_r); 121 for (int c = 0; c < numParams; c++) { 122 dcoeff[r, c] = tup.Item1[c]; 123 } 124 } 125 126 var stats = Statistics.CalculateLinearModelStatistics(dcoeff, coeff, residuals); 127 cov = stats.CovMx; 128 sigma = stats.sigma; 68 129 } 69 130 } -
branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SymbolicRegressionModel.cs
r17180 r17991 26 26 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; 27 27 using HEAL.Attic; 28 using HeuristicLab.Data; 29 using System.Linq; 28 30 29 31 namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression { … … 45 47 } 46 48 49 [Storable] 50 private readonly double[,] parameterCovariance; 51 [Storable] 52 private readonly double sigma; 53 47 54 [StorableConstructor] 48 55 protected SymbolicRegressionModel(StorableConstructorFlag _) : base(_) { … … 53 60 : base(original, cloner) { 54 61 this.targetVariable = original.targetVariable; 62 this.parameterCovariance = original.parameterCovariance; // immutable 63 this.sigma = original.sigma; 55 64 } 56 65 57 66 public SymbolicRegressionModel(string targetVariable, ISymbolicExpressionTree tree, 58 67 ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, 59 double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue )68 double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue, double[,] parameterCovariance = null, double sigma = 0.0) 60 69 : base(tree, interpreter, lowerEstimationLimit, upperEstimationLimit) { 61 70 this.targetVariable = targetVariable; 71 if (parameterCovariance != null) 72 this.parameterCovariance = (double[,])parameterCovariance.Clone(); 73 this.sigma = sigma; 62 74 } 63 75 … … 69 81 return Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, dataset, rows) 70 82 .LimitToRange(LowerEstimationLimit, UpperEstimationLimit); 83 } 84 85 public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) { 86 // must work with a copy because we change tree nodes 87 var treeCopy = (ISymbolicExpressionTree)SymbolicExpressionTree.Clone(); 88 // uses sampling to produce prediction intervals 89 alglib.hqrndseed(31415, 926535, out var state); 90 var cov = parameterCovariance; 91 if (cov == null || cov.Length == 0) return rows.Select(_ => 0.0); 92 var n = 30; 93 var M = rows.Select(_ => new double[n]).ToArray(); 94 var paramNodes = new List<ISymbolicExpressionTreeNode>(); 95 var coeffList = new List<double>(); 96 // HACK: skip linear scaling parameters because the analyzer doesn't use them (and they are likely correlated with the remaining parameters) 97 // only works with linear scaling 98 if (!(treeCopy.Root.GetSubtree(0).GetSubtree(0).Symbol is Addition) || 99 !(treeCopy.Root.GetSubtree(0).GetSubtree(0).GetSubtree(0).Symbol is Multiplication)) 100 throw new NotImplementedException("prediction intervals are implemented only for linear scaling"); 101 102 foreach (var node in treeCopy.Root.GetSubtree(0).GetSubtree(0).IterateNodesPostfix()) { 103 if (node is ConstantTreeNode constNode) { 104 paramNodes.Add(constNode); 105 coeffList.Add(constNode.Value); 106 } else if (node is VariableTreeNode varNode) { 107 paramNodes.Add(varNode); 108 coeffList.Add(varNode.Weight); 109 } 110 } 111 var coeff = coeffList.ToArray(); 112 var numParams = coeff.Length; 113 if (cov.GetLength(0) != numParams) throw new InvalidProgramException(); 114 115 // TODO: probably we do not need to sample but can instead use a first-order or second-order approximation of f 116 // see http://sia.webpopix.org/nonlinearRegression.html 117 // also see https://rmazing.wordpress.com/2013/08/26/predictnls-part-2-taylor-approximation-confidence-intervals-for-nls-models/ 118 // https://www.rdocumentation.org/packages/propagate/versions/1.0-4/topics/predictNLS 119 double[] p = new double[numParams]; 120 for (int i = 0; i < 30; i++) { 121 // sample and update parameter vector delta is 122 alglib.hqrndnormalv(state, numParams, out var delta); 123 alglib.rmatrixmv(numParams, numParams, cov, 0, 0, 0, delta, 0, ref p, 0); 124 for (int j = 0; j < numParams; j++) { 125 if (paramNodes[j] is ConstantTreeNode constNode) constNode.Value = coeff[j] + p[j]; 126 else if (paramNodes[j] is VariableTreeNode varNode) varNode.Weight = coeff[j] + p[j]; 127 } 128 var r = 0; 129 var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(treeCopy, dataset, rows).LimitToRange(LowerEstimationLimit, UpperEstimationLimit); 130 131 foreach (var pred in estimatedValues) { 132 M[r++][i] = pred; 133 } 134 } 135 136 // reset parameters 137 for (int j = 0; j < numParams; j++) { 138 if (paramNodes[j] is ConstantTreeNode constNode) constNode.Value = coeff[j]; 139 else if (paramNodes[j] is VariableTreeNode varNode) varNode.Weight = coeff[j]; 140 } 141 var sigma2 = sigma * sigma; 142 return M.Select(M_i => M_i.Variance() + sigma2).ToArray(); 71 143 } 72 144 -
branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SymbolicRegressionSolution.cs
r17911 r17991 20 20 #endregion 21 21 22 using System.Collections.Generic; 22 23 using System.Linq; 23 24 using HEAL.Attic; … … 112 113 } 113 114 114 115 IConfidenceRegressionModel IConfidenceRegressionSolution.Model => Model; 115 116 116 117 [StorableConstructor] … … 215 216 return intervalEvaluation; 216 217 } 218 219 public IEnumerable<double> EstimatedVariances => GetEstimatedVariances(ProblemData.AllIndices); 220 221 public IEnumerable<double> EstimatedTrainingVariances => GetEstimatedVariances(ProblemData.TestIndices); 222 223 public IEnumerable<double> EstimatedTestVariances => GetEstimatedVariances(ProblemData.TestIndices); 224 225 226 public IEnumerable<double> GetEstimatedVariances(IEnumerable<int> rows) { 227 return Model.GetEstimatedVariances(ProblemData.Dataset, rows); 228 } 217 229 } 218 230 } -
branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Views/3.4/Solution Views/DataAnalysisSolutionView.cs
r17180 r17991 74 74 AddEvaluationViewTypes(); 75 75 76 // recover selection76 // recover selection 77 77 if (selectedName != null) { 78 78 foreach (ListViewItem item in itemsListView.Items) { -
branches/3128_Prediction_Intervals/HeuristicLab.Tests/HeuristicLab.Problems.DataAnalysis-3.4/RegressionVariableImpactCalculationTest.cs
r17948 r17991 46 46 double rmsError; 47 47 double cvRmsError; 48 var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError );48 var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError, out _); 49 49 Dictionary<string, double> expectedImpacts = GetExpectedValuesForLRTower(); 50 50 … … 59 59 double rmsError; 60 60 double cvRmsError; 61 var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError );61 var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError, out _); 62 62 Dictionary<string, double> expectedImpacts = GetExpectedValuesForLRMiba(); 63 63 … … 114 114 double rmsError; 115 115 double cvRmsError; 116 var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError );116 var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError, out _); 117 117 solution.ProblemData = LoadDefaultMibaProblem(); 118 118 RegressionSolutionVariableImpactsCalculator.CalculateImpacts(solution); … … 130 130 double rmsError; 131 131 double cvRmsError; 132 var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError );132 var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError, out _); 133 133 134 134 Stopwatch watch = new Stopwatch();
Note: See TracChangeset
for help on using the changeset viewer.