Changeset 17991 for branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression
- Timestamp:
- 06/16/21 21:35:37 (3 years ago)
- Location:
- branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/Interfaces/ISymbolicRegressionModel.cs
r17579 r17991 24 24 namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression { 25 25 [StorableType("a411e2b5-f926-41a6-b55b-1aef862db2fb")] 26 public interface ISymbolicRegressionModel : IRegressionModel, ISymbolicDataAnalysisModel {26 public interface ISymbolicRegressionModel : IRegressionModel, ISymbolicDataAnalysisModel, IConfidenceRegressionModel { 27 27 void Scale(IRegressionProblemData problemData); 28 28 } -
branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/Interfaces/ISymbolicRegressionSolution.cs
r17579 r17991 25 25 namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression { 26 26 [StorableType("dff1a450-e958-454f-bf8e-6b763fdcaff3")] 27 public interface ISymbolicRegressionSolution : IRegressionSolution, ISymbolicDataAnalysisSolution {27 public interface ISymbolicRegressionSolution : IRegressionSolution, ISymbolicDataAnalysisSolution, IConfidenceRegressionSolution { 28 28 new ISymbolicRegressionModel Model { get; } 29 29 } -
branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/SymbolicRegressionSingleObjectiveTrainingBestSolutionAnalyzer.cs
r17180 r17991 25 25 using HeuristicLab.Parameters; 26 26 using HEAL.Attic; 27 using HeuristicLab.Data; 28 using System.Collections.Generic; 29 using System; 30 using System.Linq; 31 using HeuristicLab.Analysis.Statistics; 27 32 28 33 namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression { … … 63 68 64 69 protected override ISymbolicRegressionSolution CreateSolution(ISymbolicExpressionTree bestTree, double bestQuality) { 65 var model = new SymbolicRegressionModel(ProblemDataParameter.ActualValue.TargetVariable, (ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper); 70 71 // HACK: create model first for scaling, then calculate statistics and create a new model with prediction intervals 72 var model = new SymbolicRegressionModel(ProblemDataParameter.ActualValue.TargetVariable, 73 (ISymbolicExpressionTree)bestTree.Clone(), 74 SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, 75 EstimationLimitsParameter.ActualValue.Lower, 76 EstimationLimitsParameter.ActualValue.Upper); 66 77 if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue); 67 return new SymbolicRegressionSolution(model, (IRegressionProblemData)ProblemDataParameter.ActualValue.Clone()); 78 79 // use scaled tree 80 CalculateParameterCovariance(model.SymbolicExpressionTree, ProblemDataParameter.ActualValue, SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, out var cov, out var sigma); 81 var predIntervalModel = new SymbolicRegressionModel(ProblemDataParameter.ActualValue.TargetVariable, 82 (ISymbolicExpressionTree)model.SymbolicExpressionTree.Clone(), 83 SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, 84 EstimationLimitsParameter.ActualValue.Lower, 85 EstimationLimitsParameter.ActualValue.Upper, parameterCovariance: cov, sigma: sigma); 86 87 return new SymbolicRegressionSolution(predIntervalModel, (IRegressionProblemData)ProblemDataParameter.ActualValue.Clone()); 88 } 89 90 private void CalculateParameterCovariance(ISymbolicExpressionTree tree, IRegressionProblemData problemData, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, out double[,] cov, out double sigma) { 91 var y_pred = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices).ToArray(); 92 var residuals = problemData.TargetVariableTrainingValues.Zip(y_pred, (yi, y_pred_i) => yi - y_pred_i).ToArray(); 93 94 var paramNodes = new List<ISymbolicExpressionTreeNode>(); 95 var coeffList = new List<double>(); 96 foreach (var node in tree.IterateNodesPostfix()) { 97 if (node is ConstantTreeNode constNode) { 98 paramNodes.Add(constNode); 99 coeffList.Add(constNode.Value); 100 } else if (node is VariableTreeNode varNode) { 101 paramNodes.Add(varNode); 102 coeffList.Add(varNode.Weight); 103 } 104 } 105 var coeff = coeffList.ToArray(); 106 var numParams = coeff.Length; 107 108 var rows = problemData.TrainingIndices.ToArray(); 109 var dcoeff = new double[rows.Length, numParams]; 110 TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, makeVariableWeightsVariable: true, addLinearScalingTerms: false, 111 out var parameters, out var initialConstants, out var func, out var func_grad); 112 if (initialConstants.Zip(coeff, (ici, coi) => ici != coi).Any(t => t)) throw new InvalidProgramException(); 113 var ds = problemData.Dataset; 114 var x_r = new double[parameters.Count]; 115 for (int r = 0; r < rows.Length; r++) { 116 // copy row 117 for (int c = 0; c < parameters.Count; c++) { 118 x_r[c] = ds.GetDoubleValue(parameters[c].variableName, rows[r]); 119 } 120 var tup = func_grad(coeff, x_r); 121 for (int c = 0; c < numParams; c++) { 122 dcoeff[r, c] = tup.Item1[c]; 123 } 124 } 125 126 var stats = Statistics.CalculateLinearModelStatistics(dcoeff, coeff, residuals); 127 cov = stats.CovMx; 128 sigma = stats.sigma; 68 129 } 69 130 } -
branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SymbolicRegressionModel.cs
r17180 r17991 26 26 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; 27 27 using HEAL.Attic; 28 using HeuristicLab.Data; 29 using System.Linq; 28 30 29 31 namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression { … … 45 47 } 46 48 49 [Storable] 50 private readonly double[,] parameterCovariance; 51 [Storable] 52 private readonly double sigma; 53 47 54 [StorableConstructor] 48 55 protected SymbolicRegressionModel(StorableConstructorFlag _) : base(_) { … … 53 60 : base(original, cloner) { 54 61 this.targetVariable = original.targetVariable; 62 this.parameterCovariance = original.parameterCovariance; // immutable 63 this.sigma = original.sigma; 55 64 } 56 65 57 66 public SymbolicRegressionModel(string targetVariable, ISymbolicExpressionTree tree, 58 67 ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, 59 double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue )68 double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue, double[,] parameterCovariance = null, double sigma = 0.0) 60 69 : base(tree, interpreter, lowerEstimationLimit, upperEstimationLimit) { 61 70 this.targetVariable = targetVariable; 71 if (parameterCovariance != null) 72 this.parameterCovariance = (double[,])parameterCovariance.Clone(); 73 this.sigma = sigma; 62 74 } 63 75 … … 69 81 return Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, dataset, rows) 70 82 .LimitToRange(LowerEstimationLimit, UpperEstimationLimit); 83 } 84 85 public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) { 86 // must work with a copy because we change tree nodes 87 var treeCopy = (ISymbolicExpressionTree)SymbolicExpressionTree.Clone(); 88 // uses sampling to produce prediction intervals 89 alglib.hqrndseed(31415, 926535, out var state); 90 var cov = parameterCovariance; 91 if (cov == null || cov.Length == 0) return rows.Select(_ => 0.0); 92 var n = 30; 93 var M = rows.Select(_ => new double[n]).ToArray(); 94 var paramNodes = new List<ISymbolicExpressionTreeNode>(); 95 var coeffList = new List<double>(); 96 // HACK: skip linear scaling parameters because the analyzer doesn't use them (and they are likely correlated with the remaining parameters) 97 // only works with linear scaling 98 if (!(treeCopy.Root.GetSubtree(0).GetSubtree(0).Symbol is Addition) || 99 !(treeCopy.Root.GetSubtree(0).GetSubtree(0).GetSubtree(0).Symbol is Multiplication)) 100 throw new NotImplementedException("prediction intervals are implemented only for linear scaling"); 101 102 foreach (var node in treeCopy.Root.GetSubtree(0).GetSubtree(0).IterateNodesPostfix()) { 103 if (node is ConstantTreeNode constNode) { 104 paramNodes.Add(constNode); 105 coeffList.Add(constNode.Value); 106 } else if (node is VariableTreeNode varNode) { 107 paramNodes.Add(varNode); 108 coeffList.Add(varNode.Weight); 109 } 110 } 111 var coeff = coeffList.ToArray(); 112 var numParams = coeff.Length; 113 if (cov.GetLength(0) != numParams) throw new InvalidProgramException(); 114 115 // TODO: probably we do not need to sample but can instead use a first-order or second-order approximation of f 116 // see http://sia.webpopix.org/nonlinearRegression.html 117 // also see https://rmazing.wordpress.com/2013/08/26/predictnls-part-2-taylor-approximation-confidence-intervals-for-nls-models/ 118 // https://www.rdocumentation.org/packages/propagate/versions/1.0-4/topics/predictNLS 119 double[] p = new double[numParams]; 120 for (int i = 0; i < 30; i++) { 121 // sample and update parameter vector delta is 122 alglib.hqrndnormalv(state, numParams, out var delta); 123 alglib.rmatrixmv(numParams, numParams, cov, 0, 0, 0, delta, 0, ref p, 0); 124 for (int j = 0; j < numParams; j++) { 125 if (paramNodes[j] is ConstantTreeNode constNode) constNode.Value = coeff[j] + p[j]; 126 else if (paramNodes[j] is VariableTreeNode varNode) varNode.Weight = coeff[j] + p[j]; 127 } 128 var r = 0; 129 var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(treeCopy, dataset, rows).LimitToRange(LowerEstimationLimit, UpperEstimationLimit); 130 131 foreach (var pred in estimatedValues) { 132 M[r++][i] = pred; 133 } 134 } 135 136 // reset parameters 137 for (int j = 0; j < numParams; j++) { 138 if (paramNodes[j] is ConstantTreeNode constNode) constNode.Value = coeff[j]; 139 else if (paramNodes[j] is VariableTreeNode varNode) varNode.Weight = coeff[j]; 140 } 141 var sigma2 = sigma * sigma; 142 return M.Select(M_i => M_i.Variance() + sigma2).ToArray(); 71 143 } 72 144 -
branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SymbolicRegressionSolution.cs
r17911 r17991 20 20 #endregion 21 21 22 using System.Collections.Generic; 22 23 using System.Linq; 23 24 using HEAL.Attic; … … 112 113 } 113 114 114 115 IConfidenceRegressionModel IConfidenceRegressionSolution.Model => Model; 115 116 116 117 [StorableConstructor] … … 215 216 return intervalEvaluation; 216 217 } 218 219 public IEnumerable<double> EstimatedVariances => GetEstimatedVariances(ProblemData.AllIndices); 220 221 public IEnumerable<double> EstimatedTrainingVariances => GetEstimatedVariances(ProblemData.TestIndices); 222 223 public IEnumerable<double> EstimatedTestVariances => GetEstimatedVariances(ProblemData.TestIndices); 224 225 226 public IEnumerable<double> GetEstimatedVariances(IEnumerable<int> rows) { 227 return Model.GetEstimatedVariances(ProblemData.Dataset, rows); 228 } 217 229 } 218 230 }
Note: See TracChangeset
for help on using the changeset viewer.