Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
06/16/21 21:35:37 (3 years ago)
Author:
gkronber
Message:

#3128: first dump of exploratory work-in-progress code to make sure the working copy is not lost.

Location:
branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/Interfaces/ISymbolicRegressionModel.cs

    r17579 r17991  
    2424namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
    2525  [StorableType("a411e2b5-f926-41a6-b55b-1aef862db2fb")]
    26   public interface ISymbolicRegressionModel : IRegressionModel, ISymbolicDataAnalysisModel {
     26  public interface ISymbolicRegressionModel : IRegressionModel, ISymbolicDataAnalysisModel, IConfidenceRegressionModel {
    2727    void Scale(IRegressionProblemData problemData);
    2828  }
  • branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/Interfaces/ISymbolicRegressionSolution.cs

    r17579 r17991  
    2525namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
    2626  [StorableType("dff1a450-e958-454f-bf8e-6b763fdcaff3")]
    27   public interface ISymbolicRegressionSolution : IRegressionSolution, ISymbolicDataAnalysisSolution {
     27  public interface ISymbolicRegressionSolution : IRegressionSolution, ISymbolicDataAnalysisSolution, IConfidenceRegressionSolution {
    2828    new ISymbolicRegressionModel Model { get; }
    2929  }
  • branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/SymbolicRegressionSingleObjectiveTrainingBestSolutionAnalyzer.cs

    r17180 r17991  
    2525using HeuristicLab.Parameters;
    2626using HEAL.Attic;
     27using HeuristicLab.Data;
     28using System.Collections.Generic;
     29using System;
     30using System.Linq;
     31using HeuristicLab.Analysis.Statistics;
    2732
    2833namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
     
    6368
    6469    protected override ISymbolicRegressionSolution CreateSolution(ISymbolicExpressionTree bestTree, double bestQuality) {
    65       var model = new SymbolicRegressionModel(ProblemDataParameter.ActualValue.TargetVariable, (ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper);
     70
     71      // HACK: create model first for scaling, then calculate statistics and create a new model with prediction intervals
     72      var model = new SymbolicRegressionModel(ProblemDataParameter.ActualValue.TargetVariable,
     73        (ISymbolicExpressionTree)bestTree.Clone(),
     74        SymbolicDataAnalysisTreeInterpreterParameter.ActualValue,
     75        EstimationLimitsParameter.ActualValue.Lower,
     76        EstimationLimitsParameter.ActualValue.Upper);
    6677      if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue);
    67       return new SymbolicRegressionSolution(model, (IRegressionProblemData)ProblemDataParameter.ActualValue.Clone());
     78
     79      // use scaled tree
     80      CalculateParameterCovariance(model.SymbolicExpressionTree, ProblemDataParameter.ActualValue, SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, out var cov, out var sigma);
     81      var predIntervalModel = new SymbolicRegressionModel(ProblemDataParameter.ActualValue.TargetVariable,
     82        (ISymbolicExpressionTree)model.SymbolicExpressionTree.Clone(),
     83        SymbolicDataAnalysisTreeInterpreterParameter.ActualValue,
     84        EstimationLimitsParameter.ActualValue.Lower,
     85        EstimationLimitsParameter.ActualValue.Upper, parameterCovariance: cov, sigma: sigma);
     86
     87      return new SymbolicRegressionSolution(predIntervalModel, (IRegressionProblemData)ProblemDataParameter.ActualValue.Clone());
     88    }
     89
     90    private void CalculateParameterCovariance(ISymbolicExpressionTree tree, IRegressionProblemData problemData, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, out double[,] cov, out double sigma) {
     91      var y_pred = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices).ToArray();
     92      var residuals = problemData.TargetVariableTrainingValues.Zip(y_pred, (yi, y_pred_i) => yi - y_pred_i).ToArray();
     93
     94      var paramNodes = new List<ISymbolicExpressionTreeNode>();
     95      var coeffList = new List<double>();
     96      foreach (var node in tree.IterateNodesPostfix()) {
     97        if (node is ConstantTreeNode constNode) {
     98          paramNodes.Add(constNode);
     99          coeffList.Add(constNode.Value);
     100        } else if (node is VariableTreeNode varNode) {
     101          paramNodes.Add(varNode);
     102          coeffList.Add(varNode.Weight);
     103        }
     104      }
     105      var coeff = coeffList.ToArray();
     106      var numParams = coeff.Length;
     107
     108      var rows = problemData.TrainingIndices.ToArray();
     109      var dcoeff = new double[rows.Length, numParams];
     110      TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, makeVariableWeightsVariable: true, addLinearScalingTerms: false,
     111        out var parameters, out var initialConstants, out var func, out var func_grad);
     112      if (initialConstants.Zip(coeff, (ici, coi) => ici != coi).Any(t => t)) throw new InvalidProgramException();
     113      var ds = problemData.Dataset;
     114      var x_r = new double[parameters.Count];
     115      for (int r = 0; r < rows.Length; r++) {
     116        // copy row
     117        for (int c = 0; c < parameters.Count; c++) {
     118          x_r[c] = ds.GetDoubleValue(parameters[c].variableName, rows[r]);
     119        }
     120        var tup = func_grad(coeff, x_r);
     121        for (int c = 0; c < numParams; c++) {
     122          dcoeff[r, c] = tup.Item1[c];
     123        }
     124      }
     125
     126      var stats = Statistics.CalculateLinearModelStatistics(dcoeff, coeff, residuals);
     127      cov = stats.CovMx;
     128      sigma = stats.sigma;
    68129    }
    69130  }
  • branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SymbolicRegressionModel.cs

    r17180 r17991  
    2626using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    2727using HEAL.Attic;
     28using HeuristicLab.Data;
     29using System.Linq;
    2830
    2931namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
     
    4547    }
    4648
     49    [Storable]
     50    private readonly double[,] parameterCovariance;
     51    [Storable]
     52    private readonly double sigma;
     53
    4754    [StorableConstructor]
    4855    protected SymbolicRegressionModel(StorableConstructorFlag _) : base(_) {
     
    5360      : base(original, cloner) {
    5461      this.targetVariable = original.targetVariable;
     62      this.parameterCovariance = original.parameterCovariance; // immutable
     63      this.sigma = original.sigma;
    5564    }
    5665
    5766    public SymbolicRegressionModel(string targetVariable, ISymbolicExpressionTree tree,
    5867      ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
    59       double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue)
     68      double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue, double[,] parameterCovariance = null, double sigma = 0.0)
    6069      : base(tree, interpreter, lowerEstimationLimit, upperEstimationLimit) {
    6170      this.targetVariable = targetVariable;
     71      if (parameterCovariance != null)
     72        this.parameterCovariance = (double[,])parameterCovariance.Clone();
     73      this.sigma = sigma;
    6274    }
    6375
     
    6981      return Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, dataset, rows)
    7082        .LimitToRange(LowerEstimationLimit, UpperEstimationLimit);
     83    }
     84
     85    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
     86      // must work with a copy because we change tree nodes
     87      var treeCopy = (ISymbolicExpressionTree)SymbolicExpressionTree.Clone();
     88      // uses sampling to produce prediction intervals
     89      alglib.hqrndseed(31415, 926535, out var state);
     90      var cov = parameterCovariance;
     91      if (cov == null || cov.Length == 0) return rows.Select(_ => 0.0);
     92      var n = 30;
     93      var M = rows.Select(_ => new double[n]).ToArray();
     94      var paramNodes = new List<ISymbolicExpressionTreeNode>();
     95      var coeffList = new List<double>();
     96      // HACK: skip linear scaling parameters because the analyzer doesn't use them (and they are likely correlated with the remaining parameters)
     97      // only works with linear scaling
     98      if (!(treeCopy.Root.GetSubtree(0).GetSubtree(0).Symbol is Addition) ||
     99          !(treeCopy.Root.GetSubtree(0).GetSubtree(0).GetSubtree(0).Symbol is Multiplication))
     100        throw new NotImplementedException("prediction intervals are implemented only for linear scaling");
     101
     102      foreach (var node in treeCopy.Root.GetSubtree(0).GetSubtree(0).IterateNodesPostfix()) {
     103        if (node is ConstantTreeNode constNode) {
     104          paramNodes.Add(constNode);
     105          coeffList.Add(constNode.Value);
     106        } else if (node is VariableTreeNode varNode) {
     107          paramNodes.Add(varNode);
     108          coeffList.Add(varNode.Weight);
     109        }
     110      }
     111      var coeff = coeffList.ToArray();
     112      var numParams = coeff.Length;
     113      if (cov.GetLength(0) != numParams) throw new InvalidProgramException();
     114
     115      // TODO: probably we do not need to sample but can instead use a first-order or second-order approximation of f
     116      // see http://sia.webpopix.org/nonlinearRegression.html
     117      // also see https://rmazing.wordpress.com/2013/08/26/predictnls-part-2-taylor-approximation-confidence-intervals-for-nls-models/
     118      // https://www.rdocumentation.org/packages/propagate/versions/1.0-4/topics/predictNLS
     119      double[] p = new double[numParams];
     120      for (int i = 0; i < 30; i++) {
     121        // sample and update parameter vector delta is
     122        alglib.hqrndnormalv(state, numParams, out var delta);
     123        alglib.rmatrixmv(numParams, numParams, cov, 0, 0, 0, delta, 0, ref p, 0);
     124        for (int j = 0; j < numParams; j++) {
     125          if (paramNodes[j] is ConstantTreeNode constNode) constNode.Value = coeff[j] + p[j];
     126          else if (paramNodes[j] is VariableTreeNode varNode) varNode.Weight = coeff[j] + p[j];
     127        }
     128        var r = 0;
     129        var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(treeCopy, dataset, rows).LimitToRange(LowerEstimationLimit, UpperEstimationLimit);
     130
     131        foreach (var pred in estimatedValues) {
     132          M[r++][i] = pred;
     133        }
     134      }
     135
     136      // reset parameters
     137      for (int j = 0; j < numParams; j++) {
     138        if (paramNodes[j] is ConstantTreeNode constNode) constNode.Value = coeff[j];
     139        else if (paramNodes[j] is VariableTreeNode varNode) varNode.Weight = coeff[j];
     140      }
     141      var sigma2 = sigma * sigma;
     142      return M.Select(M_i => M_i.Variance() + sigma2).ToArray();
    71143    }
    72144
  • branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SymbolicRegressionSolution.cs

    r17911 r17991  
    2020#endregion
    2121
     22using System.Collections.Generic;
    2223using System.Linq;
    2324using HEAL.Attic;
     
    112113    }
    113114
    114 
     115    IConfidenceRegressionModel IConfidenceRegressionSolution.Model => Model;
    115116
    116117    [StorableConstructor]
     
    215216      return intervalEvaluation;
    216217    }
     218
     219    public IEnumerable<double> EstimatedVariances => GetEstimatedVariances(ProblemData.AllIndices);
     220
     221    public IEnumerable<double> EstimatedTrainingVariances => GetEstimatedVariances(ProblemData.TestIndices);
     222
     223    public IEnumerable<double> EstimatedTestVariances => GetEstimatedVariances(ProblemData.TestIndices);
     224
     225
     226    public IEnumerable<double> GetEstimatedVariances(IEnumerable<int> rows) {
     227      return Model.GetEstimatedVariances(ProblemData.Dataset, rows);
     228    }
    217229  }
    218230}
Note: See TracChangeset for help on using the changeset viewer.