Changeset 17991


Ignore:
Timestamp:
06/16/21 21:35:37 (4 months ago)
Author:
gkronber
Message:

#3128: first dump of exploratory work-in-progress code to make sure the working copy is not lost.

Location:
branches
Files:
1 added
16 edited

Legend:

Unmodified
Added
Removed
  • branches/3087_Ceres_Integration/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/SymbolicRegressionSolutionErrorCharacteristicsCurveView.cs

    r17180 r17991  
    2626using HeuristicLab.Algorithms.DataAnalysis;
    2727using HeuristicLab.MainForm;
     28using HeuristicLab.PluginInfrastructure;
    2829using HeuristicLab.Problems.DataAnalysis.Views;
    2930
     
    4344    private IRegressionSolution CreateLinearRegressionSolution() {
    4445      if (Content == null) throw new InvalidOperationException();
    45       double rmse, cvRmsError;
    4646      var problemData = (IRegressionProblemData)ProblemData.Clone();
    4747      if (!problemData.TrainingIndices.Any()) return null; // don't create an LR model if the problem does not have a training set (e.g. loaded into an existing model)
     
    8787      newProblemData.TestPartition.End = problemData.TestPartition.End;
    8888
    89       var solution = LinearRegression.CreateLinearRegressionSolution(newProblemData, out rmse, out cvRmsError);
    90       solution.Name = "Baseline (linear subset)";
    91       return solution;
     89      try {
     90        var solution = LinearRegression.CreateSolution(newProblemData, out _, out _, out _);
     91        solution.Name = "Baseline (linear subset)";
     92        return solution;
     93      } catch (NotSupportedException e) {
     94        ErrorHandling.ShowErrorDialog("Could not create a linear regression solution.", e);
     95      } catch (ArgumentException e) {
     96        ErrorHandling.ShowErrorDialog("Could not create a linear regression solution.", e);
     97      }
     98      return null;
    9299    }
    93100
     
    99106      if (Content.Model.SymbolicExpressionTree.IterateNodesPrefix().OfType<LaggedVariableTreeNode>().Any()) yield break;
    100107
    101       yield return CreateLinearRegressionSolution();
     108      var linearRegressionSolution = CreateLinearRegressionSolution();
     109      if (linearRegressionSolution != null) yield return linearRegressionSolution;
    102110    }
    103111  }
  • branches/3087_Ceres_Integration/HeuristicLab.Problems.DataAnalysis.Views/3.4/Regression/RegressionSolutionErrorCharacteristicsCurveView.cs

    r17180 r17991  
    2929using HeuristicLab.MainForm;
    3030using HeuristicLab.Optimization;
     31using HeuristicLab.PluginInfrastructure;
    3132
    3233namespace HeuristicLab.Problems.DataAnalysis.Views {
     
    7475    }
    7576
    76     private readonly IList<IRegressionSolution> solutions = new List<IRegressionSolution>();
     77    private readonly List<IRegressionSolution> solutions = new List<IRegressionSolution>();
    7778    public IEnumerable<IRegressionSolution> Solutions {
    7879      get { return solutions.AsEnumerable(); }
     
    103104        solutions.Clear(); // remove all
    104105        solutions.Add(Content); // re-add the first solution
    105         // and recalculate all other solutions
    106         foreach (var sol in CreateBaselineSolutions()) {
    107           solutions.Add(sol);
    108         }
     106        solutions.AddRange(CreateBaselineSolutions());
    109107        UpdateChart();
    110108      }
     
    116114        solutions.Clear(); // remove all
    117115        solutions.Add(Content); // re-add the first solution
    118         // and recalculate all other solutions
    119         foreach (var sol in CreateBaselineSolutions()) {
    120           solutions.Add(sol);
    121         }
     116        solutions.AddRange(CreateBaselineSolutions());
    122117        UpdateChart();
    123118      }
     
    249244    protected virtual List<double> GetResiduals(IEnumerable<double> originalValues, IEnumerable<double> estimatedValues) {
    250245      switch (residualComboBox.SelectedItem.ToString()) {
    251         case "Absolute error": return originalValues.Zip(estimatedValues, (x, y) => Math.Abs(x - y))
     246        case "Absolute error":
     247          return originalValues.Zip(estimatedValues, (x, y) => Math.Abs(x - y))
    252248            .Where(r => !double.IsNaN(r) && !double.IsInfinity(r)).ToList();
    253         case "Squared error": return originalValues.Zip(estimatedValues, (x, y) => (x - y) * (x - y))
     249        case "Squared error":
     250          return originalValues.Zip(estimatedValues, (x, y) => (x - y) * (x - y))
    254251            .Where(r => !double.IsNaN(r) && !double.IsInfinity(r)).ToList();
    255252        case "Relative error":
     
    289286
    290287    protected virtual IEnumerable<IRegressionSolution> CreateBaselineSolutions() {
    291       yield return CreateConstantSolution();
    292       yield return CreateLinearSolution();
     288      var constantSolution = CreateConstantSolution();
     289      if (constantSolution != null) yield return CreateConstantSolution();
     290
     291      var linearRegressionSolution = CreateLinearSolution();
     292      if (linearRegressionSolution != null) yield return CreateLinearSolution();
    293293    }
    294294
    295295    private IRegressionSolution CreateConstantSolution() {
     296      if (!ProblemData.TrainingIndices.Any()) return null;
     297
    296298      double averageTrainingTarget = ProblemData.Dataset.GetDoubleValues(ProblemData.TargetVariable, ProblemData.TrainingIndices).Average();
    297299      var model = new ConstantModel(averageTrainingTarget, ProblemData.TargetVariable);
     
    301303    }
    302304    private IRegressionSolution CreateLinearSolution() {
    303       double rmsError, cvRmsError;
    304       var solution = LinearRegression.CreateLinearRegressionSolution((IRegressionProblemData)ProblemData.Clone(), out rmsError, out cvRmsError);
    305       solution.Name = "Baseline (linear)";
    306       return solution;
     305      try {
     306        var solution = LinearRegression.CreateSolution((IRegressionProblemData)ProblemData.Clone(), out _, out _, out _);
     307        solution.Name = "Baseline (linear)";
     308        return solution;
     309      } catch (NotSupportedException e) {
     310        ErrorHandling.ShowErrorDialog("Could not create a linear regression solution.", e);
     311      } catch (ArgumentException e) {
     312        ErrorHandling.ShowErrorDialog("Could not create a linear regression solution.", e);
     313      }
     314      return null;
    307315    }
    308316
  • branches/3128_Prediction_Intervals/HeuristicLab.Algorithms.DataAnalysis.DecisionTrees/3.4/LeafTypes/LinearLeaf.cs

    r17180 r17991  
    5050      double rmse, cvRmse;
    5151      numberOfParameters = pd.AllowedInputVariables.Count() + 1;
    52       var res = LinearRegression.CreateSolution(pd, out rmse, out cvRmse);
     52      var res = LinearRegression.CreateSolution(pd, out rmse, out cvRmse, out _);
    5353      return res.Model;
    5454    }
  • branches/3128_Prediction_Intervals/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/ElasticNetLinearRegression.cs

    r17180 r17991  
    3434using HeuristicLab.Problems.DataAnalysis.Symbolic;
    3535using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
     36using HeuristicLab.Analysis.Statistics;
    3637
    3738namespace HeuristicLab.Algorithms.DataAnalysis.Glmnet {
     
    8889    }
    8990
    90   private void CreateSolution(double lambda) {
     91    private void CreateSolution(double lambda) {
    9192      double trainNMSE;
    9293      double testNMSE;
     
    9798      var solution = CreateSymbolicSolution(coeff, Problem.ProblemData);
    9899      Results.Add(new Result(solution.Name, solution.Description, solution));
     100
     101      var xy = Problem.ProblemData.Dataset.ToArray(Problem.ProblemData.AllowedInputVariables.Concat(new[] { Problem.ProblemData.TargetVariable }), Problem.ProblemData.TrainingIndices);
     102      // prepare xy for calculation of parameter statistics
     103      // the last coefficient is the offset
     104      var resid = new double[xy.GetLength(0)];
     105      for (int r = 0; r < xy.GetLength(0); r++) {
     106        resid[r] = xy[r, coeff.Length - 1] - coeff[coeff.Length - 1];
     107        xy[r, coeff.Length - 1] = 1.0;
     108      }
     109      var statistics = Statistics.CalculateParameterStatistics(xy, coeff, resid);
     110      Results.AddOrUpdateResult("Statistics", statistics.AsResultCollection(Problem.ProblemData.AllowedInputVariables.Concat(new string[] { "<const>" })));
    99111    }
    100112
  • branches/3128_Prediction_Intervals/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r17180 r17991  
    3232using HeuristicLab.Problems.DataAnalysis.Symbolic;
    3333using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
     34using HeuristicLab.Analysis;
     35using HeuristicLab.Analysis.Statistics;
    3436
    3537namespace HeuristicLab.Algorithms.DataAnalysis {
     
    6668      // as the calculation of prediction intervals.
    6769      // There is no clean way to implement the new model class for LR as a symbolic model.
    68       var solution = CreateSolution(Problem.ProblemData, out rmsError, out cvRmsError);
     70      var solution = CreateSolution(Problem.ProblemData, out rmsError, out cvRmsError, out _);
    6971#pragma warning disable 168, 3021
    70       var symbolicSolution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError);
     72      var symbolicSolution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError, out var statistics);
    7173#pragma warning restore 168, 3021
    7274      Results.Add(new Result(SolutionResultName, "The linear regression solution.", symbolicSolution));
     
    7577      Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the linear regression solution on the training set.", new DoubleValue(rmsError)));
    7678      Results.Add(new Result("Estimated root mean square error (cross-validation)", "The estimated root of the mean of squared errors of the linear regression solution via cross validation.", new DoubleValue(cvRmsError)));
     79
     80      var predictorNames = Problem.ProblemData.AllowedInputVariables.Concat(new string[] { "<const>" }).ToArray();
     81      Results.AddOrUpdateResult("Statistics", statistics.AsResultCollection(predictorNames));
     82
    7783    }
    7884
    7985    [Obsolete("Use CreateSolution() instead")]
    80     public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) {
     86    public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError, out Statistics statistics) {
    8187      IEnumerable<string> doubleVariables;
    8288      IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables;
     
    8490      PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables);
    8591
    86       alglib.linearmodel lm = new alglib.linearmodel();
    87       alglib.lrreport ar = new alglib.lrreport();
    8892      int nRows = inputMatrix.GetLength(0);
    8993      int nFeatures = inputMatrix.GetLength(1) - 1;
    9094
    91       int retVal = 1;
    92       alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
     95      alglib.lrbuild(inputMatrix, nRows, nFeatures, out int retVal, out var lm, out var ar);
    9396      if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution");
    9497      rmsError = ar.rmserror;
     
    98101      alglib.lrunpack(lm, out coefficients, out nFeatures);
    99102
     103      // prepare inputmatrix (which has y as last column) for calculation of parameter statistics
     104      // the last coefficient is the offset
     105      var resid = new double[nRows];
     106      for (int r = 0; r < nRows; r++) {
     107        resid[r] = inputMatrix[r, nFeatures] - coefficients[nFeatures];
     108        inputMatrix[r, nFeatures] = 1.0;
     109      }
     110      statistics = Statistics.CalculateParameterStatistics(inputMatrix, coefficients, resid);
     111
    100112      int nFactorCoeff = factorVariables.Sum(kvp => kvp.Value.Count());
    101113      int nVarCoeff = doubleVariables.Count();
     
    104116        @const: coefficients[nFeatures]);
    105117
    106       SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone());
     118      SymbolicRegressionSolution solution = new SymbolicRegressionSolution(
     119        new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter(), parameterCovariance: statistics.CovMx),
     120        (IRegressionProblemData)problemData.Clone());
    107121      solution.Model.Name = "Linear Regression Model";
    108122      solution.Name = "Linear Regression Solution";
     
    110124    }
    111125
    112     public static IRegressionSolution CreateSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) {
     126    public static IRegressionSolution CreateSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError, out Statistics statistics) {
    113127      IEnumerable<string> doubleVariables;
    114128      IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables;
     
    116130      PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables);
    117131
    118       alglib.linearmodel lm = new alglib.linearmodel();
    119       alglib.lrreport ar = new alglib.lrreport();
    120132      int nRows = inputMatrix.GetLength(0);
    121133      int nFeatures = inputMatrix.GetLength(1) - 1;
    122134
    123       int retVal = 1;
    124       alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
     135      alglib.lrbuild(inputMatrix, nRows, nFeatures, out int retVal, out var lm, out var ar);
    125136      if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution");
    126137      rmsError = ar.rmserror;
     
    129140      // get parameters of the model
    130141      double[] w;
    131       int nVars;
    132       alglib.lrunpack(lm, out w, out nVars);
     142      alglib.lrunpack(lm, out w, out _);
     143
     144      // prepare inputmatrix (which has y as last column) for calculation of parameter statistics
     145      // the last coefficient is the offset
     146      var resid = new double[nRows];
     147      for (int r = 0; r < nRows; r++) {
     148        resid[r] = inputMatrix[r, nFeatures] - w[nFeatures];
     149        inputMatrix[r, nFeatures] = 1.0;
     150      }
     151      statistics = Statistics.CalculateParameterStatistics(inputMatrix, w, resid);
    133152
    134153      // ar.c is the covariation matrix,  array[0..NVars,0..NVars].
    135       // C[i, j] = Cov(A[i], A[j])
    136 
    137154      var solution = new LinearRegressionModel(w, ar.c, cvRmsError, problemData.TargetVariable, doubleVariables, factorVariables)
    138155        .CreateRegressionSolution((IRegressionProblemData)problemData.Clone());
  • branches/3128_Prediction_Intervals/HeuristicLab.Analysis/3.3/HeuristicLab.Analysis-3.3.csproj

    r17931 r17991  
    173173    <Compile Include="Statistics\Fitting\ExpFitting.cs" />
    174174    <Compile Include="Statistics\Fitting\IFitting.cs" />
     175    <Compile Include="Statistics\Fitting\Statistics.cs" />
    175176    <Compile Include="Statistics\KruskalWallisTest.cs" />
    176177    <Compile Include="Statistics\Fitting\LinearLeastSquaresFitting.cs" />
  • branches/3128_Prediction_Intervals/HeuristicLab.Common/3.3/MatrixExtensions.cs

    r17180 r17991  
    9696      return result;
    9797    }
     98
     99    /// <summary>
     100    /// Concatenates matrices horizontally.
     101    /// A      B
     102    /// 1 2    9 8
     103    /// 3 4    7 6
     104    ///
     105    /// HorzCat(A, B)
     106    /// 1 2 9 8
     107    /// 3 4 7 6
     108    /// </summary>
     109    /// <typeparam name="T"></typeparam>
     110    /// <param name="a"></param>
     111    /// <param name="b"></param>
     112    /// <returns>A new matrix with the number of columns = a.GetLength(1) + b.GetLength(1)</returns>
     113    public static T[,] HorzCat<T>(this T[,] a, T[] b) {
     114      Contract.Assert(a.GetLength(0) == b.Length);
     115      var aLen = a.GetLength(1);
     116      var bLen = 1;
     117      var result = new T[a.GetLength(0), aLen + bLen];
     118      for (int i = 0; i < a.GetLength(0); i++)
     119        for (int j = 0; j < aLen; j++)
     120          result[i, j] = a[i, j];
     121      for (int i = 0; i < a.GetLength(0); i++)
     122        result[i, aLen] = b[i];
     123      return result;
     124    }
    98125  }
    99126}
  • branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views-3.4.csproj

    r16658 r17991  
    164164      <Private>False</Private>
    165165    </ProjectReference>
     166    <ProjectReference Include="..\..\HeuristicLab.Analysis\3.3\HeuristicLab.Analysis-3.3.csproj">
     167      <Project>{887425B4-4348-49ED-A457-B7D2C26DDBF9}</Project>
     168      <Name>HeuristicLab.Analysis-3.3</Name>
     169    </ProjectReference>
    166170    <ProjectReference Include="..\..\HeuristicLab.Collections\3.3\HeuristicLab.Collections-3.3.csproj">
    167171      <Project>{958B43BC-CC5C-4FA2-8628-2B3B01D890B6}</Project>
  • branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views/3.4/Plugin.cs.frame

    r17184 r17991  
    2929  [PluginFile("HeuristicLab.Problems.DataAnalysis.Symbolic.Regression.Views-3.4.dll",PluginFileType.Assembly)]
    3030  [PluginDependency("HeuristicLab.Algorithms.DataAnalysis", "3.4")]
     31  [PluginDependency("HeuristicLab.Analysis", "3.3")]
    3132  [PluginDependency("HeuristicLab.Common", "3.3")]
    3233  [PluginDependency("HeuristicLab.Common.Resources", "3.3")]
  • branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/Interfaces/ISymbolicRegressionModel.cs

    r17579 r17991  
    2424namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
    2525  [StorableType("a411e2b5-f926-41a6-b55b-1aef862db2fb")]
    26   public interface ISymbolicRegressionModel : IRegressionModel, ISymbolicDataAnalysisModel {
     26  public interface ISymbolicRegressionModel : IRegressionModel, ISymbolicDataAnalysisModel, IConfidenceRegressionModel {
    2727    void Scale(IRegressionProblemData problemData);
    2828  }
  • branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/Interfaces/ISymbolicRegressionSolution.cs

    r17579 r17991  
    2525namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
    2626  [StorableType("dff1a450-e958-454f-bf8e-6b763fdcaff3")]
    27   public interface ISymbolicRegressionSolution : IRegressionSolution, ISymbolicDataAnalysisSolution {
     27  public interface ISymbolicRegressionSolution : IRegressionSolution, ISymbolicDataAnalysisSolution, IConfidenceRegressionSolution {
    2828    new ISymbolicRegressionModel Model { get; }
    2929  }
  • branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SingleObjective/SymbolicRegressionSingleObjectiveTrainingBestSolutionAnalyzer.cs

    r17180 r17991  
    2525using HeuristicLab.Parameters;
    2626using HEAL.Attic;
     27using HeuristicLab.Data;
     28using System.Collections.Generic;
     29using System;
     30using System.Linq;
     31using HeuristicLab.Analysis.Statistics;
    2732
    2833namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
     
    6368
    6469    protected override ISymbolicRegressionSolution CreateSolution(ISymbolicExpressionTree bestTree, double bestQuality) {
    65       var model = new SymbolicRegressionModel(ProblemDataParameter.ActualValue.TargetVariable, (ISymbolicExpressionTree)bestTree.Clone(), SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, EstimationLimitsParameter.ActualValue.Lower, EstimationLimitsParameter.ActualValue.Upper);
     70
     71      // HACK: create model first for scaling, then calculate statistics and create a new model with prediction intervals
     72      var model = new SymbolicRegressionModel(ProblemDataParameter.ActualValue.TargetVariable,
     73        (ISymbolicExpressionTree)bestTree.Clone(),
     74        SymbolicDataAnalysisTreeInterpreterParameter.ActualValue,
     75        EstimationLimitsParameter.ActualValue.Lower,
     76        EstimationLimitsParameter.ActualValue.Upper);
    6677      if (ApplyLinearScalingParameter.ActualValue.Value) model.Scale(ProblemDataParameter.ActualValue);
    67       return new SymbolicRegressionSolution(model, (IRegressionProblemData)ProblemDataParameter.ActualValue.Clone());
     78
     79      // use scaled tree
     80      CalculateParameterCovariance(model.SymbolicExpressionTree, ProblemDataParameter.ActualValue, SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, out var cov, out var sigma);
     81      var predIntervalModel = new SymbolicRegressionModel(ProblemDataParameter.ActualValue.TargetVariable,
     82        (ISymbolicExpressionTree)model.SymbolicExpressionTree.Clone(),
     83        SymbolicDataAnalysisTreeInterpreterParameter.ActualValue,
     84        EstimationLimitsParameter.ActualValue.Lower,
     85        EstimationLimitsParameter.ActualValue.Upper, parameterCovariance: cov, sigma: sigma);
     86
     87      return new SymbolicRegressionSolution(predIntervalModel, (IRegressionProblemData)ProblemDataParameter.ActualValue.Clone());
     88    }
     89
     90    private void CalculateParameterCovariance(ISymbolicExpressionTree tree, IRegressionProblemData problemData, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, out double[,] cov, out double sigma) {
     91      var y_pred = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, problemData.TrainingIndices).ToArray();
     92      var residuals = problemData.TargetVariableTrainingValues.Zip(y_pred, (yi, y_pred_i) => yi - y_pred_i).ToArray();
     93
     94      var paramNodes = new List<ISymbolicExpressionTreeNode>();
     95      var coeffList = new List<double>();
     96      foreach (var node in tree.IterateNodesPostfix()) {
     97        if (node is ConstantTreeNode constNode) {
     98          paramNodes.Add(constNode);
     99          coeffList.Add(constNode.Value);
     100        } else if (node is VariableTreeNode varNode) {
     101          paramNodes.Add(varNode);
     102          coeffList.Add(varNode.Weight);
     103        }
     104      }
     105      var coeff = coeffList.ToArray();
     106      var numParams = coeff.Length;
     107
     108      var rows = problemData.TrainingIndices.ToArray();
     109      var dcoeff = new double[rows.Length, numParams];
     110      TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, makeVariableWeightsVariable: true, addLinearScalingTerms: false,
     111        out var parameters, out var initialConstants, out var func, out var func_grad);
     112      if (initialConstants.Zip(coeff, (ici, coi) => ici != coi).Any(t => t)) throw new InvalidProgramException();
     113      var ds = problemData.Dataset;
     114      var x_r = new double[parameters.Count];
     115      for (int r = 0; r < rows.Length; r++) {
     116        // copy row
     117        for (int c = 0; c < parameters.Count; c++) {
     118          x_r[c] = ds.GetDoubleValue(parameters[c].variableName, rows[r]);
     119        }
     120        var tup = func_grad(coeff, x_r);
     121        for (int c = 0; c < numParams; c++) {
     122          dcoeff[r, c] = tup.Item1[c];
     123        }
     124      }
     125
     126      var stats = Statistics.CalculateLinearModelStatistics(dcoeff, coeff, residuals);
     127      cov = stats.CovMx;
     128      sigma = stats.sigma;
    68129    }
    69130  }
  • branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SymbolicRegressionModel.cs

    r17180 r17991  
    2626using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    2727using HEAL.Attic;
     28using HeuristicLab.Data;
     29using System.Linq;
    2830
    2931namespace HeuristicLab.Problems.DataAnalysis.Symbolic.Regression {
     
    4547    }
    4648
     49    [Storable]
     50    private readonly double[,] parameterCovariance;
     51    [Storable]
     52    private readonly double sigma;
     53
    4754    [StorableConstructor]
    4855    protected SymbolicRegressionModel(StorableConstructorFlag _) : base(_) {
     
    5360      : base(original, cloner) {
    5461      this.targetVariable = original.targetVariable;
     62      this.parameterCovariance = original.parameterCovariance; // immutable
     63      this.sigma = original.sigma;
    5564    }
    5665
    5766    public SymbolicRegressionModel(string targetVariable, ISymbolicExpressionTree tree,
    5867      ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
    59       double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue)
     68      double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue, double[,] parameterCovariance = null, double sigma = 0.0)
    6069      : base(tree, interpreter, lowerEstimationLimit, upperEstimationLimit) {
    6170      this.targetVariable = targetVariable;
     71      if (parameterCovariance != null)
     72        this.parameterCovariance = (double[,])parameterCovariance.Clone();
     73      this.sigma = sigma;
    6274    }
    6375
     
    6981      return Interpreter.GetSymbolicExpressionTreeValues(SymbolicExpressionTree, dataset, rows)
    7082        .LimitToRange(LowerEstimationLimit, UpperEstimationLimit);
     83    }
     84
     85    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
     86      // must work with a copy because we change tree nodes
     87      var treeCopy = (ISymbolicExpressionTree)SymbolicExpressionTree.Clone();
     88      // uses sampling to produce prediction intervals
     89      alglib.hqrndseed(31415, 926535, out var state);
     90      var cov = parameterCovariance;
     91      if (cov == null || cov.Length == 0) return rows.Select(_ => 0.0);
     92      var n = 30;
     93      var M = rows.Select(_ => new double[n]).ToArray();
     94      var paramNodes = new List<ISymbolicExpressionTreeNode>();
     95      var coeffList = new List<double>();
     96      // HACK: skip linear scaling parameters because the analyzer doesn't use them (and they are likely correlated with the remaining parameters)
     97      // only works with linear scaling
     98      if (!(treeCopy.Root.GetSubtree(0).GetSubtree(0).Symbol is Addition) ||
     99          !(treeCopy.Root.GetSubtree(0).GetSubtree(0).GetSubtree(0).Symbol is Multiplication))
     100        throw new NotImplementedException("prediction intervals are implemented only for linear scaling");
     101
     102      foreach (var node in treeCopy.Root.GetSubtree(0).GetSubtree(0).IterateNodesPostfix()) {
     103        if (node is ConstantTreeNode constNode) {
     104          paramNodes.Add(constNode);
     105          coeffList.Add(constNode.Value);
     106        } else if (node is VariableTreeNode varNode) {
     107          paramNodes.Add(varNode);
     108          coeffList.Add(varNode.Weight);
     109        }
     110      }
     111      var coeff = coeffList.ToArray();
     112      var numParams = coeff.Length;
     113      if (cov.GetLength(0) != numParams) throw new InvalidProgramException();
     114
     115      // TODO: probably we do not need to sample but can instead use a first-order or second-order approximation of f
     116      // see http://sia.webpopix.org/nonlinearRegression.html
     117      // also see https://rmazing.wordpress.com/2013/08/26/predictnls-part-2-taylor-approximation-confidence-intervals-for-nls-models/
     118      // https://www.rdocumentation.org/packages/propagate/versions/1.0-4/topics/predictNLS
     119      double[] p = new double[numParams];
     120      for (int i = 0; i < 30; i++) {
     121        // sample and update parameter vector delta is
     122        alglib.hqrndnormalv(state, numParams, out var delta);
     123        alglib.rmatrixmv(numParams, numParams, cov, 0, 0, 0, delta, 0, ref p, 0);
     124        for (int j = 0; j < numParams; j++) {
     125          if (paramNodes[j] is ConstantTreeNode constNode) constNode.Value = coeff[j] + p[j];
     126          else if (paramNodes[j] is VariableTreeNode varNode) varNode.Weight = coeff[j] + p[j];
     127        }
     128        var r = 0;
     129        var estimatedValues = Interpreter.GetSymbolicExpressionTreeValues(treeCopy, dataset, rows).LimitToRange(LowerEstimationLimit, UpperEstimationLimit);
     130
     131        foreach (var pred in estimatedValues) {
     132          M[r++][i] = pred;
     133        }
     134      }
     135
     136      // reset parameters
     137      for (int j = 0; j < numParams; j++) {
     138        if (paramNodes[j] is ConstantTreeNode constNode) constNode.Value = coeff[j];
     139        else if (paramNodes[j] is VariableTreeNode varNode) varNode.Weight = coeff[j];
     140      }
     141      var sigma2 = sigma * sigma;
     142      return M.Select(M_i => M_i.Variance() + sigma2).ToArray();
    71143    }
    72144
  • branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Symbolic.Regression/3.4/SymbolicRegressionSolution.cs

    r17911 r17991  
    2020#endregion
    2121
     22using System.Collections.Generic;
    2223using System.Linq;
    2324using HEAL.Attic;
     
    112113    }
    113114
    114 
     115    IConfidenceRegressionModel IConfidenceRegressionSolution.Model => Model;
    115116
    116117    [StorableConstructor]
     
    215216      return intervalEvaluation;
    216217    }
     218
     219    public IEnumerable<double> EstimatedVariances => GetEstimatedVariances(ProblemData.AllIndices);
     220
     221    public IEnumerable<double> EstimatedTrainingVariances => GetEstimatedVariances(ProblemData.TestIndices);
     222
     223    public IEnumerable<double> EstimatedTestVariances => GetEstimatedVariances(ProblemData.TestIndices);
     224
     225
     226    public IEnumerable<double> GetEstimatedVariances(IEnumerable<int> rows) {
     227      return Model.GetEstimatedVariances(ProblemData.Dataset, rows);
     228    }
    217229  }
    218230}
  • branches/3128_Prediction_Intervals/HeuristicLab.Problems.DataAnalysis.Views/3.4/Solution Views/DataAnalysisSolutionView.cs

    r17180 r17991  
    7474      AddEvaluationViewTypes();
    7575
    76       //recover selection
     76      // recover selection
    7777      if (selectedName != null) {
    7878        foreach (ListViewItem item in itemsListView.Items) {
  • branches/3128_Prediction_Intervals/HeuristicLab.Tests/HeuristicLab.Problems.DataAnalysis-3.4/RegressionVariableImpactCalculationTest.cs

    r17948 r17991  
    4646      double rmsError;
    4747      double cvRmsError;
    48       var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError);
     48      var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError, out _);
    4949      Dictionary<string, double> expectedImpacts = GetExpectedValuesForLRTower();
    5050
     
    5959      double rmsError;
    6060      double cvRmsError;
    61       var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError);
     61      var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError, out _);
    6262      Dictionary<string, double> expectedImpacts = GetExpectedValuesForLRMiba();
    6363
     
    114114      double rmsError;
    115115      double cvRmsError;
    116       var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError);
     116      var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError, out _);
    117117      solution.ProblemData = LoadDefaultMibaProblem();
    118118      RegressionSolutionVariableImpactsCalculator.CalculateImpacts(solution);
     
    130130      double rmsError;
    131131      double cvRmsError;
    132       var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError);
     132      var solution = LinearRegression.CreateSolution(problemData, out rmsError, out cvRmsError, out _);
    133133
    134134      Stopwatch watch = new Stopwatch();
Note: See TracChangeset for help on using the changeset viewer.