Changeset 16448


Ignore:
Timestamp:
12/23/18 08:06:56 (4 months ago)
Author:
gkronber
Message:

#2892: changed LR to produce two solutions: a symbolic representation and a solution with prediction intervals.

It is not straightforward to implement the model with prediction intervals as a symbolic regression model.

Location:
trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r16389 r16448  
    4141  [StorableClass]
    4242  public sealed class LinearRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> {
    43     private const string LinearRegressionModelResultName = "Linear regression solution";
     43    private const string SolutionResultName = "Linear regression solution";
     44    private const string ConfidenceSolutionResultName = "Solution with prediction intervals";
    4445
    4546    [StorableConstructor]
     
    6263    protected override void Run(CancellationToken cancellationToken) {
    6364      double rmsError, cvRmsError;
     65      // produce both solutions, to allow symbolic manipulation of LR solutions as well
     66      // as the calculation of prediction intervals.
     67      // There is no clean way to implement the new model class for LR as a symbolic model.
    6468      var solution = CreateSolution(Problem.ProblemData, out rmsError, out cvRmsError);
    65       Results.Add(new Result(LinearRegressionModelResultName, "The linear regression solution.", solution));
     69#pragma warning disable 168, 3021
     70      var symbolicSolution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError);
     71#pragma warning restore 168, 3021
     72      Results.Add(new Result(SolutionResultName, "The linear regression solution.", symbolicSolution));
     73      Results.Add(new Result(ConfidenceSolutionResultName, "Linear regression solution with parameter covariance matrix " +
     74                                                           "and calculation of prediction intervals", solution));
    6675      Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the linear regression solution on the training set.", new DoubleValue(rmsError)));
    6776      Results.Add(new Result("Estimated root mean square error (cross-validation)", "The estimated root of the mean of squared errors of the linear regression solution via cross validation.", new DoubleValue(cvRmsError)));
     
    8897      double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant
    8998      alglib.lrunpack(lm, out coefficients, out nFeatures);
    90      
    91       int nFactorCoeff = factorVariables.Sum(kvp=>kvp.Value.Count());
     99
     100      int nFactorCoeff = factorVariables.Sum(kvp => kvp.Value.Count());
    92101      int nVarCoeff = doubleVariables.Count();
    93102      var tree = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
     
    132141    }
    133142
    134     private static void PrepareData(IRegressionProblemData problemData, 
    135       out double[,] inputMatrix, 
    136       out IEnumerable<string> doubleVariables, 
     143    private static void PrepareData(IRegressionProblemData problemData,
     144      out double[,] inputMatrix,
     145      out IEnumerable<string> doubleVariables,
    137146      out IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables) {
    138147      var dataset = problemData.Dataset;
  • trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegressionModel.cs

    r16415 r16448  
    4949      get; private set;
    5050    }
    51    
     51
    5252    public override IEnumerable<string> VariablesUsedForPrediction {
    5353      get { return allowedInputVariables.Union(factorVariables.Select(f => f.Key)); }
     
    7878      this.W = new double[w.Length];
    7979      Array.Copy(w, W, w.Length);
    80       this.C = new double[covariance.GetLength(0),covariance.GetLength(1)];
     80      this.C = new double[covariance.GetLength(0), covariance.GetLength(1)];
    8181      Array.Copy(covariance, C, covariance.Length);
    8282      this.NoiseSigma = noiseSigma;
     
    123123
    124124      double[] d = new double[C.GetLength(0)];
    125      
     125
    126126      for (int row = 0; row < n; row++) {
    127127        for (int column = 0; column < columns; column++) {
    128           d[column] = inputData[row,column];
     128          d[column] = inputData[row, column];
    129129        }
    130130        d[columns] = 1;
    131131
    132132        double var = 0.0;
    133         for(int i=0;i<d.Length;i++) {
    134           for(int j = 0;j<d.Length;j++) {
     133        for (int i = 0; i < d.Length; i++) {
     134          for (int j = 0; j < d.Length; j++) {
    135135            var += d[i] * C[i, j] * d[j];
    136136          }
    137137        }
    138         yield return var + NoiseSigma*NoiseSigma;
     138        yield return var + NoiseSigma * NoiseSigma;
    139139      }
    140140    }
    141 
    142141
    143142    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
Note: See TracChangeset for help on using the changeset viewer.