Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
07/04/19 16:11:29 (5 years ago)
Author:
abeham
Message:

#2892: merged to stable

Location:
stable
Files:
5 edited
1 copied

Legend:

Unmodified
Added
Removed
  • stable

  • stable/HeuristicLab.Algorithms.DataAnalysis

  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4

  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r15788 r17074  
    263263    <Compile Include="Linear\MultinomialLogitClassification.cs" />
    264264    <Compile Include="Linear\MultinomialLogitClassificationSolution.cs" />
     265    <Compile Include="Linear\LinearRegressionModel.cs" />
    265266    <Compile Include="Linear\MultinomialLogitModel.cs" />
    266267    <Compile Include="Linear\Scaling.cs" />
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r15788 r17074  
    4141  [StorableClass]
    4242  public sealed class LinearRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> {
    43     private const string LinearRegressionModelResultName = "Linear regression solution";
     43    private const string SolutionResultName = "Linear regression solution";
     44    private const string ConfidenceSolutionResultName = "Solution with prediction intervals";
    4445
    4546    [StorableConstructor]
     
    6263    protected override void Run(CancellationToken cancellationToken) {
    6364      double rmsError, cvRmsError;
    64       var solution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError);
    65       Results.Add(new Result(LinearRegressionModelResultName, "The linear regression solution.", solution));
     65      // produce both solutions, to allow symbolic manipulation of LR solutions as well
     66      // as the calculation of prediction intervals.
     67      // There is no clean way to implement the new model class for LR as a symbolic model.
     68      var solution = CreateSolution(Problem.ProblemData, out rmsError, out cvRmsError);
     69#pragma warning disable 168, 3021
     70      var symbolicSolution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError);
     71#pragma warning restore 168, 3021
     72      Results.Add(new Result(SolutionResultName, "The linear regression solution.", symbolicSolution));
     73      Results.Add(new Result(ConfidenceSolutionResultName, "Linear regression solution with parameter covariance matrix " +
     74                                                           "and calculation of prediction intervals", solution));
    6675      Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the linear regression solution on the training set.", new DoubleValue(rmsError)));
    6776      Results.Add(new Result("Estimated root mean square error (cross-validation)", "The estimated root of the mean of squared errors of the linear regression solution via cross validation.", new DoubleValue(cvRmsError)));
    6877    }
    6978
     79    [Obsolete("Use CreateSolution() instead")]
    7080    public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) {
    71       var dataset = problemData.Dataset;
    72       string targetVariable = problemData.TargetVariable;
    73       IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    74       IEnumerable<int> rows = problemData.TrainingIndices;
    75       var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>);
    76       var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>);
    77       var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows);
    78       double[,] binaryMatrix = dataset.ToArray(factorVariables, rows);
    79       double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows);
    80       var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);
    81 
    82       if (inputMatrix.ContainsNanOrInfinity())
    83         throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
     81      IEnumerable<string> doubleVariables;
     82      IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables;
     83      double[,] inputMatrix;
     84      PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables);
    8485
    8586      alglib.linearmodel lm = new alglib.linearmodel();
     
    8788      int nRows = inputMatrix.GetLength(0);
    8889      int nFeatures = inputMatrix.GetLength(1) - 1;
    89       double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant
    9090
    9191      int retVal = 1;
     
    9595      cvRmsError = ar.cvrmserror;
    9696
     97      double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant
    9798      alglib.lrunpack(lm, out coefficients, out nFeatures);
    9899
    99       int nFactorCoeff = binaryMatrix.GetLength(1);
     100      int nFactorCoeff = factorVariables.Sum(kvp => kvp.Value.Count());
    100101      int nVarCoeff = doubleVariables.Count();
    101102      var tree = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
     
    108109      return solution;
    109110    }
     111
     112    public static IRegressionSolution CreateSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) {
     113      IEnumerable<string> doubleVariables;
     114      IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables;
     115      double[,] inputMatrix;
     116      PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables);
     117
     118      alglib.linearmodel lm = new alglib.linearmodel();
     119      alglib.lrreport ar = new alglib.lrreport();
     120      int nRows = inputMatrix.GetLength(0);
     121      int nFeatures = inputMatrix.GetLength(1) - 1;
     122
     123      int retVal = 1;
     124      alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
     125      if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution");
     126      rmsError = ar.rmserror;
     127      cvRmsError = ar.cvrmserror;
     128
     129      // get parameters of the model
     130      double[] w;
     131      int nVars;
     132      alglib.lrunpack(lm, out w, out nVars);
     133
     134      // ar.c is the covariance matrix, array[0..NVars,0..NVars].
     135      // C[i, j] = Cov(A[i], A[j])
     136
     137      var solution = new LinearRegressionModel(w, ar.c, cvRmsError, problemData.TargetVariable, doubleVariables, factorVariables)
     138        .CreateRegressionSolution((IRegressionProblemData)problemData.Clone());
     139      solution.Name = "Linear Regression Solution";
     140      return solution;
     141    }
     142
     143    private static void PrepareData(IRegressionProblemData problemData,
     144      out double[,] inputMatrix,
     145      out IEnumerable<string> doubleVariables,
     146      out IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables) {
     147      var dataset = problemData.Dataset;
     148      string targetVariable = problemData.TargetVariable;
     149      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
     150      IEnumerable<int> rows = problemData.TrainingIndices;
     151      doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>);
     152      var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>);
     153      factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows);
     154      double[,] binaryMatrix = dataset.ToArray(factorVariables, rows);
     155      double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows);
     156      inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);
     157
     158      if (inputMatrix.ContainsNanOrInfinity())
     159        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
     160    }
    110161    #endregion
    111162  }
  • stable/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegressionModel.cs

    r16389 r17074  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Drawing;
    2425using System.Linq;
    2526using HeuristicLab.Common;
     
    3536  [Item("Linear Regression Model", "Represents a linear regression model.")]
    3637  public sealed class LinearRegressionModel : RegressionModel, IConfidenceRegressionModel {
     38    public static new Image StaticItemImage {
     39      get { return HeuristicLab.Common.Resources.VSImageLibrary.Function; }
     40    }
    3741
    3842    [Storable]
     
    4953      get; private set;
    5054    }
    51    
     55
    5256    public override IEnumerable<string> VariablesUsedForPrediction {
    53       get { return allowedInputVariables; }
     57      get { return doubleVariables.Union(factorVariables.Select(f => f.Key)); }
    5458    }
    5559
    5660    [Storable]
    57     private string[] allowedInputVariables;
     61    private string[] doubleVariables;
    5862    [Storable]
    5963    private List<KeyValuePair<string, IEnumerable<string>>> factorVariables;
     64
     65    /// <summary>
     66    /// Enumerable of parameter names used by the model: one-hot-encoded factor levels ("variable=value"), the double variables, and the constant term.
     67    /// </summary>
     68    public IEnumerable<string> ParameterNames {
     69      get {
     70        return factorVariables.SelectMany(kvp => kvp.Value.Select(factorVal => $"{kvp.Key}={factorVal}"))
     71          .Concat(doubleVariables)
     72          .Concat(new[] { "<const>" });
     73      }
     74    }
    6075
    6176    [StorableConstructor]
     
    6984      this.NoiseSigma = original.NoiseSigma;
    7085
    71       allowedInputVariables = (string[])original.allowedInputVariables.Clone();
     86      doubleVariables = (string[])original.doubleVariables.Clone();
    7287      this.factorVariables = original.factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList();
    7388    }
     
    7893      this.W = new double[w.Length];
    7994      Array.Copy(w, W, w.Length);
    80       this.C = new double[covariance.GetLength(0),covariance.GetLength(1)];
     95      this.C = new double[covariance.GetLength(0), covariance.GetLength(1)];
    8196      Array.Copy(covariance, C, covariance.Length);
    8297      this.NoiseSigma = noiseSigma;
    83       this.allowedInputVariables = doubleInputVariables.ToArray();
     98      this.doubleVariables = doubleInputVariables.ToArray();
     99      // clone
    84100      this.factorVariables = factorVariables.Select(kvp => new KeyValuePair<string, IEnumerable<string>>(kvp.Key, new List<string>(kvp.Value))).ToList();
    85101    }
     
    94110
    95111    public override IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    96       double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
     112      double[,] inputData = dataset.ToArray(doubleVariables, rows);
    97113      double[,] factorData = dataset.ToArray(factorVariables, rows);
    98114
     
    113129
    114130    public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
    115       double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
     131      double[,] inputData = dataset.ToArray(doubleVariables, rows);
    116132      double[,] factorData = dataset.ToArray(factorVariables, rows);
    117133
     
    122138
    123139      double[] d = new double[C.GetLength(0)];
    124      
     140
    125141      for (int row = 0; row < n; row++) {
    126142        for (int column = 0; column < columns; column++) {
    127           d[column] = inputData[row,column];
     143          d[column] = inputData[row, column];
    128144        }
    129145        d[columns] = 1;
    130146
    131147        double var = 0.0;
    132         for(int i=0;i<d.Length;i++) {
    133           for(int j = 0;j<d.Length;j++) {
     148        for (int i = 0; i < d.Length; i++) {
     149          for (int j = 0; j < d.Length; j++) {
    134150            var += d[i] * C[i, j] * d[j];
    135151          }
    136152        }
    137         yield return var + NoiseSigma*NoiseSigma;
     153        yield return var + NoiseSigma * NoiseSigma;
    138154      }
    139155    }
    140 
    141156
    142157    public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
Note: See TracChangeset for help on using the changeset viewer.