Changeset 16489


Ignore:
Timestamp:
01/03/19 15:06:27 (3 weeks ago)
Author:
gkronber
Message:

#2942: merged changes from trunk to support testing with trunk

Location:
branches/2942_KNNRegressionClassification/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
1 added
2 edited

Legend:

Unmodified
Added
Removed
  • branches/2942_KNNRegressionClassification/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r16408 r16489  
    375375      <SubType>Code</SubType>
    376376    </Compile>
     377    <Compile Include="Linear\LinearRegressionModel.cs" />
    377378    <Compile Include="Linear\MultinomialLogitClassification.cs" />
    378379    <Compile Include="Linear\MultinomialLogitClassificationSolution.cs" />
  • branches/2942_KNNRegressionClassification/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r15786 r16489  
    4141  [StorableClass]
    4242  public sealed class LinearRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> {
    43     private const string LinearRegressionModelResultName = "Linear regression solution";
     43    private const string SolutionResultName = "Linear regression solution";
     44    private const string ConfidenceSolutionResultName = "Solution with prediction intervals";
    4445
    4546    [StorableConstructor]
     
    6263    protected override void Run(CancellationToken cancellationToken) {
    6364      double rmsError, cvRmsError;
    64       var solution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError);
    65       Results.Add(new Result(LinearRegressionModelResultName, "The linear regression solution.", solution));
     65      // produce both solutions, to allow symbolic manipulation of LR solutions as well
     66      // as the calculation of prediction intervals.
     67      // There is no clean way to implement the new model class for LR as a symbolic model.
     68      var solution = CreateSolution(Problem.ProblemData, out rmsError, out cvRmsError);
     69#pragma warning disable 168, 3021
     70      var symbolicSolution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError);
     71#pragma warning restore 168, 3021
     72      Results.Add(new Result(SolutionResultName, "The linear regression solution.", symbolicSolution));
     73      Results.Add(new Result(ConfidenceSolutionResultName, "Linear regression solution with parameter covariance matrix " +
     74                                                           "and calculation of prediction intervals", solution));
    6675      Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the linear regression solution on the training set.", new DoubleValue(rmsError)));
    6776      Results.Add(new Result("Estimated root mean square error (cross-validation)", "The estimated root of the mean of squared errors of the linear regression solution via cross validation.", new DoubleValue(cvRmsError)));
    6877    }
    6978
     79    [Obsolete("Use CreateSolution() instead")]
    7080    public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) {
    71       var dataset = problemData.Dataset;
    72       string targetVariable = problemData.TargetVariable;
    73       IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    74       IEnumerable<int> rows = problemData.TrainingIndices;
    75       var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>);
    76       var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>);
    77       var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows);
    78       double[,] binaryMatrix = dataset.ToArray(factorVariables, rows);
    79       double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows);
    80       var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);
    81 
    82       if (inputMatrix.ContainsNanOrInfinity())
    83         throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
     81      IEnumerable<string> doubleVariables;
     82      IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables;
     83      double[,] inputMatrix;
     84      PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables);
    8485
    8586      alglib.linearmodel lm = new alglib.linearmodel();
     
    8788      int nRows = inputMatrix.GetLength(0);
    8889      int nFeatures = inputMatrix.GetLength(1) - 1;
    89       double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant
    9090
    9191      int retVal = 1;
     
    9595      cvRmsError = ar.cvrmserror;
    9696
     97      double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant
    9798      alglib.lrunpack(lm, out coefficients, out nFeatures);
    9899
    99       int nFactorCoeff = binaryMatrix.GetLength(1);
     100      int nFactorCoeff = factorVariables.Sum(kvp => kvp.Value.Count());
    100101      int nVarCoeff = doubleVariables.Count();
    101102      var tree = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
     
    108109      return solution;
    109110    }
     111
     112    public static IRegressionSolution CreateSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) {
              // Fits a linear regression model with ALGLIB on the training rows of problemData and
              // wraps it in a LinearRegressionModel-based regression solution.
              // Outputs: rmsError and cvRmsError are taken from the ALGLIB report (ar.rmserror / ar.cvrmserror).
              // Throws ArgumentException when lrbuild reports a return code other than 1.
              // PrepareData throws NotSupportedException when the input matrix contains NaN or infinity.
     113      IEnumerable<string> doubleVariables;
     114      IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables;
     115      double[,] inputMatrix;
     116      PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables);
     117
     118      alglib.linearmodel lm = new alglib.linearmodel();
     119      alglib.lrreport ar = new alglib.lrreport();
     120      int nRows = inputMatrix.GetLength(0);
              // PrepareData appends the target variable as the last column, so it is not counted as a feature.
     121      int nFeatures = inputMatrix.GetLength(1) - 1;
     122
              // The initial value is irrelevant; retVal is overwritten by the out argument of lrbuild.
     123      int retVal = 1;
     124      alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
     125      if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution");
     126      rmsError = ar.rmserror;
     127      cvRmsError = ar.cvrmserror;
     128
     129      // get parameters of the model
     130      double[] w;
     131      int nVars;
     132      alglib.lrunpack(lm, out w, out nVars);
     133
     134      // ar.c is the covariance matrix of the model parameters, array[0..NVars,0..NVars].
     135      // C[i, j] = Cov(A[i], A[j])
     136
              // The solution is created on a clone of problemData, not on the instance passed in.
     137      var solution = new LinearRegressionModel(w, ar.c, cvRmsError, problemData.TargetVariable, doubleVariables, factorVariables)
     138        .CreateRegressionSolution((IRegressionProblemData)problemData.Clone());
     139      solution.Name = "Linear Regression Solution";
     140      return solution;
     141    }
     142
     143    private static void PrepareData(IRegressionProblemData problemData,
     144      out double[,] inputMatrix,
     145      out IEnumerable<string> doubleVariables,
     146      out IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables) {
              // Builds the matrix used for model fitting from the training rows of problemData.
              // Column layout of inputMatrix: [factor variable columns | double input variables | target variable].
              // doubleVariables: allowed input variables of type double.
              // factorVariables: allowed input variables of type string together with their values
              //                  (as returned by GetFactorVariableValues for the training rows).
              // Throws NotSupportedException when the resulting matrix contains NaN or infinity.
     147      var dataset = problemData.Dataset;
     148      string targetVariable = problemData.TargetVariable;
     149      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
     150      IEnumerable<int> rows = problemData.TrainingIndices;
     151      doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>);
     152      var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>);
     153      factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows);
              // NOTE(review): presumably a binary (indicator) encoding of the factor values, judging by the name - confirm against Dataset.ToArray.
     154      double[,] binaryMatrix = dataset.ToArray(factorVariables, rows);
              // The target variable is appended after the double inputs, so it ends up in the last column.
     155      double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows);
     156      inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);
     157
     158      if (inputMatrix.ContainsNanOrInfinity())
     159        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
     160    }
    110161    #endregion
    111162  }
Note: See TracChangeset for help on using the changeset viewer.