Ignore:
Timestamp:
12/15/18 12:46:02 (2 years ago)
Author:
gkronber
Message:

#2892: merged branch back to trunk

Location:
trunk
Files:
5 edited
1 copied

Legend:

Unmodified
Added
Removed
  • trunk

  • trunk/HeuristicLab.Algorithms.DataAnalysis

  • trunk/HeuristicLab.Algorithms.DataAnalysis/3.4

  • trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r15783 r16389  
    263263    <Compile Include="Linear\MultinomialLogitClassification.cs" />
    264264    <Compile Include="Linear\MultinomialLogitClassificationSolution.cs" />
     265    <Compile Include="Linear\LinearRegressionModel.cs" />
    265266    <Compile Include="Linear\MultinomialLogitModel.cs" />
    266267    <Compile Include="Linear\Scaling.cs" />
  • trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r15786 r16389  
    6262    protected override void Run(CancellationToken cancellationToken) {
    6363      double rmsError, cvRmsError;
    64       var solution = CreateLinearRegressionSolution(Problem.ProblemData, out rmsError, out cvRmsError);
     64      var solution = CreateSolution(Problem.ProblemData, out rmsError, out cvRmsError);
    6565      Results.Add(new Result(LinearRegressionModelResultName, "The linear regression solution.", solution));
    6666      Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the linear regression solution on the training set.", new DoubleValue(rmsError)));
     
    6868    }
    6969
     70    [Obsolete("Use CreateSolution() instead")]
    7071    public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) {
    71       var dataset = problemData.Dataset;
    72       string targetVariable = problemData.TargetVariable;
    73       IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    74       IEnumerable<int> rows = problemData.TrainingIndices;
    75       var doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>);
    76       var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>);
    77       var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows);
    78       double[,] binaryMatrix = dataset.ToArray(factorVariables, rows);
    79       double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows);
    80       var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);
    81 
    82       if (inputMatrix.ContainsNanOrInfinity())
    83         throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
     72      IEnumerable<string> doubleVariables;
     73      IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables;
     74      double[,] inputMatrix;
     75      PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables);
    8476
    8577      alglib.linearmodel lm = new alglib.linearmodel();
     
    8779      int nRows = inputMatrix.GetLength(0);
    8880      int nFeatures = inputMatrix.GetLength(1) - 1;
    89       double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant
    9081
    9182      int retVal = 1;
     
    9586      cvRmsError = ar.cvrmserror;
    9687
     88      double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant
    9789      alglib.lrunpack(lm, out coefficients, out nFeatures);
    98 
    99       int nFactorCoeff = binaryMatrix.GetLength(1);
     90     
     91      int nFactorCoeff = factorVariables.Sum(kvp=>kvp.Value.Count());
    10092      int nVarCoeff = doubleVariables.Count();
    10193      var tree = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
     
    108100      return solution;
    109101    }
     102
     103    public static IRegressionSolution CreateSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) {
     104      IEnumerable<string> doubleVariables;
     105      IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables;
     106      double[,] inputMatrix;
     107      PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables);
     108
     109      alglib.linearmodel lm = new alglib.linearmodel();
     110      alglib.lrreport ar = new alglib.lrreport();
     111      int nRows = inputMatrix.GetLength(0);
     112      int nFeatures = inputMatrix.GetLength(1) - 1;
     113
     114      int retVal = 1;
     115      alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
     116      if (retVal != 1) throw new ArgumentException("Error in calculation of linear regression solution");
     117      rmsError = ar.rmserror;
     118      cvRmsError = ar.cvrmserror;
     119
     120      // get parameters of the model
     121      double[] w;
     122      int nVars;
     123      alglib.lrunpack(lm, out w, out nVars);
     124
     125      // ar.c is the covariance matrix, array[0..NVars, 0..NVars].
     126      // C[i, j] = Cov(A[i], A[j])
     127
     128      var solution = new LinearRegressionModel(w, ar.c, cvRmsError, problemData.TargetVariable, doubleVariables, factorVariables)
     129        .CreateRegressionSolution((IRegressionProblemData)problemData.Clone());
     130      solution.Name = "Linear Regression Solution";
     131      return solution;
     132    }
     133
     134    private static void PrepareData(IRegressionProblemData problemData,
     135      out double[,] inputMatrix,
     136      out IEnumerable<string> doubleVariables,
     137      out IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables) {
     138      var dataset = problemData.Dataset;
     139      string targetVariable = problemData.TargetVariable;
     140      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
     141      IEnumerable<int> rows = problemData.TrainingIndices;
     142      doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>);
     143      var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>);
     144      factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows);
     145      double[,] binaryMatrix = dataset.ToArray(factorVariables, rows);
     146      double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows);
     147      inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);
     148
     149      if (inputMatrix.ContainsNanOrInfinity())
     150        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
     151    }
    110152    #endregion
    111153  }
Note: See TracChangeset for help on using the changeset viewer.