Changeset 13929


Ignore:
Timestamp:
06/20/16 22:14:50 (3 years ago)
Author:
gkronber
Message:

#745: fixed copying of license file and added evaluation of all models along the path on the test set

Location:
branches/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/ElasticNetLinearRegression.cs

    r13928 r13929  
    4949    protected override void Run() {
    5050      double[] lambda;
    51       double[] rsq;
     51      double[] trainRsq;
     52      double[] testRsq;
    5253      double[,] coeff;
    5354      double[] intercept;
    54       RunElasticNetLinearRegression(Problem.ProblemData, Penality, out lambda, out rsq, out coeff, out intercept);
     55      RunElasticNetLinearRegression(Problem.ProblemData, Penality, out lambda, out trainRsq, out testRsq, out coeff, out intercept);
    5556
    5657      var coeffTable = new DataTable("Coefficient Paths", "The paths of coefficient values over different lambda values");
     
    6970
    7071      var rsqTable = new DataTable("R-Squared", "Path of R² values over different lambda values");
    71       rsqTable.Rows.Add(new DataRow("R-Squared", "Path of R² values over different lambda values", rsq));
     72      rsqTable.Rows.Add(new DataRow("R² (train)", "Path of R² values over different lambda values", trainRsq));
     73      rsqTable.Rows.Add(new DataRow("R² (test)", "Path of R² values over different lambda values", testRsq));
    7274      Results.Add(new Result(rsqTable.Name, rsqTable.Description, rsqTable));
    7375    }
    7476
    7577    public static double[] CreateElasticNetLinearRegressionSolution(IRegressionProblemData problemData, double penalty, double lambda,
    76             out double rsq,
     78            out double trainRsq,  out double testRsq,
    7779            double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity) {
    78       double[] rsqs;
     80      double[] trainRsqs;
     81      double[] testRsqs;
    7982      // run for exactly one lambda
    80       var coeffs = CreateElasticNetLinearRegressionSolution(problemData, penalty, new double[] { lambda }, out rsqs, coeffLowerBound, coeffUpperBound);
    81       rsq = rsqs[0];
     83      var coeffs = CreateElasticNetLinearRegressionSolution(problemData, penalty, new double[] { lambda }, out trainRsqs, out testRsqs, coeffLowerBound, coeffUpperBound);
     84      trainRsq = trainRsqs[0];
     85      testRsq = testRsqs[0];
    8286      return coeffs[0];
    8387    }
    8488    public static double[][] CreateElasticNetLinearRegressionSolution(IRegressionProblemData problemData, double penalty, double[] lambda,
    85             out double[] rsq,
     89            out double[] trainRsq, out double[] testRsq,
    8690            double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity,
    8791            int maxVars = -1) {
     
    8993      double[,] coeff;
    9094      double[] intercept;
    91       RunElasticNetLinearRegression(problemData, penalty, lambda.Length, 1.0, lambda, out lambda, out rsq, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars);
     95      RunElasticNetLinearRegression(problemData, penalty, lambda.Length, 1.0, lambda, out lambda, out trainRsq, out testRsq, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars);
    9296
    9397      int nRows = intercept.Length;
     
    105109
    106110    public static void RunElasticNetLinearRegression(IRegressionProblemData problemData, double penalty,
    107       out double[] lambda, out double[] rsq, out double[,] coeff, out double[] intercept,
     111      out double[] lambda, out double[] trainRsq, out double[] testRsq, out double[,] coeff, out double[] intercept,
    108112      double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity,
    109113      int maxVars = -1
     
    111115      double[] userLambda = new double[0];
    112116      // automatically determine lambda values (maximum 100 different lambda values)
    113       RunElasticNetLinearRegression(problemData, penalty, 100, 0.0, userLambda, out lambda, out rsq, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars);
     117      RunElasticNetLinearRegression(problemData, penalty, 100, 0.0, userLambda, out lambda, out trainRsq, out testRsq, out coeff, out intercept, coeffLowerBound, coeffUpperBound, maxVars);
    114118    }
    115119
     
    123127    /// <param name="ulam">User supplied lambda values</param>
    124128    /// <param name="lambda">Output lambda values</param>
    125     /// <param name="rsq">Vector of R² values for each set of coefficients along the path</param>
     129    /// <param name="trainRsq">Vector of R² values on the training set for each set of coefficients along the path</param>
     130    /// <param name="testRsq">Vector of R² values on the test set for each set of coefficients along the path</param>
    126131    /// <param name="coeff">Vector of coefficient vectors for each solution along the path</param>
    127132    /// <param name="intercept">Vector of intercepts for each solution along the path</param>
     
    130135    /// <param name="maxVars">Maximum allowed number of variables in each solution along the path (-1 => all variables are allowed)</param>
    131136    private static void RunElasticNetLinearRegression(IRegressionProblemData problemData, double penalty,
    132   int nlam, double flmin, double[] ulam, out double[] lambda, out double[] rsq, out double[,] coeff, out double[] intercept,
     137  int nlam, double flmin, double[] ulam, out double[] lambda, out double[] trainRsq, out double[] testRsq, out double[,] coeff, out double[] intercept,
    133138  double coeffLowerBound = double.NegativeInfinity, double coeffUpperBound = double.PositiveInfinity,
    134139  int maxVars = -1
     
    136141      if (penalty < 0.0 || penalty > 1.0) throw new ArgumentException("0 <= penalty <= 1", "penalty");
    137142
    138       double[,] x;
    139       double[] y;
    140       int numObs;
     143      double[,] trainX;
     144      double[,] testX;
     145
     146      double[] trainY;
     147      double[] testY;
     148      int numTrainObs, numTestObs;
    141149      int numVars;
    142       PrepareData(problemData, out x, out y, out numObs, out numVars);
     150      PrepareData(problemData, out trainX, out trainY, out numTrainObs, out testX, out testY, out numTestObs, out numVars);
    143151
    144152      int ka = 1; // => covariance updating algorithm
    145153      double parm = penalty;
    146       double[] w = Enumerable.Repeat(1.0, numObs).ToArray(); // all observations have the same weight
     154      double[] w = Enumerable.Repeat(1.0, numTrainObs).ToArray(); // all observations have the same weight
    147155      int[] jd = new int[1]; // do not force to use any of the variables
    148156      double[] vp = Enumerable.Repeat(1.0, numVars).ToArray(); // all predictor variables are unpenalized
     
    167175      int jerr = -99;
    168176
    169       elnet(ka, parm, numObs, numVars, x, y, w, jd, vp, cl, ne, nx, nlam, flmin, ulam, thr, isd, intr, maxit, out lmu, out intercept, out ca, out ia, out nin, out rsq, out lambda, out nlp, out jerr);
    170 
     177      elnet(ka, parm, numTrainObs, numVars, trainX, trainY, w, jd, vp, cl, ne, nx, nlam, flmin, ulam, thr, isd, intr, maxit, out lmu, out intercept, out ca, out ia, out nin, out trainRsq, out lambda, out nlp, out jerr);
     178
     179      testRsq = new double[lmu];
    171180      coeff = new double[lmu, numVars];
    172181      for (int solIdx = 0; solIdx < lmu; solIdx++) {
     
    175184        double[] coefficients;
    176185        double[] selectedCa = new double[nx];
    177         for (int i = 0; i < nx; i++) selectedCa[i] = ca[solIdx, i];
    178 
     186        for (int i = 0; i < nx; i++) {
     187          selectedCa[i] = ca[solIdx, i];
     188        }
     189
     190        // apply to test set to calculate test R² values for each lambda step
     191        double[] fn;
     192        modval(intercept[solIdx], selectedCa, ia, selectedNin, numTestObs, testX, out fn);
     193        OnlineCalculatorError error;
     194        var r  = OnlinePearsonsRCalculator.Calculate(testY, fn, out error);
     195        if (error != OnlineCalculatorError.None) r = 0;
     196        testRsq[solIdx] = r * r;
     197
     198        // uncompress coefficients
    179199        uncomp(numVars, selectedCa, ia, selectedNin, out coefficients);
    180200        for (int i = 0; i < coefficients.Length; i++) {
     
    184204    }
    185205
    186     private static void PrepareData(IRegressionProblemData problemData, out double[,] x, out double[] y, out int numObs, out int numVars) {
     206    private static void PrepareData(IRegressionProblemData problemData, out double[,] trainX, out double[] trainY, out int numTrainObs,
     207      out double[,] testX, out double[] testY, out int numTestObs, out int numVars) {
    187208      numVars = problemData.AllowedInputVariables.Count();
    188       numObs = problemData.TrainingIndices.Count();
    189 
    190       x = new double[numVars, numObs];
    191       y = new double[numObs];
     209      numTrainObs = problemData.TrainingIndices.Count();
     210      numTestObs = problemData.TestIndices.Count();
     211
     212      trainX = new double[numVars, numTrainObs];
     213      trainY = new double[numTrainObs];
     214      testX = new double[numVars, numTestObs];
     215      testY = new double[numTestObs];
    192216      var ds = problemData.Dataset;
    193217      var targetVar = problemData.TargetVariable;
     218      // train
    194219      int rIdx = 0;
    195220      foreach (var row in problemData.TrainingIndices) {
    196221        int cIdx = 0;
    197222        foreach (var var in problemData.AllowedInputVariables) {
    198           x[cIdx, rIdx] = ds.GetDoubleValue(var, row);
     223          trainX[cIdx, rIdx] = ds.GetDoubleValue(var, row);
    199224          cIdx++;
    200225        }
    201         y[rIdx] = ds.GetDoubleValue(targetVar, row);
     226        trainY[rIdx] = ds.GetDoubleValue(targetVar, row);
     227        rIdx++;
     228      }
     229      // test
     230      rIdx = 0;
     231      foreach(var row in problemData.TestIndices) {
     232        int cIdx = 0;
     233        foreach(var var in problemData.AllowedInputVariables) {
     234          testX[cIdx, rIdx] = ds.GetDoubleValue(var, row);
     235          cIdx++;
     236        }
     237        testY[rIdx] = ds.GetDoubleValue(targetVar, row);
    202238        rIdx++;
    203239      }
     
    449485    private static extern void uncomp_x64(ref int numVars, double[] ca, int[] ia, ref int nin, double[] a);
    450486
     487    private static void modval(double a0, double[] ca, int[] ia, int nin, int numObs, double[,] x, out double[] fn) {
     488      fn = new double[numObs];
     489      if (Environment.Is64BitProcess) {
     490        modval_x64(ref a0, ca, ia, ref nin, ref numObs, x, fn);
     491      } else {
     492        modval_x86(ref a0, ca, ia, ref nin, ref numObs, x, fn);
     493      }
     494    }
     495    // evaluate linear model from compressed coefficients and
     496    // uncompressed predictor matrix:
     497    //
     498    // call modval(a0, ca, ia, nin, n, x, f);
     499    //
     500    // input:
     501    //
     502    //    a0 = intercept
     503    //    ca(nx) = compressed coefficient values for a solution
     504    //    ia(nx) = pointers to compressed coefficients
     505    //    nin = number of compressed coefficients for solution
     506    //    n = number of predictor vectors (observations)
     507    //    x(n, ni) = full (uncompressed) predictor matrix
     508    //
     509    // output:
     510    //
     511    //    f(n) = model predictions
     512    [DllImport("glmnet-x86.dll", EntryPoint = "modval_", CallingConvention = CallingConvention.Cdecl)]
     513    private static extern void modval_x86(ref double a0, double[] ca, int[] ia, ref int nin, ref int numObs, [Out] double[,] x, double[] fn);
     514    [DllImport("glmnet-x64.dll", EntryPoint = "modval_", CallingConvention = CallingConvention.Cdecl)]
     515    private static extern void modval_x64(ref double a0, double[] ca, int[] ia, ref int nin, ref int numObs, [Out] double[,] x, double[] fn);
     516
    451517    #endregion
    452518  }
  • branches/HeuristicLab.Algorithms.DataAnalysis.Glmnet/3.4/HeuristicLab.Algorithms.DataAnalysis.Glmnet.csproj

    r13928 r13929  
    123123  </ItemGroup>
    124124  <ItemGroup>
    125     <Content Include="glmnet-license-gpl2.txt" />
     125    <Content Include="glmnet-license-gpl2.txt">
     126      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
     127    </Content>
    126128    <Content Include="glmnet-x64.dll">
    127129      <CopyToOutputDirectory>Always</CopyToOutputDirectory>
     
    133135  <ItemGroup>
    134136    <Compile Include="ElasticNetLinearRegression.cs" />
    135     <Compile Include="Plugin.cs">
    136       <CopyToOutputDirectory>Always</CopyToOutputDirectory>
    137     </Compile>
     137    <Compile Include="Plugin.cs" />
    138138    <Compile Include="Properties\AssemblyInfo.cs" />
    139139  </ItemGroup>
Note: See TracChangeset for help on using the changeset viewer.