Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
04/09/21 19:41:33 (3 years ago)
Author:
gkronber
Message:

#3117: update alglib to version 3.17

Location:
trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest
Files:
5 edited
1 copied

Legend:

Unmodified
Added
Removed
  • trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs

    r17180 r17931  
    207207        inputMatrix[row, nColumns - 1] = classIndices[inputMatrix[row, nColumns - 1]];
    208208      }
    209 
    210       alglib.dfreport rep;
    211       var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out rep);
     209     
     210      var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out var rep);
    212211
    213212      rmsError = rep.rmserror;
     
    216215      outOfBagRelClassificationError = rep.oobrelclserror;
    217216
    218       return new RandomForestModelFull(dForest, problemData.TargetVariable, problemData.AllowedInputVariables, classValues);
     217      return new RandomForestModelFull(dForest, nTrees, problemData.TargetVariable, problemData.AllowedInputVariables, classValues);
    219218    }
    220219    #endregion
  • trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs

    r17180 r17931  
    2020#endregion
    2121
     22extern alias alglib_3_7;
     23
    2224using System;
    2325using System.Collections.Generic;
     
    3941  public sealed class RandomForestModel : ClassificationModel, IRandomForestModel {
    4042    // not persisted
    41     private alglib.decisionforest randomForest;
    42     private alglib.decisionforest RandomForest {
     43    private alglib_3_7.alglib.decisionforest randomForest;
     44    private alglib_3_7.alglib.decisionforest RandomForest {
    4345      get {
    4446        // recalculate lazily
     
    7476    private RandomForestModel(StorableConstructorFlag _) : base(_) {
    7577      // for backwards compatibility (loading old solutions)
    76       randomForest = new alglib.decisionforest();
     78      randomForest = new alglib_3_7.alglib.decisionforest();
    7779    }
    7880    private RandomForestModel(RandomForestModel original, Cloner cloner)
    7981      : base(original, cloner) {
    80       randomForest = new alglib.decisionforest();
     82      randomForest = new alglib_3_7.alglib.decisionforest();
    8183      randomForest.innerobj.bufsize = original.randomForest.innerobj.bufsize;
    8284      randomForest.innerobj.nclasses = original.randomForest.innerobj.nclasses;
     
    100102
    101103    // random forest models can only be created through the static factory methods CreateRegressionModel and CreateClassificationModel
    102     private RandomForestModel(string targetVariable, alglib.decisionforest randomForest,
     104    private RandomForestModel(string targetVariable, alglib_3_7.alglib.decisionforest randomForest,
    103105      int seed, IDataAnalysisProblemData originalTrainingData,
    104106      int nTrees, double r, double m, double[] classValues = null)
     
    151153          x[column] = inputData[row, column];
    152154        }
    153         alglib.dfprocess(RandomForest, x, ref y);
     155        alglib_3_7.alglib.dfprocess(RandomForest, x, ref y);
    154156        yield return y[0];
    155157      }
     
    169171          x[column] = inputData[row, column];
    170172        }
    171         alglib.dforest.dfprocessraw(RandomForest.innerobj, x, ref ys);
     173        alglib_3_7.alglib.dforest.dfprocessraw(RandomForest.innerobj, x, ref ys);
    172174        yield return ys.VariancePop();
    173175      }
     
    187189          x[column] = inputData[row, column];
    188190        }
    189         alglib.dfprocess(randomForest, x, ref y);
     191        alglib_3_7.alglib.dfprocess(randomForest, x, ref y);
    190192        // find the class with the largest probability value
    191193        int maxProbClassIndex = 0;
     
    315317      double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices);
    316318
    317       alglib.dfreport rep;
    318       var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, 1, out rep);
     319      var dForest = RandomForestUtil.CreateRandomForestModelAlglib_3_7(seed, inputMatrix, nTrees, r, m, 1, out var rep);
    319320
    320321      rmsError = rep.rmserror;
     
    353354      }
    354355
    355       alglib.dfreport rep;
    356       var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out rep);
     356      var dForest = RandomForestUtil.CreateRandomForestModelAlglib_3_7(seed, inputMatrix, nTrees, r, m, nClasses, out var rep);
    357357
    358358      rmsError = rep.rmserror;
  • trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModelAlglib_3_7.cs

    r17926 r17931  
    2020#endregion
    2121
     22extern alias alglib_3_7;
    2223using System;
    2324using System.Collections.Generic;
     
    3132
    3233namespace HeuristicLab.Algorithms.DataAnalysis {
     34
    3335  [StorableType("9C797DF0-1169-4381-A732-6DAB90802839")]
    34   [Item("RandomForestModelFull", "Represents a random forest for regression and classification.")]
    35   public sealed class RandomForestModelFull : ClassificationModel, IRandomForestModel {
     36  [Item("RandomForestModel (alglib 3.7)", "Represents a random forest for regression and classification.")]
     37  [Obsolete("This version uses alglib version 3.7. Use RandomForestModelFull instead.")]
     38  public sealed class RandomForestModelAlglib_3_7 : ClassificationModel, IRandomForestModel {
    3639
    3740    public override IEnumerable<string> VariablesUsedForPrediction {
     
    5053
    5154    // not persisted
    52     private alglib.decisionforest randomForest;
     55    private alglib_3_7.alglib.decisionforest randomForest;
    5356
    5457    [Storable]
     
    7982
    8083    [StorableConstructor]
    81     private RandomForestModelFull(StorableConstructorFlag _) : base(_) {
    82       randomForest = new alglib.decisionforest();
    83     }
    84 
    85     private RandomForestModelFull(RandomForestModelFull original, Cloner cloner) : base(original, cloner) {
    86       randomForest = new alglib.decisionforest();
     84    private RandomForestModelAlglib_3_7(StorableConstructorFlag _) : base(_) {
     85      randomForest = new alglib_3_7.alglib.decisionforest();
     86    }
     87
     88    private RandomForestModelAlglib_3_7(RandomForestModelAlglib_3_7 original, Cloner cloner) : base(original, cloner) {
     89      randomForest = new alglib_3_7.alglib.decisionforest();
    8790      randomForest.innerobj.bufsize = original.randomForest.innerobj.bufsize;
    8891      randomForest.innerobj.nclasses = original.randomForest.innerobj.nclasses;
     
    9699    }
    97100    public override IDeepCloneable Clone(Cloner cloner) {
    98       return new RandomForestModelFull(this, cloner);
    99     }
    100 
    101     public RandomForestModelFull(alglib.decisionforest decisionForest, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<double> classValues = null) : base(targetVariable) {
     101      return new RandomForestModelAlglib_3_7(this, cloner);
     102    }
     103
     104    public RandomForestModelAlglib_3_7(alglib_3_7.alglib.decisionforest decisionForest, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<double> classValues = null) : base(targetVariable) {
    102105      this.name = ItemName;
    103106      this.description = ItemDescription;
     
    151154          x[column] = inputData[row, column];
    152155        }
    153         alglib.dfprocess(randomForest, x, ref y);
     156        alglib_3_7.alglib.dfprocess(randomForest, x, ref y);
    154157        yield return y[0];
    155158      }
     
    168171          x[column] = inputData[row, column];
    169172        }
    170         alglib.dforest.dfprocessraw(randomForest.innerobj, x, ref ys);
     173        alglib_3_7.alglib.dforest.dfprocessraw(randomForest.innerobj, x, ref ys);
    171174        yield return ys.VariancePop();
    172175      }
     
    186189          x[column] = inputData[row, column];
    187190        }
    188         alglib.dfprocess(randomForest, x, ref y);
     191        alglib_3_7::alglib.dfprocess(randomForest, x, ref y);
    189192        // find the class with the largest probability value
    190193        int maxProbClassIndex = 0;
  • trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModelFull.cs

    r17180 r17931  
    3131
    3232namespace HeuristicLab.Algorithms.DataAnalysis {
    33   [StorableType("9C797DF0-1169-4381-A732-6DAB90802839")]
     33  [StorableType("55412E08-DAD4-4C2E-9181-C142E7EA9474")]
    3434  [Item("RandomForestModelFull", "Represents a random forest for regression and classification.")]
    3535  public sealed class RandomForestModelFull : ClassificationModel, IRandomForestModel {
     
    4242    private double[] classValues;
    4343
     44    public int NumClasses => classValues == null ? 0 : classValues.Length;
     45
    4446    [Storable]
    4547    private string[] inputVariables;
    4648
     49    [Storable]
    4750    public int NumberOfTrees {
    48       get { return RandomForestNTrees; }
     51      get; private set;
    4952    }
    5053
     
    5356
    5457    [Storable]
    55     private int RandomForestBufSize {
    56       get { return randomForest.innerobj.bufsize; }
    57       set { randomForest.innerobj.bufsize = value; }
    58     }
    59     [Storable]
    60     private int RandomForestNClasses {
    61       get { return randomForest.innerobj.nclasses; }
    62       set { randomForest.innerobj.nclasses = value; }
    63     }
    64     [Storable]
    65     private int RandomForestNTrees {
    66       get { return randomForest.innerobj.ntrees; }
    67       set { randomForest.innerobj.ntrees = value; }
    68     }
    69     [Storable]
    70     private int RandomForestNVars {
    71       get { return randomForest.innerobj.nvars; }
    72       set { randomForest.innerobj.nvars = value; }
    73     }
    74     [Storable]
    75     private double[] RandomForestTrees {
    76       get { return randomForest.innerobj.trees; }
    77       set { randomForest.innerobj.trees = value; }
     58    private string RandomForestSerialized {
     59      get { alglib.dfserialize(randomForest, out var serialized); return serialized; }
     60      set { if (value != null) alglib.dfunserialize(value, out randomForest); }
    7861    }
    7962
    8063    [StorableConstructor]
    81     private RandomForestModelFull(StorableConstructorFlag _) : base(_) {
    82       randomForest = new alglib.decisionforest();
    83     }
     64    private RandomForestModelFull(StorableConstructorFlag _) : base(_) { }
    8465
    8566    private RandomForestModelFull(RandomForestModelFull original, Cloner cloner) : base(original, cloner) {
    86       randomForest = new alglib.decisionforest();
    87       randomForest.innerobj.bufsize = original.randomForest.innerobj.bufsize;
    88       randomForest.innerobj.nclasses = original.randomForest.innerobj.nclasses;
    89       randomForest.innerobj.ntrees = original.randomForest.innerobj.ntrees;
    90       randomForest.innerobj.nvars = original.randomForest.innerobj.nvars;
    91       randomForest.innerobj.trees = (double[])original.randomForest.innerobj.trees.Clone();
     67      if (original.randomForest != null)
     68        randomForest = (alglib.decisionforest)original.randomForest.make_copy();
     69      NumberOfTrees = original.NumberOfTrees;
    9270
    9371      // following fields are immutable so we don't need to clone them
     
    9977    }
    10078
    101     public RandomForestModelFull(alglib.decisionforest decisionForest, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<double> classValues = null) : base(targetVariable) {
     79    public RandomForestModelFull(alglib.decisionforest decisionForest, int nTrees, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<double> classValues = null) : base(targetVariable) {
    10280      this.name = ItemName;
    10381      this.description = ItemDescription;
    10482
    105       randomForest = decisionForest;
     83      randomForest = (alglib.decisionforest)decisionForest.make_copy();
     84      NumberOfTrees = nTrees;
    10685
    10786      this.inputVariables = inputVariables.ToArray();
     
    147126      double[] y = new double[1];
    148127
     128      alglib.dfcreatebuffer(randomForest, out var buf);
    149129      for (int row = 0; row < n; row++) {
    150130        for (int column = 0; column < columns; column++) {
    151131          x[column] = inputData[row, column];
    152132        }
    153         alglib.dfprocess(randomForest, x, ref y);
     133        alglib.dftsprocess(randomForest, buf, x, ref y); // thread-safe process (as long as separate buffers are used)
    154134        yield return y[0];
    155135      }
     
    168148          x[column] = inputData[row, column];
    169149        }
    170         alglib.dforest.dfprocessraw(randomForest.innerobj, x, ref ys);
     150        lock (randomForest)
     151          alglib.dforest.dfprocessraw(randomForest.innerobj, x, ref ys, null);
    171152        yield return ys.VariancePop();
    172153      }
     
    180161      int columns = inputData.GetLength(1);
    181162      double[] x = new double[columns];
    182       double[] y = new double[randomForest.innerobj.nclasses];
    183 
     163      double[] y = new double[NumClasses];
     164
     165      alglib.dfcreatebuffer(randomForest, out var buf);
    184166      for (int row = 0; row < n; row++) {
    185167        for (int column = 0; column < columns; column++) {
    186168          x[column] = inputData[row, column];
    187169        }
    188         alglib.dfprocess(randomForest, x, ref y);
     170        alglib.dftsprocess(randomForest, buf, x, ref y);
    189171        // find the class with the largest probability value
    190172        int maxProbClassIndex = 0;
  • trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs

    r17180 r17931  
    2020#endregion
    2121
     22extern alias alglib_3_7;
     23using alglib_3_7;
    2224using System.Collections.Generic;
    2325using System.Linq;
     
    200202      outOfBagAvgRelError = rep.oobavgrelerror;
    201203
    202       return new RandomForestModelFull(dForest, problemData.TargetVariable, problemData.AllowedInputVariables);
     204      return new RandomForestModelFull(dForest, nTrees, problemData.TargetVariable, problemData.AllowedInputVariables);
    203205    }
    204206
  • trunk/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestUtil.cs

    r17180 r17931  
    2222#endregion
    2323
     24extern alias alglib_3_7;
     25
    2426using System;
    2527using System.Collections.Generic;
     
    103105      RandomForestUtil.AssertInputMatrix(inputMatrix);
    104106
     107      int nRows = inputMatrix.GetLength(0);
     108      int nColumns = inputMatrix.GetLength(1);
     109
     110      alglib.dfbuildercreate(out var dfbuilder);
     111      alglib.dfbuildersetdataset(dfbuilder, inputMatrix, nRows, nColumns - 1, nClasses);
     112      alglib.dfbuildersetimportancenone(dfbuilder); // do not calculate importance (TODO add this feature)
     113      alglib.dfbuildersetrdfalgo(dfbuilder, 0); // only one algorithm supported in version 3.17
     114      alglib.dfbuildersetrdfsplitstrength(dfbuilder, 2); // 0 = split at the random position, fastest one
     115                                                         // 1 = split at the middle of the range
     116                                                         // 2 = strong split at the best point of the range (default)
     117      alglib.dfbuildersetrndvarsratio(dfbuilder, m);
     118      alglib.dfbuildersetsubsampleratio(dfbuilder, r);
     119      alglib.dfbuildersetseed(dfbuilder, seed);
     120      alglib.dfbuilderbuildrandomforest(dfbuilder, nTrees, out var dForest, out rep);
     121      return dForest;
     122    }
     123    internal static alglib_3_7.alglib.decisionforest CreateRandomForestModelAlglib_3_7(int seed, double[,] inputMatrix, int nTrees, double r, double m, int nClasses, out alglib_3_7.alglib.dfreport rep) {
     124      RandomForestUtil.AssertParameters(r, m);
     125      RandomForestUtil.AssertInputMatrix(inputMatrix);
     126
    105127      int info = 0;
    106       alglib.math.rndobject = new System.Random(seed);
    107       var dForest = new alglib.decisionforest();
    108       rep = new alglib.dfreport();
     128      alglib_3_7.alglib.math.rndobject = new System.Random(seed);
     129      var dForest = new alglib_3_7.alglib.decisionforest();
     130      rep = new alglib_3_7.alglib.dfreport();
    109131      int nRows = inputMatrix.GetLength(0);
    110132      int nColumns = inputMatrix.GetLength(1);
     
    112134      int nFeatures = Math.Max((int)Math.Round(m * (nColumns - 1)), 1);
    113135
    114       alglib.dforest.dfbuildinternal(inputMatrix, nRows, nColumns - 1, nClasses, nTrees, sampleSize, nFeatures, alglib.dforest.dfusestrongsplits + alglib.dforest.dfuseevs, ref info, dForest.innerobj, rep.innerobj);
     136      alglib_3_7.alglib.dforest.dfbuildinternal(inputMatrix, nRows, nColumns - 1, nClasses, nTrees, sampleSize, nFeatures, alglib_3_7.alglib.dforest.dfusestrongsplits + alglib_3_7.alglib.dforest.dfuseevs, ref info, dForest.innerobj, rep.innerobj);
    115137      if (info != 1) throw new ArgumentException("Error in calculation of random forest model");
    116138      return dForest;
     
    123145      var targetVariable = GetTargetVariableName(problemData);
    124146      foreach (var tuple in partitions) {
    125         double rmsError, avgRelError, outOfBagAvgRelError, outOfBagRmsError;
    126147        var trainingRandomForestPartition = tuple.Item1;
    127148        var testRandomForestPartition = tuple.Item2;
    128         var model = RandomForestModel.CreateRegressionModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError);
     149        var model = RandomForestRegression.CreateRandomForestRegressionModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed,
     150                                                                             out var rmsError, out var avgRelError, out var outOfBagRmsError, out var outOfBagAvgRelError);
    129151        var estimatedValues = model.GetEstimatedValues(ds, testRandomForestPartition);
    130152        var targetValues = ds.GetDoubleValues(targetVariable, testRandomForestPartition);
     
    143165      var targetVariable = GetTargetVariableName(problemData);
    144166      foreach (var tuple in partitions) {
    145         double rmsError, avgRelError, outOfBagAvgRelError, outOfBagRmsError;
    146167        var trainingRandomForestPartition = tuple.Item1;
    147168        var testRandomForestPartition = tuple.Item2;
    148         var model = RandomForestModel.CreateClassificationModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError);
     169        var model = RandomForestClassification.CreateRandomForestClassificationModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed,
     170                                                                                     out var rmsError, out var avgRelError, out var outOfBagRmsError, out var outOfBagAvgRelError);
    149171        var estimatedValues = model.GetEstimatedClassValues(ds, testRandomForestPartition);
    150172        var targetValues = ds.GetDoubleValues(targetVariable, testRandomForestPartition);
     
    176198        var parameters = new RFParameter();
    177199        for (int i = 0; i < setters.Count; ++i) { setters[i](parameters, parameterValues[i]); }
    178         double rmsError, outOfBagRmsError, avgRelError, outOfBagAvgRelError;
    179         RandomForestModel.CreateRegressionModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed, out rmsError, out outOfBagRmsError, out avgRelError, out outOfBagAvgRelError);
     200        RandomForestRegression.CreateRandomForestRegressionModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed,
     201                                                                 out var rmsError, out var outOfBagRmsError, out var avgRelError, out var outOfBagAvgRelError);
    180202
    181203        lock (locker) {
     
    208230        var parameters = new RFParameter();
    209231        for (int i = 0; i < setters.Count; ++i) { setters[i](parameters, parameterValues[i]); }
    210         double rmsError, outOfBagRmsError, avgRelError, outOfBagAvgRelError;
    211         RandomForestModel.CreateClassificationModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed,
    212                                                                 out rmsError, out outOfBagRmsError, out avgRelError, out outOfBagAvgRelError);
     232        RandomForestClassification.CreateRandomForestClassificationModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed,
     233                                                                         out var rmsError, out var outOfBagRmsError, out var avgRelError, out var outOfBagAvgRelError);
    213234
    214235        lock (locker) {
Note: See TracChangeset for help on using the changeset viewer.