
Timestamp:
07/13/21 10:55:09
Author:
gkronber
Message:

#3087: merged r17784:18004 from trunk to branch to prepare for trunk reintegration (fixed a conflict in CrossValidation.cs)

Location:
branches/3087_Ceres_Integration
Files:
22 edited
4 copied

  • branches/3087_Ceres_Integration

  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis

  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4

  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/CrossValidation.cs

    r17833 r18006  
    3535using HeuristicLab.Problems.DataAnalysis.Symbolic;
    3636using HeuristicLab.Random;
     37using HeuristicLab.Problems.DataAnalysis.Symbolic.Regression;
     38using HeuristicLab.Problems.DataAnalysis.Symbolic.Classification;
    3739
    3840namespace HeuristicLab.Algorithms.DataAnalysis {
     
    338340              symbolicProblem.FitnessCalculationPartition.End = SamplesEnd.Value;
    339341            }
     342
     343            // We need to set the estimation limits because they are recalculated by the problem
     344            // whenever the data partitions change.
     345            // Instead of explicitly handling all types we could also check the parameters-collection
     346            // for a parameter with name "EstimationLimits".
     347            SetEstimationLimits(problem, new[] { typeof(SymbolicRegressionSingleObjectiveProblem),
     348                                                 typeof(SymbolicRegressionMultiObjectiveProblem),
     349                                                 typeof(SymbolicClassificationSingleObjectiveProblem),
     350                                                 typeof(SymbolicClassificationMultiObjectiveProblem) });
     351
    340352            clonedAlgorithm.Prepare();
    341353            clonedAlgorithms.Add(clonedAlgorithm);
     
    509521      foreach (KeyValuePair<string, List<IClassificationSolution>> solutions in resultSolutions) {
    510522        // at least one algorithm (GBT with logistic regression loss) produces a classification solution even though the original problem is a regression problem.
    511         var targetVariable = solutions.Value.First().ProblemData.TargetVariable;
    512523        var dataset = (Dataset)Problem.ProblemData.Dataset;
    513524        if (ShuffleSamples.Value) {
     
    516527        }
    517528        var problemData = (IClassificationProblemData)Problem.ProblemData;
    518         var problemDataClone = new ClassificationProblemData(dataset, problemData.AllowedInputVariables, targetVariable);
     529        var problemDataClone = new ClassificationProblemData(dataset, problemData.AllowedInputVariables, problemData.TargetVariable, problemData.ClassNames, problemData.PositiveClass);
    519530        // set partitions of problem data clone correctly
    520531        problemDataClone.TrainingPartition.Start = SamplesStart.Value; problemDataClone.TrainingPartition.End = SamplesEnd.Value;
     
    811822    }
    812823    #endregion
     824
     825    #region helper
     826
     827    private void SetEstimationLimits(IDataAnalysisProblem problem, Type[] types) {
     828      foreach (var type in types) {
     829        if (type.IsAssignableFrom(problem.GetType())) {
     830          var originalLimits = (DoubleLimit)Problem.Parameters["EstimationLimits"].ActualValue;  // problem is a clone of Problem
     831          var limits = (DoubleLimit)problem.Parameters["EstimationLimits"].ActualValue;
     832          limits.Lower = originalLimits.Lower;
     833          limits.Upper = originalLimits.Upper;
     834        }
     835      }
     836    }
     837
     838    #endregion
    813839  }
    814840}
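
    The comment in the hunk above mentions a name-based alternative to listing the four symbolic problem types explicitly. A minimal sketch of that variant, reusing the parameter lookup from the SetEstimationLimits helper (untested; ContainsKey is used the same way elsewhere in this changeset):

      private void SetEstimationLimits(IDataAnalysisProblem problem) {
        // look the parameter up by name instead of checking concrete problem types
        if (!problem.Parameters.ContainsKey("EstimationLimits")) return;
        var originalLimits = (DoubleLimit)Problem.Parameters["EstimationLimits"].ActualValue; // problem is a clone of Problem
        var limits = (DoubleLimit)problem.Parameters["EstimationLimits"].ActualValue;
        limits.Lower = originalLimits.Lower;
        limits.Upper = originalLimits.Upper;
      }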
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs

    r17180 r18006  
    321321
    322322      // cholesky decomposition
    323       var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
     323      var res = alglib.trfac.spdmatrixcholesky(ref l, n, false, null);
    324324      if (!res) throw new ArgumentException("Matrix is not positive semidefinite");
    325325      return l;
     
    412412
    413413        // for stddev
    414         alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, ref sWKs, 0, 0);
     414        alglib.ablas.rmatrixlefttrsm(n, newN, l, 0, 0, false, false, 0, sWKs, 0, 0, null);
    415415
    416416        for (int i = 0; i < newN; i++) {
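
    Both call sites above illustrate the ALGLIB 3.17 signature change: the internal routines take one extra trailing argument (passed as null for defaults), and rmatrixlefttrsm no longer takes its matrix by ref. A before/after sketch of the Cholesky call:

      // 3.7:  var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
      // 3.17: trailing settings argument added (null = defaults)
      var res = alglib.trfac.spdmatrixcholesky(ref l, n, false, null); // false = lower triangle
      if (!res) throw new ArgumentException("Matrix is not positive semidefinite");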
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithm.cs

    r17180 r18006  
    281281            new AccuracyMaximizationThresholdCalculator());
    282282          var classificationProblemData = new ClassificationProblemData(problemData.Dataset,
    283             problemData.AllowedInputVariables, problemData.TargetVariable, problemData.Transformations);
     283            problemData.AllowedInputVariables, problemData.TargetVariable, transformations: problemData.Transformations);
    284284          classificationProblemData.TrainingPartition.Start = Problem.ProblemData.TrainingPartition.Start;
    285285          classificationProblemData.TrainingPartition.End = Problem.ProblemData.TrainingPartition.End;
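
    The switch to the named transformations: argument implies the constructor gained new optional parameters ahead of it (presumably the classNames and positiveClass arguments visible in the CrossValidation.cs hunk above). A sketch of the call shape; only the named argument is confirmed by this diff:

      // hypothetical parameter order; the named argument skips the optional classNames/positiveClass
      new ClassificationProblemData(problemData.Dataset, problemData.AllowedInputVariables,
                                    problemData.TargetVariable, transformations: problemData.Transformations);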
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r17154 r18006  
    107107  </PropertyGroup>
    108108  <ItemGroup>
     109    <Reference Include="ALGLIB-3.17.0, Version=3.17.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
     110      <SpecificVersion>False</SpecificVersion>
     111      <HintPath>..\..\bin\ALGLIB-3.17.0.dll</HintPath>
     112      <Aliases>global</Aliases>
     113      <Private>False</Private>
     114    </Reference>
    109115    <Reference Include="ALGLIB-3.7.0, Version=3.7.0.0, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL">
     116      <SpecificVersion>False</SpecificVersion>
    110117      <HintPath>..\..\bin\ALGLIB-3.7.0.dll</HintPath>
     118      <Aliases>alglib_3_7</Aliases>
    111119      <Private>False</Private>
    112120    </Reference>
     
    133141    <Compile Include="DoubleArrayExtensions.cs" />
    134142    <Compile Include="FixedDataAnalysisAlgorithm.cs" />
     143    <Compile Include="GAM\GeneralizedAdditiveModelAlgorithm.cs" />
     144    <Compile Include="GAM\Spline1dModel.cs" />
    135145    <Compile Include="GaussianProcess\CovarianceFunctions\CovarianceSpectralMixture.cs" />
    136146    <Compile Include="GaussianProcess\CovarianceFunctions\CovariancePiecewisePolynomial.cs" />
     
    284294    <Compile Include="NearestNeighbour\NearestNeighbourClassificationSolution.cs" />
    285295    <Compile Include="NearestNeighbour\NearestNeighbourModel.cs" />
     296    <Compile Include="NearestNeighbour\NearestNeighbourModelAlglib_3_7.cs" />
    286297    <Compile Include="NearestNeighbour\NearestNeighbourRegression.cs" />
    287298    <Compile Include="NearestNeighbour\NearestNeighbourRegressionSolution.cs" />
     
    294305    <Compile Include="NeuralNetwork\NeuralNetworkClassificationSolution.cs" />
    295306    <Compile Include="NeuralNetwork\NeuralNetworkModel.cs" />
     307    <Compile Include="NeuralNetwork\NeuralNetworkModelAlglib_3_7.cs" />
    296308    <Compile Include="NeuralNetwork\NeuralNetworkRegression.cs" />
    297309    <Compile Include="NeuralNetwork\NeuralNetworkRegressionSolution.cs" />
     
    303315    <Compile Include="RandomForest\RandomForestClassification.cs" />
    304316    <Compile Include="RandomForest\RandomForestModel.cs" />
     317    <Compile Include="RandomForest\RandomForestModelAlglib_3_7.cs" />
    305318    <Compile Include="RandomForest\RandomForestModelFull.cs" />
    306319    <Compile Include="RandomForest\RandomForestRegression.cs" />
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/KernelRidgeRegression/KernelRidgeRegressionModel.cs

    r17180 r18006  
    108108        }
    109109        // cholesky decomposition
    110         var res = alglib.trfac.spdmatrixcholesky(ref l, n, false);
     110        var res = alglib.trfac.spdmatrixcholesky(ref l, n, false, null);
    111111        if (res == false) { // try LU decomposition if Cholesky failed
    112112          int[] pivots;
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs

    r17180 r18006  
    6565
    6666      var ds = ReduceDataset(dataset, rows);
    67       nnModel = new NearestNeighbourModel(ds, Enumerable.Range(0, ds.Rows), k, false, ds.VariableNames.Last(), ds.VariableNames.Take(transformationMatrix.GetLength(1)), classValues: classValues);
     67      // the new implementation of kNN uses selfmatch=true by default
     68      nnModel = new NearestNeighbourModelAlglib_3_7(ds, Enumerable.Range(0, ds.Rows), k, false, ds.VariableNames.Last(), ds.VariableNames.Take(transformationMatrix.GetLength(1)), classValues: classValues);
    6869    }
    6970
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs

    r17180 r18006  
    4242    private const string NearestNeighbourClassificationModelResultName = "Nearest neighbour classification solution";
    4343    private const string WeightsParameterName = "Weights";
    44     private const string SelfMatchParameterName = "SelfMatch";
    4544
    4645    #region parameter properties
    4746    public IFixedValueParameter<IntValue> KParameter {
    4847      get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; }
    49     }
    50     public IFixedValueParameter<BoolValue> SelfMatchParameter {
    51       get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; }
    5248    }
    5349    public IValueParameter<DoubleArray> WeightsParameter {
     
    5652    #endregion
    5753    #region properties
    58     public bool SelfMatch {
    59       get { return SelfMatchParameter.Value.Value; }
    60       set { SelfMatchParameter.Value.Value = value; }
    61     }
    6254    public int K {
    6355      get { return KParameter.Value.Value; }
     
    8072    public NearestNeighbourClassification()
    8173      : base() {
    82       Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    8374      Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3)));
    8475      Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
     
    9182      if (!Parameters.ContainsKey(WeightsParameterName)) {
    9283        Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
    93       }
    94       if (!Parameters.ContainsKey(SelfMatchParameterName)) {
    95         Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    9684      }
    9785      #endregion
     
    10694      double[] weights = null;
    10795      if (Weights != null) weights = Weights.CloneAsArray();
    108       var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, SelfMatch, weights);
     96      var solution = CreateNearestNeighbourClassificationSolution(Problem.ProblemData, K, weights);
    10997      Results.Add(new Result(NearestNeighbourClassificationModelResultName, "The nearest neighbour classification solution.", solution));
    11098    }
    11199
    112     public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
     100    public static IClassificationSolution CreateNearestNeighbourClassificationSolution(IClassificationProblemData problemData, int k, double[] weights = null) {
    113101      var problemDataClone = (IClassificationProblemData)problemData.Clone();
    114       return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, selfMatch, weights), problemDataClone);
     102      return new NearestNeighbourClassificationSolution(Train(problemDataClone, k, weights), problemDataClone);
    115103    }
    116104
    117     public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
     105    public static INearestNeighbourModel Train(IClassificationProblemData problemData, int k, double[] weights = null) {
    118106      return new NearestNeighbourModel(problemData.Dataset,
    119107        problemData.TrainingIndices,
    120108        k,
    121         selfMatch,
    122109        problemData.TargetVariable,
    123110        problemData.AllowedInputVariables,
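
    With the SelfMatch parameter removed, the public entry points reduce to k and optional weights. A short usage sketch based on the new signatures above (weight values are hypothetical):

      // k nearest neighbours with automatic feature scaling (weights == null)
      var solution = NearestNeighbourClassification.CreateNearestNeighbourClassificationSolution(problemData, k: 3);

      // or train directly with explicit per-feature weights
      var model = NearestNeighbourClassification.Train(problemData, k: 3, weights: new[] { 1.0, 0.5 });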
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r17180 r18006  
    3232  /// Represents a nearest neighbour model for regression and classification
    3333  /// </summary>
    34   [StorableType("A76C0823-3077-4ACE-8A40-E9B717C7DB60")]
     34  [StorableType("04A07DF6-6EB5-4D29-B7AE-5BE204CAF6BC")]
    3535  [Item("NearestNeighbourModel", "Represents a nearest neighbour model for regression and classification.")]
    3636  public sealed class NearestNeighbourModel : ClassificationModel, INearestNeighbourModel {
    3737
    38     private readonly object kdTreeLockObject = new object();
    39 
    40     private alglib.nearestneighbor.kdtree kdTree;
    41     public alglib.nearestneighbor.kdtree KDTree {
    42       get { return kdTree; }
    43       set {
    44         if (value != kdTree) {
    45           if (value == null) throw new ArgumentNullException();
    46           kdTree = value;
    47           OnChanged(EventArgs.Empty);
    48         }
    49       }
     38    private alglib.knnmodel model;
     39    [Storable]
     40    private string SerializedModel {
     41      get { alglib.knnserialize(model, out var ser); return ser; }
     42      set { if (value != null) alglib.knnunserialize(value, out model); }
    5043    }
    5144
     
    6053    [Storable]
    6154    private int k;
    62     [Storable(DefaultValue = false)]
    63     private bool selfMatch;
    64     [Storable(DefaultValue = null)]
    65     private double[] weights; // not set for old versions loaded from disk
    66     [Storable(DefaultValue = null)]
    67     private double[] offsets; // not set for old versions loaded from disk
     55    [Storable]
     56    private double[] weights;
     57    [Storable]
     58    private double[] offsets;
    6859
    6960    [StorableConstructor]
    70     private NearestNeighbourModel(StorableConstructorFlag _) : base(_) {
    71       kdTree = new alglib.nearestneighbor.kdtree();
    72     }
     61    private NearestNeighbourModel(StorableConstructorFlag _) : base(_) { }
    7362    private NearestNeighbourModel(NearestNeighbourModel original, Cloner cloner)
    7463      : base(original, cloner) {
    75       kdTree = new alglib.nearestneighbor.kdtree();
    76       kdTree.approxf = original.kdTree.approxf;
    77       kdTree.boxmax = (double[])original.kdTree.boxmax.Clone();
    78       kdTree.boxmin = (double[])original.kdTree.boxmin.Clone();
    79       kdTree.buf = (double[])original.kdTree.buf.Clone();
    80       kdTree.curboxmax = (double[])original.kdTree.curboxmax.Clone();
    81       kdTree.curboxmin = (double[])original.kdTree.curboxmin.Clone();
    82       kdTree.curdist = original.kdTree.curdist;
    83       kdTree.debugcounter = original.kdTree.debugcounter;
    84       kdTree.idx = (int[])original.kdTree.idx.Clone();
    85       kdTree.kcur = original.kdTree.kcur;
    86       kdTree.kneeded = original.kdTree.kneeded;
    87       kdTree.n = original.kdTree.n;
    88       kdTree.nodes = (int[])original.kdTree.nodes.Clone();
    89       kdTree.normtype = original.kdTree.normtype;
    90       kdTree.nx = original.kdTree.nx;
    91       kdTree.ny = original.kdTree.ny;
    92       kdTree.r = (double[])original.kdTree.r.Clone();
    93       kdTree.rneeded = original.kdTree.rneeded;
    94       kdTree.selfmatch = original.kdTree.selfmatch;
    95       kdTree.splits = (double[])original.kdTree.splits.Clone();
    96       kdTree.tags = (int[])original.kdTree.tags.Clone();
    97       kdTree.x = (double[])original.kdTree.x.Clone();
    98       kdTree.xy = (double[,])original.kdTree.xy.Clone();
    99       selfMatch = original.selfMatch;
     64      if (original.model != null)
     65        model = (alglib.knnmodel)original.model.make_copy();
    10066      k = original.k;
    101       isCompatibilityLoaded = original.IsCompatibilityLoaded;
    102       if (!IsCompatibilityLoaded) {
    103         weights = new double[original.weights.Length];
    104         Array.Copy(original.weights, weights, weights.Length);
    105         offsets = new double[original.offsets.Length];
    106         Array.Copy(original.offsets, this.offsets, this.offsets.Length);
    107       }
     67      weights = new double[original.weights.Length];
     68      Array.Copy(original.weights, weights, weights.Length);
     69      offsets = new double[original.offsets.Length];
     70      Array.Copy(original.offsets, this.offsets, this.offsets.Length);
     71
    10872      allowedInputVariables = (string[])original.allowedInputVariables.Clone();
    10973      if (original.classValues != null)
    11074        this.classValues = (double[])original.classValues.Clone();
    11175    }
    112     public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, bool selfMatch, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
     76    public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<double> weights = null, double[] classValues = null)
    11377      : base(targetVariable) {
    11478      Name = ItemName;
    11579      Description = ItemDescription;
    116       this.selfMatch = selfMatch;
    11780      this.k = k;
    11881      this.allowedInputVariables = allowedInputVariables.ToArray();
    11982      double[,] inputMatrix;
    120       if (IsCompatibilityLoaded) {
    121         // no scaling
    122         inputMatrix = dataset.ToArray(
    123           this.allowedInputVariables.Concat(new string[] { targetVariable }),
    124           rows);
     83      this.offsets = this.allowedInputVariables
     84        .Select(name => dataset.GetDoubleValues(name, rows).Average() * -1)
     85        .Concat(new double[] { 0 }) // no offset for target variable
     86        .ToArray();
     87      if (weights == null) {
     88        // automatic determination of weights (all features should have variance = 1)
     89        this.weights = this.allowedInputVariables
     90          .Select(name => {
     91            var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop();
     92            return pop.IsAlmost(0) ? 1.0 : 1.0 / pop;
     93          })
     94          .Concat(new double[] { 1.0 }) // no scaling for target variable
     95          .ToArray();
    12596      } else {
    126         this.offsets = this.allowedInputVariables
    127           .Select(name => dataset.GetDoubleValues(name, rows).Average() * -1)
    128           .Concat(new double[] { 0 }) // no offset for target variable
    129           .ToArray();
    130         if (weights == null) {
    131           // automatic determination of weights (all features should have variance = 1)
    132           this.weights = this.allowedInputVariables
    133             .Select(name => {
    134               var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop();
    135               return pop.IsAlmost(0) ? 1.0 : 1.0 / pop;
    136             })
    137             .Concat(new double[] { 1.0 }) // no scaling for target variable
    138             .ToArray();
    139         } else {
    140           // user specified weights (+ 1 for target)
    141           this.weights = weights.Concat(new double[] { 1.0 }).ToArray();
    142           if (this.weights.Length - 1 != this.allowedInputVariables.Length)
    143             throw new ArgumentException("The number of elements in the weight vector must match the number of input variables");
    144         }
    145         inputMatrix = CreateScaledData(dataset, this.allowedInputVariables.Concat(new string[] { targetVariable }), rows, this.offsets, this.weights);
    146       }
     97        // user specified weights (+ 1 for target)
     98        this.weights = weights.Concat(new double[] { 1.0 }).ToArray();
     99        if (this.weights.Length - 1 != this.allowedInputVariables.Length)
     100          throw new ArgumentException("The number of elements in the weight vector must match the number of input variables");
     101      }
     102      inputMatrix = CreateScaledData(dataset, this.allowedInputVariables.Concat(new string[] { targetVariable }), rows, this.offsets, this.weights);
    147103
    148104      if (inputMatrix.ContainsNanOrInfinity())
    149105        throw new NotSupportedException(
    150106          "Nearest neighbour model does not support NaN or infinity values in the input dataset.");
    151 
    152       this.kdTree = new alglib.nearestneighbor.kdtree();
    153107
    154108      var nRows = inputMatrix.GetLength(0);
     
    167121        }
    168122      }
    169       alglib.nearestneighbor.kdtreebuild(inputMatrix, nRows, inputMatrix.GetLength(1) - 1, 1, 2, kdTree);
     123
     124      alglib.knnbuildercreate(out var knnbuilder);
     125      if (classValues == null) {
     126        alglib.knnbuildersetdatasetreg(knnbuilder, inputMatrix, nRows, nFeatures, nout: 1);
     127      } else {
     128        alglib.knnbuildersetdatasetcls(knnbuilder, inputMatrix, nRows, nFeatures, classValues.Length);
     129      }
     130      alglib.knnbuilderbuildknnmodel(knnbuilder, k, 0.0, out model, out var report); // eps=0 (exact k-nn search is performed)
     131
    170132    }
    171133
     
    184146    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    185147      double[,] inputData;
    186       if (IsCompatibilityLoaded) {
    187         inputData = dataset.ToArray(allowedInputVariables, rows);
    188       } else {
    189         inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
    190       }
     148      inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
    191149
    192150      int n = inputData.GetLength(0);
    193151      int columns = inputData.GetLength(1);
    194152      double[] x = new double[columns];
    195       double[] dists = new double[k];
    196       double[,] neighbours = new double[k, columns + 1];
    197 
     153
     154      alglib.knncreatebuffer(model, out var buf);
     155      var y = new double[1];
    198156      for (int row = 0; row < n; row++) {
    199157        for (int column = 0; column < columns; column++) {
    200158          x[column] = inputData[row, column];
    201159        }
    202         int numNeighbours;
    203         lock (kdTreeLockObject) { // gkronber: the following calls change the kdTree data structure
    204           numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
    205           alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
    206           alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
    207         }
    208         if (selfMatch) {
    209           // weights for neighbours are 1/d.
    210           // override distances (=0) of exact matches using 1% of the distance of the next closest non-self-match neighbour -> selfmatches weight 100x more than the next closest neighbor.
    211           // if all k neighbours are selfmatches then they all have weight 0.01.
    212           double minDist = dists[0] + 1;
    213           for (int i = 0; i < numNeighbours; i++) {
    214             if ((minDist > dists[i]) && (dists[i] != 0)) {
    215               minDist = dists[i];
    216             }
    217           }
    218           minDist /= 100.0;
    219           for (int i = 0; i < numNeighbours; i++) {
    220             if (dists[i] == 0) {
    221               dists[i] = minDist;
    222             }
    223           }
    224         }
    225         double distanceWeightedValue = 0.0;
    226         double distsSum = 0.0;
    227         for (int i = 0; i < numNeighbours; i++) {
    228           distanceWeightedValue += neighbours[i, columns] / dists[i];
    229           distsSum += 1.0 / dists[i];
    230         }
    231         yield return distanceWeightedValue / distsSum;
     160        alglib.knntsprocess(model, buf, x, ref y); // thread-safe process
     161        yield return y[0];
    232162      }
    233163    }
     
    236166      if (classValues == null) throw new InvalidOperationException("No class values are defined.");
    237167      double[,] inputData;
    238       if (IsCompatibilityLoaded) {
    239         inputData = dataset.ToArray(allowedInputVariables, rows);
    240       } else {
    241         inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
    242       }
     168      inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
     169
    243170      int n = inputData.GetLength(0);
    244171      int columns = inputData.GetLength(1);
    245172      double[] x = new double[columns];
    246       int[] y = new int[classValues.Length];
    247       double[] dists = new double[k];
    248       double[,] neighbours = new double[k, columns + 1];
    249 
     173
     174      alglib.knncreatebuffer(model, out var buf);
     175      var y = new double[classValues.Length];
    250176      for (int row = 0; row < n; row++) {
    251177        for (int column = 0; column < columns; column++) {
    252178          x[column] = inputData[row, column];
    253179        }
    254         int numNeighbours;
    255         lock (kdTreeLockObject) {
    256           // gkronber: the following calls change the kdTree data structure
    257           numNeighbours = alglib.nearestneighbor.kdtreequeryknn(kdTree, x, k, selfMatch);
    258           alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref dists);
    259           alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbours);
    260         }
    261         Array.Clear(y, 0, y.Length);
    262         for (int i = 0; i < numNeighbours; i++) {
    263           int classValue = (int)Math.Round(neighbours[i, columns]);
    264           y[classValue]++;
    265         }
    266 
    267         // find the class with the largest probability value
    268         int maxProbClassIndex = 0;
    269         double maxProb = y[0];
    270         for (int i = 1; i < y.Length; i++) {
    271           if (maxProb < y[i]) {
    272             maxProb = y[i];
    273             maxProbClassIndex = i;
    274           }
    275         }
    276         yield return classValues[maxProbClassIndex];
     180        alglib.knntsprocess(model, buf, x, ref y); // thread-safe process
     181        // find the most probable class
     182        var maxC = 0;
     183        for (int i = 1; i < y.Length; i++)
     184          if (y[maxC] < y[i]) maxC = i;
     185        yield return classValues[maxC];
    277186      }
    278187    }
     
    303212      return new NearestNeighbourClassificationSolution(this, new ClassificationProblemData(problemData));
    304213    }
    305 
    306     #region events
    307     public event EventHandler Changed;
    308     private void OnChanged(EventArgs e) {
    309       var handlers = Changed;
    310       if (handlers != null)
    311         handlers(this, e);
    312     }
    313     #endregion
    314 
    315 
    316     // BackwardsCompatibility3.3
    317     #region Backwards compatible code, remove with 3.4
    318 
    319     private bool isCompatibilityLoaded = false; // new kNN models have the value false, kNN models loaded from disc have the value true
    320     [Storable(DefaultValue = true)]
    321     public bool IsCompatibilityLoaded {
    322       get { return isCompatibilityLoaded; }
    323       set { isCompatibilityLoaded = value; }
    324     }
    325     #endregion
    326     #region persistence
    327     [Storable]
    328     public double KDTreeApproxF {
    329       get { return kdTree.approxf; }
    330       set { kdTree.approxf = value; }
    331     }
    332     [Storable]
    333     public double[] KDTreeBoxMax {
    334       get { return kdTree.boxmax; }
    335       set { kdTree.boxmax = value; }
    336     }
    337     [Storable]
    338     public double[] KDTreeBoxMin {
    339       get { return kdTree.boxmin; }
    340       set { kdTree.boxmin = value; }
    341     }
    342     [Storable]
    343     public double[] KDTreeBuf {
    344       get { return kdTree.buf; }
    345       set { kdTree.buf = value; }
    346     }
    347     [Storable]
    348     public double[] KDTreeCurBoxMax {
    349       get { return kdTree.curboxmax; }
    350       set { kdTree.curboxmax = value; }
    351     }
    352     [Storable]
    353     public double[] KDTreeCurBoxMin {
    354       get { return kdTree.curboxmin; }
    355       set { kdTree.curboxmin = value; }
    356     }
    357     [Storable]
    358     public double KDTreeCurDist {
    359       get { return kdTree.curdist; }
    360       set { kdTree.curdist = value; }
    361     }
    362     [Storable]
    363     public int KDTreeDebugCounter {
    364       get { return kdTree.debugcounter; }
    365       set { kdTree.debugcounter = value; }
    366     }
    367     [Storable]
    368     public int[] KDTreeIdx {
    369       get { return kdTree.idx; }
    370       set { kdTree.idx = value; }
    371     }
    372     [Storable]
    373     public int KDTreeKCur {
    374       get { return kdTree.kcur; }
    375       set { kdTree.kcur = value; }
    376     }
    377     [Storable]
    378     public int KDTreeKNeeded {
    379       get { return kdTree.kneeded; }
    380       set { kdTree.kneeded = value; }
    381     }
    382     [Storable]
    383     public int KDTreeN {
    384       get { return kdTree.n; }
    385       set { kdTree.n = value; }
    386     }
    387     [Storable]
    388     public int[] KDTreeNodes {
    389       get { return kdTree.nodes; }
    390       set { kdTree.nodes = value; }
    391     }
    392     [Storable]
    393     public int KDTreeNormType {
    394       get { return kdTree.normtype; }
    395       set { kdTree.normtype = value; }
    396     }
    397     [Storable]
    398     public int KDTreeNX {
    399       get { return kdTree.nx; }
    400       set { kdTree.nx = value; }
    401     }
    402     [Storable]
    403     public int KDTreeNY {
    404       get { return kdTree.ny; }
    405       set { kdTree.ny = value; }
    406     }
    407     [Storable]
    408     public double[] KDTreeR {
    409       get { return kdTree.r; }
    410       set { kdTree.r = value; }
    411     }
    412     [Storable]
    413     public double KDTreeRNeeded {
    414       get { return kdTree.rneeded; }
    415       set { kdTree.rneeded = value; }
    416     }
    417     [Storable]
    418     public bool KDTreeSelfMatch {
    419       get { return kdTree.selfmatch; }
    420       set { kdTree.selfmatch = value; }
    421     }
    422     [Storable]
    423     public double[] KDTreeSplits {
    424       get { return kdTree.splits; }
    425       set { kdTree.splits = value; }
    426     }
    427     [Storable]
    428     public int[] KDTreeTags {
    429       get { return kdTree.tags; }
    430       set { kdTree.tags = value; }
    431     }
    432     [Storable]
    433     public double[] KDTreeX {
    434       get { return kdTree.x; }
    435       set { kdTree.x = value; }
    436     }
    437     [Storable]
    438     public double[,] KDTreeXY {
    439       get { return kdTree.xy; }
    440       set { kdTree.xy = value; }
    441     }
    442     #endregion
    443214  }
    444215}
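
    Taken together, the hunks above replace the hand-rolled kd-tree (and its per-field persistence) with ALGLIB 3.17's knn API. A condensed sketch of the new flow, using only calls that appear in this diff (regression case; inputMatrix, nRows, nFeatures, and k as above):

      // build: rows = samples, last column = target
      alglib.knnbuildercreate(out var knnbuilder);
      alglib.knnbuildersetdatasetreg(knnbuilder, inputMatrix, nRows, nFeatures, nout: 1);
      alglib.knnbuilderbuildknnmodel(knnbuilder, k, 0.0, out var model, out var report); // eps = 0 -> exact search

      // predict: one buffer per thread makes knntsprocess thread-safe
      alglib.knncreatebuffer(model, out var buf);
      var y = new double[1];
      alglib.knntsprocess(model, buf, x, ref y);

    Persistence shrinks accordingly: knnserialize/knnunserialize round-trip the model through a single [Storable] string instead of the two dozen kd-tree fields removed above.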
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs

    r17180 r18006  
    4141    private const string NearestNeighbourRegressionModelResultName = "Nearest neighbour regression solution";
    4242    private const string WeightsParameterName = "Weights";
    43     private const string SelfMatchParameterName = "SelfMatch";
    4443
    4544    #region parameter properties
    4645    public IFixedValueParameter<IntValue> KParameter {
    4746      get { return (IFixedValueParameter<IntValue>)Parameters[KParameterName]; }
    48     }
    49     public IFixedValueParameter<BoolValue> SelfMatchParameter {
    50       get { return (IFixedValueParameter<BoolValue>)Parameters[SelfMatchParameterName]; }
    5147    }
    5248    public IValueParameter<DoubleArray> WeightsParameter {
     
    6157        else KParameter.Value.Value = value;
    6258      }
    63     }
    64     public bool SelfMatch {
    65       get { return SelfMatchParameter.Value.Value; }
    66       set { SelfMatchParameter.Value.Value = value; }
    6759    }
    6860    public DoubleArray Weights {
     
    8173      Parameters.Add(new FixedValueParameter<IntValue>(KParameterName, "The number of nearest neighbours to consider for regression.", new IntValue(3)));
    8274      Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
    83       Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    8475      Problem = new RegressionProblem();
    8576    }
     
    9182      if (!Parameters.ContainsKey(WeightsParameterName)) {
    9283        Parameters.Add(new OptionalValueParameter<DoubleArray>(WeightsParameterName, "Optional: use weights to specify individual scaling values for all features. If not set the weights are calculated automatically (each feature is scaled to unit variance)"));
    93       }
    94       if (!Parameters.ContainsKey(SelfMatchParameterName)) {
    95         Parameters.Add(new FixedValueParameter<BoolValue>(SelfMatchParameterName, "Should we use equal points for classification?", new BoolValue(false)));
    9684      }
    9785      #endregion
     
    10694      double[] weights = null;
    10795      if (Weights != null) weights = Weights.CloneAsArray();
    108       var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, SelfMatch, weights);
     96      var solution = CreateNearestNeighbourRegressionSolution(Problem.ProblemData, K, weights);
    10997      Results.Add(new Result(NearestNeighbourRegressionModelResultName, "The nearest neighbour regression solution.", solution));
    11098    }
    11199
    112     public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
     100    public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, double[] weights = null) {
    113101      var clonedProblemData = (IRegressionProblemData)problemData.Clone();
    114       return new NearestNeighbourRegressionSolution(Train(problemData, k, selfMatch, weights), clonedProblemData);
     102      return new NearestNeighbourRegressionSolution(Train(problemData, k, weights), clonedProblemData);
    115103    }
    116104
    117     public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, bool selfMatch = false, double[] weights = null) {
     105    public static INearestNeighbourModel Train(IRegressionProblemData problemData, int k, double[] weights = null) {
    118106      return new NearestNeighbourModel(problemData.Dataset,
    119107        problemData.TrainingIndices,
    120108        k,
    121         selfMatch,
    122109        problemData.TargetVariable,
    123110        problemData.AllowedInputVariables,
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassification.cs

    r17180 r18006  
    209209        alglib.mlpcreatec2(allowedInputVariables.Count(), nHiddenNodes1, nHiddenNodes2, nClasses, out multiLayerPerceptron);
    210210      } else throw new ArgumentException("Number of layers must be zero, one, or two.", "nLayers");
     211
    211212      alglib.mlpreport rep;
    212 
    213213      int info;
    214214      // using mlptrainlm instead of mlptraines or mlptrainbfgs because only one parameter is necessary
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkModel.cs

    r17180 r18006  
    2020#endregion
    2121
     22extern alias alglib_3_7;
    2223using System;
    2324using System.Collections.Generic;
     
    3233  /// Represents a neural network model for regression and classification
    3334  /// </summary>
    34   [StorableType("AEB9B960-FCA6-4A6D-BD5F-27BCE9CC5BEA")]
     35  [StorableType("DABDBD64-E93B-4F50-A343-C8A92C1C48A4")]
    3536  [Item("NeuralNetworkModel", "Represents a neural network for regression and classification.")]
    3637  public sealed class NeuralNetworkModel : ClassificationModel, INeuralNetworkModel {
    3738
    3839    private object mlpLocker = new object();
     40
     41
     42
    3943    private alglib.multilayerperceptron multiLayerPerceptron;
     44    [Storable]
     45    private string SerializedMultiLayerPerceptron {
     46      get { alglib.mlpserialize(multiLayerPerceptron, out var ser); return ser; }
     47      set { if (value != null) alglib.mlpunserialize(value, out multiLayerPerceptron); }
     48    }
    4049
    4150    public override IEnumerable<string> VariablesUsedForPrediction {
     
    4857    private double[] classValues;
    4958    [StorableConstructor]
    50     private NeuralNetworkModel(StorableConstructorFlag _) : base(_) {
    51       multiLayerPerceptron = new alglib.multilayerperceptron();
    52     }
     59    private NeuralNetworkModel(StorableConstructorFlag _) : base(_) { }
    5360    private NeuralNetworkModel(NeuralNetworkModel original, Cloner cloner)
    5461      : base(original, cloner) {
    55       multiLayerPerceptron = new alglib.multilayerperceptron();
    56       multiLayerPerceptron.innerobj.chunks = (double[,])original.multiLayerPerceptron.innerobj.chunks.Clone();
    57       multiLayerPerceptron.innerobj.columnmeans = (double[])original.multiLayerPerceptron.innerobj.columnmeans.Clone();
    58       multiLayerPerceptron.innerobj.columnsigmas = (double[])original.multiLayerPerceptron.innerobj.columnsigmas.Clone();
    59       multiLayerPerceptron.innerobj.derror = (double[])original.multiLayerPerceptron.innerobj.derror.Clone();
    60       multiLayerPerceptron.innerobj.dfdnet = (double[])original.multiLayerPerceptron.innerobj.dfdnet.Clone();
    61       multiLayerPerceptron.innerobj.neurons = (double[])original.multiLayerPerceptron.innerobj.neurons.Clone();
    62       multiLayerPerceptron.innerobj.nwbuf = (double[])original.multiLayerPerceptron.innerobj.nwbuf.Clone();
    63       multiLayerPerceptron.innerobj.structinfo = (int[])original.multiLayerPerceptron.innerobj.structinfo.Clone();
    64       multiLayerPerceptron.innerobj.weights = (double[])original.multiLayerPerceptron.innerobj.weights.Clone();
    65       multiLayerPerceptron.innerobj.x = (double[])original.multiLayerPerceptron.innerobj.x.Clone();
    66       multiLayerPerceptron.innerobj.y = (double[])original.multiLayerPerceptron.innerobj.y.Clone();
     62      if (original.multiLayerPerceptron != null)
     63        multiLayerPerceptron = (alglib.multilayerperceptron)original.multiLayerPerceptron.make_copy();
    6764      allowedInputVariables = (string[])original.allowedInputVariables.Clone();
    6865      if (original.classValues != null)
     
    7370      this.name = ItemName;
    7471      this.description = ItemDescription;
    75       this.multiLayerPerceptron = multiLayerPerceptron;
     72      this.multiLayerPerceptron = (alglib.multilayerperceptron)multiLayerPerceptron.make_copy();
    7673      this.allowedInputVariables = allowedInputVariables.ToArray();
    7774      if (classValues != null)
     
    9592          x[column] = inputData[row, column];
    9693        }
    97         // NOTE: mlpprocess changes data in multiLayerPerceptron and is therefore not thread-save!
     94        // NOTE: mlpprocess changes data in multiLayerPerceptron and is therefore not thread-safe!
    9895        lock (mlpLocker) {
    9996          alglib.mlpprocess(multiLayerPerceptron, x, ref y);
     
    115112          x[column] = inputData[row, column];
    116113        }
    117         // NOTE: mlpprocess changes data in multiLayerPerceptron and is therefore not thread-save!
     114        // NOTE: mlpprocess changes data in multiLayerPerceptron and is therefore not thread-safe!
    118115        lock (mlpLocker) {
    119116          alglib.mlpprocess(multiLayerPerceptron, x, ref y);
     
    156153      return new NeuralNetworkClassificationSolution(this, new ClassificationProblemData(problemData));
    157154    }
    158 
    159     #region persistence
    160     [Storable]
    161     private double[,] MultiLayerPerceptronChunks {
    162       get {
    163         return multiLayerPerceptron.innerobj.chunks;
    164       }
    165       set {
    166         multiLayerPerceptron.innerobj.chunks = value;
    167       }
    168     }
    169     [Storable]
    170     private double[] MultiLayerPerceptronColumnMeans {
    171       get {
    172         return multiLayerPerceptron.innerobj.columnmeans;
    173       }
    174       set {
    175         multiLayerPerceptron.innerobj.columnmeans = value;
    176       }
    177     }
    178     [Storable]
    179     private double[] MultiLayerPerceptronColumnSigmas {
    180       get {
    181         return multiLayerPerceptron.innerobj.columnsigmas;
    182       }
    183       set {
    184         multiLayerPerceptron.innerobj.columnsigmas = value;
    185       }
    186     }
    187     [Storable]
    188     private double[] MultiLayerPerceptronDError {
    189       get {
    190         return multiLayerPerceptron.innerobj.derror;
    191       }
    192       set {
    193         multiLayerPerceptron.innerobj.derror = value;
    194       }
    195     }
    196     [Storable]
    197     private double[] MultiLayerPerceptronDfdnet {
    198       get {
    199         return multiLayerPerceptron.innerobj.dfdnet;
    200       }
    201       set {
    202         multiLayerPerceptron.innerobj.dfdnet = value;
    203       }
    204     }
    205     [Storable]
    206     private double[] MultiLayerPerceptronNeurons {
    207       get {
    208         return multiLayerPerceptron.innerobj.neurons;
    209       }
    210       set {
    211         multiLayerPerceptron.innerobj.neurons = value;
    212       }
    213     }
    214     [Storable]
    215     private double[] MultiLayerPerceptronNwbuf {
    216       get {
    217         return multiLayerPerceptron.innerobj.nwbuf;
    218       }
    219       set {
    220         multiLayerPerceptron.innerobj.nwbuf = value;
    221       }
    222     }
    223     [Storable]
    224     private int[] MultiLayerPerceptronStuctinfo {
    225       get {
    226         return multiLayerPerceptron.innerobj.structinfo;
    227       }
    228       set {
    229         multiLayerPerceptron.innerobj.structinfo = value;
    230       }
    231     }
    232     [Storable]
    233     private double[] MultiLayerPerceptronWeights {
    234       get {
    235         return multiLayerPerceptron.innerobj.weights;
    236       }
    237       set {
    238         multiLayerPerceptron.innerobj.weights = value;
    239       }
    240     }
    241     [Storable]
    242     private double[] MultiLayerPerceptronX {
    243       get {
    244         return multiLayerPerceptron.innerobj.x;
    245       }
    246       set {
    247         multiLayerPerceptron.innerobj.x = value;
    248       }
    249     }
    250     [Storable]
    251     private double[] MultiLayerPerceptronY {
    252       get {
    253         return multiLayerPerceptron.innerobj.y;
    254       }
    255       set {
    256         multiLayerPerceptron.innerobj.y = value;
    257       }
    258     }
    259     #endregion
    260155  }
    261156}
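
    Two idioms recur across the migrated models in this changeset; a condensed sketch (both calls appear verbatim in the hunks above):

      // cloning: ALGLIB 3.17 objects expose make_copy(), replacing field-by-field copies
      if (original.multiLayerPerceptron != null)
        multiLayerPerceptron = (alglib.multilayerperceptron)original.multiLayerPerceptron.make_copy();

      // persistence: round-trip the network through one string-valued [Storable] property
      [Storable]
      private string SerializedMultiLayerPerceptron {
        get { alglib.mlpserialize(multiLayerPerceptron, out var ser); return ser; }
        set { if (value != null) alglib.mlpunserialize(value, out multiLayerPerceptron); }
      }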
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegression.cs

    r17180 r18006  
    186186      IEnumerable<int> rows = problemData.TrainingIndices;
    187187      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     188      int nRows = inputMatrix.GetLength(0);
    188189      if (inputMatrix.ContainsNanOrInfinity())
    189190        throw new NotSupportedException("Neural network regression does not support NaN or infinity values in the input dataset.");
     
    197198        alglib.mlpcreate2(allowedInputVariables.Count(), nHiddenNodes1, nHiddenNodes2, 1, out multiLayerPerceptron);
    198199      } else throw new ArgumentException("Number of layers must be zero, one, or two.", "nLayers");
    199       alglib.mlpreport rep;
    200       int nRows = inputMatrix.GetLength(0);
    201200
    202201      int info;
    203202      // using mlptrainlm instead of mlptraines or mlptrainbfgs because only one parameter is necessary
    204       alglib.mlptrainlm(multiLayerPerceptron, inputMatrix, nRows, decay, restarts, out info, out rep);
     203      alglib.mlptrainlm(multiLayerPerceptron, inputMatrix, nRows, decay, restarts, out info, out _);
    205204      if (info != 2) throw new ArgumentException("Error in calculation of neural network regression solution");
    206205
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/Plugin.cs.frame

    r17184 r18006  
    2929  [PluginFile("HeuristicLab.Algorithms.DataAnalysis-3.4.dll", PluginFileType.Assembly)]
    3030  [PluginDependency("HeuristicLab.ALGLIB", "3.7.0")]
     31  [PluginDependency("HeuristicLab.ALGLIB", "3.17")]
    3132  [PluginDependency("HeuristicLab.Algorithms.OffspringSelectionGeneticAlgorithm", "3.3")] // for GBM
    3233  [PluginDependency("HeuristicLab.Algorithms.GradientDescent", "3.3")]
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs

    r17180 r18006  
    207207        inputMatrix[row, nColumns - 1] = classIndices[inputMatrix[row, nColumns - 1]];
    208208      }
    209 
    210       alglib.dfreport rep;
    211       var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out rep);
     209     
     210      var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out var rep);
    212211
    213212      rmsError = rep.rmserror;
     
    216215      outOfBagRelClassificationError = rep.oobrelclserror;
    217216
    218       return new RandomForestModelFull(dForest, problemData.TargetVariable, problemData.AllowedInputVariables, classValues);
     217      return new RandomForestModelFull(dForest, nTrees, problemData.TargetVariable, problemData.AllowedInputVariables, classValues);
    219218    }
    220219    #endregion
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs

    r17180 r18006  
    2020#endregion
    2121
     22extern alias alglib_3_7;
     23
    2224using System;
    2325using System.Collections.Generic;
     
    3941  public sealed class RandomForestModel : ClassificationModel, IRandomForestModel {
    4042    // not persisted
    41     private alglib.decisionforest randomForest;
    42     private alglib.decisionforest RandomForest {
     43    private alglib_3_7.alglib.decisionforest randomForest;
     44    private alglib_3_7.alglib.decisionforest RandomForest {
    4345      get {
    4446        // recalculate lazily
     
    7476    private RandomForestModel(StorableConstructorFlag _) : base(_) {
    7577      // for backwards compatibility (loading old solutions)
    76       randomForest = new alglib.decisionforest();
     78      randomForest = new alglib_3_7.alglib.decisionforest();
    7779    }
    7880    private RandomForestModel(RandomForestModel original, Cloner cloner)
    7981      : base(original, cloner) {
    80       randomForest = new alglib.decisionforest();
     82      randomForest = new alglib_3_7.alglib.decisionforest();
    8183      randomForest.innerobj.bufsize = original.randomForest.innerobj.bufsize;
    8284      randomForest.innerobj.nclasses = original.randomForest.innerobj.nclasses;
     
    100102
    101103    // random forest models can only be created through the static factory methods CreateRegressionModel and CreateClassificationModel
    102     private RandomForestModel(string targetVariable, alglib.decisionforest randomForest,
     104    private RandomForestModel(string targetVariable, alglib_3_7.alglib.decisionforest randomForest,
    103105      int seed, IDataAnalysisProblemData originalTrainingData,
    104106      int nTrees, double r, double m, double[] classValues = null)
     
    151153          x[column] = inputData[row, column];
    152154        }
    153         alglib.dfprocess(RandomForest, x, ref y);
     155        alglib_3_7.alglib.dfprocess(RandomForest, x, ref y);
    154156        yield return y[0];
    155157      }
     
    169171          x[column] = inputData[row, column];
    170172        }
    171         alglib.dforest.dfprocessraw(RandomForest.innerobj, x, ref ys);
     173        alglib_3_7.alglib.dforest.dfprocessraw(RandomForest.innerobj, x, ref ys);
    172174        yield return ys.VariancePop();
    173175      }
     
    187189          x[column] = inputData[row, column];
    188190        }
    189         alglib.dfprocess(randomForest, x, ref y);
     191        alglib_3_7.alglib.dfprocess(randomForest, x, ref y);
    190192        // find the class with the largest probability value
    191193        int maxProbClassIndex = 0;
     
    315317      double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices);
    316318
    317       alglib.dfreport rep;
    318       var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, 1, out rep);
     319      var dForest = RandomForestUtil.CreateRandomForestModelAlglib_3_7(seed, inputMatrix, nTrees, r, m, 1, out var rep);
    319320
    320321      rmsError = rep.rmserror;
     
    353354      }
    354355
    355       alglib.dfreport rep;
    356       var dForest = RandomForestUtil.CreateRandomForestModel(seed, inputMatrix, nTrees, r, m, nClasses, out rep);
     356      var dForest = RandomForestUtil.CreateRandomForestModelAlglib_3_7(seed, inputMatrix, nTrees, r, m, nClasses, out var rep);
    357357
    358358      rmsError = rep.rmserror;
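
    RandomForestModel stays on the legacy forest format for backwards compatibility, so both ALGLIB versions are loaded side by side: the 3.7.0 assembly carries the alias alglib_3_7 (set in the .csproj above), while 3.17.0 keeps the default global alias. A minimal sketch of consuming both from one file:

      extern alias alglib_3_7;  // must precede all using directives

      // 3.7 type, used for solutions persisted in the old format
      var legacy = new alglib_3_7.alglib.decisionforest();
      // 3.17 type, used by the new code paths
      alglib.decisionforest current;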
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModelFull.cs

    r17180 r18006  
    3131
    3232namespace HeuristicLab.Algorithms.DataAnalysis {
    33   [StorableType("9C797DF0-1169-4381-A732-6DAB90802839")]
     33  [StorableType("55412E08-DAD4-4C2E-9181-C142E7EA9474")]
    3434  [Item("RandomForestModelFull", "Represents a random forest for regression and classification.")]
    3535  public sealed class RandomForestModelFull : ClassificationModel, IRandomForestModel {
     
    4242    private double[] classValues;
    4343
     44    public int NumClasses => classValues == null ? 0 : classValues.Length;
     45
    4446    [Storable]
    4547    private string[] inputVariables;
    4648
     49    [Storable]
    4750    public int NumberOfTrees {
    48       get { return RandomForestNTrees; }
     51      get; private set;
    4952    }
    5053
     
    5356
    5457    [Storable]
    55     private int RandomForestBufSize {
    56       get { return randomForest.innerobj.bufsize; }
    57       set { randomForest.innerobj.bufsize = value; }
    58     }
    59     [Storable]
    60     private int RandomForestNClasses {
    61       get { return randomForest.innerobj.nclasses; }
    62       set { randomForest.innerobj.nclasses = value; }
    63     }
    64     [Storable]
    65     private int RandomForestNTrees {
    66       get { return randomForest.innerobj.ntrees; }
    67       set { randomForest.innerobj.ntrees = value; }
    68     }
    69     [Storable]
    70     private int RandomForestNVars {
    71       get { return randomForest.innerobj.nvars; }
    72       set { randomForest.innerobj.nvars = value; }
    73     }
    74     [Storable]
    75     private double[] RandomForestTrees {
    76       get { return randomForest.innerobj.trees; }
    77       set { randomForest.innerobj.trees = value; }
     58    private string RandomForestSerialized {
     59      get { alglib.dfserialize(randomForest, out var serialized); return serialized; }
     60      set { if (value != null) alglib.dfunserialize(value, out randomForest); }
    7861    }
    7962
    8063    [StorableConstructor]
    81     private RandomForestModelFull(StorableConstructorFlag _) : base(_) {
    82       randomForest = new alglib.decisionforest();
    83     }
     64    private RandomForestModelFull(StorableConstructorFlag _) : base(_) { }
    8465
    8566    private RandomForestModelFull(RandomForestModelFull original, Cloner cloner) : base(original, cloner) {
    86       randomForest = new alglib.decisionforest();
    87       randomForest.innerobj.bufsize = original.randomForest.innerobj.bufsize;
    88       randomForest.innerobj.nclasses = original.randomForest.innerobj.nclasses;
    89       randomForest.innerobj.ntrees = original.randomForest.innerobj.ntrees;
    90       randomForest.innerobj.nvars = original.randomForest.innerobj.nvars;
    91       randomForest.innerobj.trees = (double[])original.randomForest.innerobj.trees.Clone();
     67      if (original.randomForest != null)
     68        randomForest = (alglib.decisionforest)original.randomForest.make_copy();
     69      NumberOfTrees = original.NumberOfTrees;
    9270
    9371      // following fields are immutable so we don't need to clone them
     
    9977    }
    10078
    101     public RandomForestModelFull(alglib.decisionforest decisionForest, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<double> classValues = null) : base(targetVariable) {
     79    public RandomForestModelFull(alglib.decisionforest decisionForest, int nTrees, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<double> classValues = null) : base(targetVariable) {
    10280      this.name = ItemName;
    10381      this.description = ItemDescription;
    10482
    105       randomForest = decisionForest;
     83      randomForest = (alglib.decisionforest)decisionForest.make_copy();
     84      NumberOfTrees = nTrees;
    10685
    10786      this.inputVariables = inputVariables.ToArray();
     
    147126      double[] y = new double[1];
    148127
     128      alglib.dfcreatebuffer(randomForest, out var buf);
    149129      for (int row = 0; row < n; row++) {
    150130        for (int column = 0; column < columns; column++) {
    151131          x[column] = inputData[row, column];
    152132        }
    153         alglib.dfprocess(randomForest, x, ref y);
     133        alglib.dftsprocess(randomForest, buf, x, ref y); // thread-safe process (as long as separate buffers are used)
    154134        yield return y[0];
    155135      }
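The switch from dfprocess to dftsprocess is what makes row-wise prediction safe under concurrency: dftsprocess only mutates the caller-supplied buffer, so each thread (or each enumeration, as here) allocates its own via dfcreatebuffer. A minimal sketch under that assumption (the helper name is illustrative):

    // One private buffer per call: concurrent callers never share mutable state.
    public static double PredictOne(alglib.decisionforest forest, double[] x) {
      alglib.dfcreatebuffer(forest, out var buf);
      double[] y = new double[1];
      alglib.dftsprocess(forest, buf, x, ref y); // thread-safe with a private buffer
      return y[0];
    }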
     
    168148          x[column] = inputData[row, column];
    169149        }
    170         alglib.dforest.dfprocessraw(randomForest.innerobj, x, ref ys);
     150        lock (randomForest)
     151          alglib.dforest.dfprocessraw(randomForest.innerobj, x, ref ys, null);
    171152        yield return ys.VariancePop();
    172153      }
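dfprocessraw, by contrast, works directly on innerobj and no buffered variant is used here, so access is serialized with a lock; the per-tree raw outputs are then reduced to a population variance as an uncertainty estimate. A rough equivalent with the variance spelled out (VariancePop is HeuristicLab's extension method, shown here as LINQ instead; the one-output-per-tree layout of ys is an assumption):

    using System.Linq;

    static double PredictionVariance(alglib.decisionforest forest, double[] x, int nTrees) {
      double[] ys = new double[nTrees];
      lock (forest) // dfprocessraw mutates shared state, so serialize access
        alglib.dforest.dfprocessraw(forest.innerobj, x, ref ys, null);
      double mean = ys.Average();
      return ys.Sum(v => (v - mean) * (v - mean)) / ys.Length; // population variance
    }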
     
    180161      int columns = inputData.GetLength(1);
    181162      double[] x = new double[columns];
    182       double[] y = new double[randomForest.innerobj.nclasses];
    183 
     163      double[] y = new double[NumClasses];
     164
     165      alglib.dfcreatebuffer(randomForest, out var buf);
    184166      for (int row = 0; row < n; row++) {
    185167        for (int column = 0; column < columns; column++) {
    186168          x[column] = inputData[row, column];
    187169        }
    188         alglib.dfprocess(randomForest, x, ref y);
     170        alglib.dftsprocess(randomForest, buf, x, ref y);
    189171        // find the class with the largest probability value
    190172        int maxProbClassIndex = 0;
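The hunk is truncated here, but the surrounding intent is a plain argmax over the class-probability vector y filled by dftsprocess, mapped back through the stored class values. A hedged completion sketch (classValues as a double[] field is an assumption based on the constructor parameter):

    // Pick the index of the largest predicted class probability...
    int maxProbClassIndex = 0;
    for (int i = 1; i < y.Length; i++)
      if (y[i] > y[maxProbClassIndex]) maxProbClassIndex = i;
    // ...and map it back to the original class value.
    yield return classValues[maxProbClassIndex];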
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModelSurrogate.cs

    r17278 r18006  
    135135    }
    136136
     137    public override bool IsProblemDataCompatible(IDataAnalysisProblemData problemData, out string errorMessage) {
     138      return ActualModel.IsProblemDataCompatible(problemData, out errorMessage);
     139    }
     140
    137141    //RegressionModel methods
    138142    public bool IsProblemDataCompatible(IRegressionProblemData problemData, out string errorMessage) {
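The added override simply forwards the compatibility check to the lazily restored ActualModel, which keeps the heavyweight forest out of memory until something actually queries it. A compact sketch of that surrogate pattern (all names except IsProblemDataCompatible are illustrative):

    public class ModelSurrogate {
      private HeavyModel actualModel; // restored on first access
      private HeavyModel ActualModel =>
        actualModel ?? (actualModel = Restore());

      // Pure delegation: the surrogate itself holds no model logic.
      public bool IsProblemDataCompatible(object problemData, out string errorMessage) =>
        ActualModel.IsProblemDataCompatible(problemData, out errorMessage);

      private HeavyModel Restore() => new HeavyModel(); // placeholder for deserialization
    }

    public class HeavyModel {
      public bool IsProblemDataCompatible(object problemData, out string errorMessage) {
        errorMessage = null;
        return true;
      }
    }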
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs

    r17180 r18006  
    2020#endregion
    2121
     22extern alias alglib_3_7;
     23using alglib_3_7;
    2224using System.Collections.Generic;
    2325using System.Linq;
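The extern alias lines let two versions of alglib coexist in one project: both DLLs are referenced, the legacy reference's Aliases property is set to alglib_3_7 in the .csproj, and code then disambiguates through the alias-qualified root namespace. Sketched usage (the assembly wiring described is an assumption about the project setup):

    extern alias alglib_3_7; // must appear before any using directives

    class ForestMigration {
      void Example() {
        var current = new alglib.decisionforest();            // default alias: alglib 3.17
        var legacy = new alglib_3_7.alglib.decisionforest();  // legacy 3.7 via the alias
      }
    }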
     
    200202      outOfBagAvgRelError = rep.oobavgrelerror;
    201203
    202       return new RandomForestModelFull(dForest, problemData.TargetVariable, problemData.AllowedInputVariables);
     204      return new RandomForestModelFull(dForest, nTrees, problemData.TargetVariable, problemData.AllowedInputVariables);
    203205    }
    204206
  • branches/3087_Ceres_Integration/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestUtil.cs

    r17180 r18006  
    2222#endregion
    2323
     24extern alias alglib_3_7;
     25
    2426using System;
    2527using System.Collections.Generic;
     
    9597
    9698    public static void AssertInputMatrix(double[,] inputMatrix) {
    97       if (inputMatrix.ContainsNanOrInfinity())
    98         throw new NotSupportedException("Random forest modeling does not support NaN or infinity values in the input dataset.");
      99      foreach (var val in inputMatrix) if (double.IsNaN(val))
      100        throw new NotSupportedException("Random forest modeling does not support NaN values in the input dataset.");
    99101    }
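The loosened guard now rejects only NaN rather than NaN-or-infinity (the implication being that the new 3.17 builder tolerates infinite values). For reference, an equivalent scan written with LINQ, flattening the 2D matrix via Cast:

    using System;
    using System.Linq;

    static void AssertNoNaN(double[,] inputMatrix) {
      // Cast<double>() enumerates every cell of the 2D array.
      if (inputMatrix.Cast<double>().Any(double.IsNaN))
        throw new NotSupportedException(
          "Random forest modeling does not support NaN values in the input dataset.");
    }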
    100102
     
    103105      RandomForestUtil.AssertInputMatrix(inputMatrix);
    104106
     107      int nRows = inputMatrix.GetLength(0);
     108      int nColumns = inputMatrix.GetLength(1);
     109
     110      alglib.dfbuildercreate(out var dfbuilder);
     111      alglib.dfbuildersetdataset(dfbuilder, inputMatrix, nRows, nColumns - 1, nClasses);
     112      alglib.dfbuildersetimportancenone(dfbuilder); // do not calculate importance (TODO add this feature)
     113      alglib.dfbuildersetrdfalgo(dfbuilder, 0); // only one algorithm supported in version 3.17
     114      alglib.dfbuildersetrdfsplitstrength(dfbuilder, 2); // 0 = split at the random position, fastest one
     115                                                         // 1 = split at the middle of the range
     116                                                         // 2 = strong split at the best point of the range (default)
     117      alglib.dfbuildersetrndvarsratio(dfbuilder, m);
     118      alglib.dfbuildersetsubsampleratio(dfbuilder, r);
     119      alglib.dfbuildersetseed(dfbuilder, seed);
     120      alglib.dfbuilderbuildrandomforest(dfbuilder, nTrees, out var dForest, out rep);
     121      return dForest;
     122    }
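For orientation, the new builder-based construction boils down to: create a builder, attach the dataset (inputs first, target in the last column; nclasses = 1 means regression), configure sampling and split behavior, then build. A minimal end-to-end sketch with toy values (all numbers illustrative):

    // Toy dataset: two inputs plus the target in the last column.
    double[,] data = {
      { 0.0, 0.0, 0.0 },
      { 0.0, 1.0, 1.0 },
      { 1.0, 0.0, 1.0 },
      { 1.0, 1.0, 2.0 },
    };
    alglib.dfbuildercreate(out var builder);
    alglib.dfbuildersetdataset(builder, data, 4, 2, 1); // nRows, nInputs, nClasses (1 = regression)
    alglib.dfbuildersetrdfsplitstrength(builder, 2);    // strong splits (default)
    alglib.dfbuildersetsubsampleratio(builder, 0.66);   // bagging ratio
    alglib.dfbuildersetseed(builder, 42);               // reproducible forests
    alglib.dfbuilderbuildrandomforest(builder, 50, out var forest, out var report);

    double[] y = new double[1];
    alglib.dfprocess(forest, new double[] { 0.5, 0.5 }, ref y); // y[0] holds the prediction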
     123    internal static alglib_3_7.alglib.decisionforest CreateRandomForestModelAlglib_3_7(int seed, double[,] inputMatrix, int nTrees, double r, double m, int nClasses, out alglib_3_7.alglib.dfreport rep) {
     124      RandomForestUtil.AssertParameters(r, m);
     125      RandomForestUtil.AssertInputMatrix(inputMatrix);
     126
    105127      int info = 0;
    106       alglib.math.rndobject = new System.Random(seed);
    107       var dForest = new alglib.decisionforest();
    108       rep = new alglib.dfreport();
     128      alglib_3_7.alglib.math.rndobject = new System.Random(seed);
     129      var dForest = new alglib_3_7.alglib.decisionforest();
     130      rep = new alglib_3_7.alglib.dfreport();
    109131      int nRows = inputMatrix.GetLength(0);
    110132      int nColumns = inputMatrix.GetLength(1);
     
    112134      int nFeatures = Math.Max((int)Math.Round(m * (nColumns - 1)), 1);
    113135
    114       alglib.dforest.dfbuildinternal(inputMatrix, nRows, nColumns - 1, nClasses, nTrees, sampleSize, nFeatures, alglib.dforest.dfusestrongsplits + alglib.dforest.dfuseevs, ref info, dForest.innerobj, rep.innerobj);
     136      alglib_3_7.alglib.dforest.dfbuildinternal(inputMatrix, nRows, nColumns - 1, nClasses, nTrees, sampleSize, nFeatures, alglib_3_7.alglib.dforest.dfusestrongsplits + alglib_3_7.alglib.dforest.dfuseevs, ref info, dForest.innerobj, rep.innerobj);
    115137      if (info != 1) throw new ArgumentException("Error in calculation of random forest model");
    116138      return dForest;
     
    123145      var targetVariable = GetTargetVariableName(problemData);
    124146      foreach (var tuple in partitions) {
    125         double rmsError, avgRelError, outOfBagAvgRelError, outOfBagRmsError;
    126147        var trainingRandomForestPartition = tuple.Item1;
    127148        var testRandomForestPartition = tuple.Item2;
    128         var model = RandomForestModel.CreateRegressionModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError);
     149        var model = RandomForestRegression.CreateRandomForestRegressionModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed,
     150                                                                             out var rmsError, out var avgRelError, out var outOfBagRmsError, out var outOfBagAvgRelError);
    129151        var estimatedValues = model.GetEstimatedValues(ds, testRandomForestPartition);
    130152        var targetValues = ds.GetDoubleValues(targetVariable, testRandomForestPartition);
     
    143165      var targetVariable = GetTargetVariableName(problemData);
    144166      foreach (var tuple in partitions) {
    145         double rmsError, avgRelError, outOfBagAvgRelError, outOfBagRmsError;
    146167        var trainingRandomForestPartition = tuple.Item1;
    147168        var testRandomForestPartition = tuple.Item2;
    148         var model = RandomForestModel.CreateClassificationModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError);
     169        var model = RandomForestClassification.CreateRandomForestClassificationModel(problemData, trainingRandomForestPartition, nTrees, r, m, seed,
     170                                                                                     out var rmsError, out var avgRelError, out var outOfBagRmsError, out var outOfBagAvgRelError);
    149171        var estimatedValues = model.GetEstimatedClassValues(ds, testRandomForestPartition);
    150172        var targetValues = ds.GetDoubleValues(targetVariable, testRandomForestPartition);
     
    176198        var parameters = new RFParameter();
    177199        for (int i = 0; i < setters.Count; ++i) { setters[i](parameters, parameterValues[i]); }
    178         double rmsError, outOfBagRmsError, avgRelError, outOfBagAvgRelError;
    179         RandomForestModel.CreateRegressionModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed, out rmsError, out outOfBagRmsError, out avgRelError, out outOfBagAvgRelError);
     200        RandomForestRegression.CreateRandomForestRegressionModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed,
     201                                                                 out var rmsError, out var avgRelError, out var outOfBagRmsError, out var outOfBagAvgRelError);
    180202
    181203        lock (locker) {
     
    208230        var parameters = new RFParameter();
    209231        for (int i = 0; i < setters.Count; ++i) { setters[i](parameters, parameterValues[i]); }
    210         double rmsError, outOfBagRmsError, avgRelError, outOfBagAvgRelError;
    211         RandomForestModel.CreateClassificationModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed,
    212                                                                 out rmsError, out outOfBagRmsError, out avgRelError, out outOfBagAvgRelError);
     232        RandomForestClassification.CreateRandomForestClassificationModel(problemData, problemData.TrainingIndices, parameters.N, parameters.R, parameters.M, seed,
     233                                                                         out var rmsError, out var avgRelError, out var outOfBagRmsError, out var outOfBagAvgRelError);
    213234
    214235        lock (locker) {
     
    227248    /// <param name="problemData">The regression problem data</param>
    228249    /// <param name="numberOfFolds">The number of folds for crossvalidation</param>
    229     /// <param name="shuffleFolds">Specifies whether the folds should be shuffled</param>
    230250    /// <param name="parameterRanges">The ranges for each parameter in the grid search</param>
    231251    /// <param name="seed">The random seed (required by the random forest model)</param>
    232252    /// <param name="maxDegreeOfParallelism">The maximum allowed number of threads (to parallelize the grid search)</param>
    233253    /// <returns>The best parameter values found by the grid search</returns>
    234     public static RFParameter GridSearch(IRegressionProblemData problemData, int numberOfFolds, bool shuffleFolds, Dictionary<string, IEnumerable<double>> parameterRanges, int seed = 12345, int maxDegreeOfParallelism = 1) {
     254    public static RFParameter GridSearch(IRegressionProblemData problemData, int numberOfFolds, Dictionary<string, IEnumerable<double>> parameterRanges, int seed = 12345, int maxDegreeOfParallelism = 1) {
    235255      DoubleValue mse = new DoubleValue(Double.MaxValue);
    236256      RFParameter bestParameter = new RFParameter();
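Typical usage of the simplified overload: ranges are keyed by the RFParameter property names (assumed here to be "N", "R" and "M", matching the tree count, subsample ratio and per-split variable ratio used throughout this file; surrounding usings for System.Collections.Generic are assumed):

    var parameterRanges = new Dictionary<string, IEnumerable<double>> {
      { "N", new double[] { 50, 100, 200 } },  // number of trees
      { "R", new double[] { 0.3, 0.5, 0.7 } }, // subsample ratio
      { "M", new double[] { 0.2, 0.5, 1.0 } }, // ratio of variables tried per split
    };
    RFParameter best = RandomForestUtil.GridSearch(problemData, 5, parameterRanges,
                                                   seed: 12345, maxDegreeOfParallelism: 4);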