Changeset 16158


Ignore:
Timestamp:
09/20/18 13:12:17 (13 months ago)
Author:
pfleck
Message:

#2883 merged trunk

Location:
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis
Files:
1 deleted
25 edited
1 copied

Legend:

Unmodified
Added
Removed
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis

  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4

  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/GBM/GradientBoostingRegressionAlgorithm.cs

    r15583 r16158  
    210210    protected override void Run(CancellationToken cancellationToken) {
    211211      // Set up the algorithm
    212       if (SetSeedRandomly) Seed = new System.Random().Next();
     212      if (SetSeedRandomly) Seed = RandomSeedGenerator.GetSeed();
    213213      var rand = new MersenneTwister((uint)Seed);
    214214
     
    258258
    259259          modifiableDataset.RemoveVariable(targetVarName);
    260           modifiableDataset.AddVariable(targetVarName, curY.Concat(curYTest));
     260          modifiableDataset.AddVariable(targetVarName, curY.Concat(curYTest).ToList());
    261261
    262262          SampleTrainingData(rand, modifiableDataset, rRows, problemData.Dataset, curY, problemData.TargetVariable, problemData.TrainingIndices); // all training indices from the original problem data are allowed
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithm.cs

    r15732 r16158  
    223223    protected override void Run(CancellationToken cancellationToken) {
    224224      // Set up the algorithm
    225       if (SetSeedRandomly) Seed = new System.Random().Next();
     225      if (SetSeedRandomly) Seed = Random.RandomSeedGenerator.GetSeed();
    226226
    227227      // Set up the results display
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r15687 r16158  
    223223      <SubType>Code</SubType>
    224224    </Compile>
     225    <Compile Include="DoubleArrayExtensions.cs" />
    225226    <Compile Include="FixedDataAnalysisAlgorithm.cs" />
    226227    <Compile Include="GaussianProcess\CovarianceFunctions\CovarianceSpectralMixture.cs" />
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs

    r15583 r16158  
    8080      inputMatrix = factorMatrix.HorzCat(inputMatrix);
    8181
    82       if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
     82      if (inputMatrix.ContainsNanOrInfinity())
    8383        throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset.");
    8484
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r15583 r16158  
    8080      var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);
    8181
    82       if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
     82      if (inputMatrix.ContainsNanOrInfinity())
    8383        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
    8484
     
    100100      int nVarCoeff = doubleVariables.Count();
    101101      var tree = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
    102         doubleVariables.ToArray(), coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(), 
     102        doubleVariables.ToArray(), coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(),
    103103        @const: coefficients[nFeatures]);
    104      
     104
    105105      SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone());
    106106      solution.Model.Name = "Linear Regression Model";
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs

    r15583 r16158  
    7878      inputMatrix = factorMatrix.HorzCat(inputMatrix);
    7979
    80       if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
     80      if (inputMatrix.ContainsNanOrInfinity())
    8181        throw new NotSupportedException("Multinomial logit classification does not support NaN or infinity values in the input dataset.");
    8282
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs

    r15583 r16158  
    6565
    6666      var ds = ReduceDataset(dataset, rows);
    67       nnModel = new NearestNeighbourModel(ds, Enumerable.Range(0, ds.Rows), k, ds.VariableNames.Last(), ds.VariableNames.Take(transformationMatrix.GetLength(1)), classValues);
     67      nnModel = new NearestNeighbourModel(ds, Enumerable.Range(0, ds.Rows), k, ds.VariableNames.Last(), ds.VariableNames.Take(transformationMatrix.GetLength(1)), classValues: classValues);
    6868    }
    6969
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r15583 r16158  
    130130          // automatic determination of weights (all features should have variance = 1)
    131131          this.weights = this.allowedInputVariables
    132             .Select(name => 1.0 / dataset.GetDoubleValues(name, rows).StandardDeviationPop())
     132            .Select(name => {
     133              var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop();
     134              return  pop.IsAlmost(0) ? 1.0 : 1.0/pop;
     135            })
    133136            .Concat(new double[] { 1.0 }) // no scaling for target variable
    134137            .ToArray();
     
    142145      }
    143146
    144       if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
     147      if (inputMatrix.ContainsNanOrInfinity())
    145148        throw new NotSupportedException(
    146149          "Nearest neighbour model does not support NaN or infinity values in the input dataset.");
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassification.cs

    r15583 r16158  
    115115    public NeuralNetworkClassification()
    116116      : base() {
    117       var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 
    118         (IntValue)new IntValue(0).AsReadOnly(), 
    119         (IntValue)new IntValue(1).AsReadOnly(), 
     117      var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] {
     118        (IntValue)new IntValue(0).AsReadOnly(),
     119        (IntValue)new IntValue(1).AsReadOnly(),
    120120        (IntValue)new IntValue(2).AsReadOnly() });
    121121      var selectedHiddenLayerValue = (from v in validHiddenLayerValues
     
    185185      IEnumerable<int> rows = problemData.TrainingIndices;
    186186      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    187       if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
     187      if (inputMatrix.ContainsNanOrInfinity())
    188188        throw new NotSupportedException("Neural network classification does not support NaN or infinity values in the input dataset.");
    189189
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleClassification.cs

    r15583 r16158  
    171171      IEnumerable<int> rows = problemData.TrainingIndices;
    172172      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    173       if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
     173      if (inputMatrix.ContainsNanOrInfinity())
    174174        throw new NotSupportedException("Neural network ensemble classification does not support NaN or infinity values in the input dataset.");
    175175
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleModel.cs

    r15583 r16158  
    102102          x[column] = inputData[row, column];
    103103        }
    104         alglib.mlpeprocess(mlpEnsemble, x, ref y);
     104        // mlpeprocess writes data in mlpEnsemble and is therefore not thread-safe
     105        lock (mlpEnsemble) {
     106          alglib.mlpeprocess(mlpEnsemble, x, ref y);
     107        }
    105108        yield return y[0];
    106109      }
     
    119122          x[column] = inputData[row, column];
    120123        }
    121         alglib.mlpeprocess(mlpEnsemble, x, ref y);
     124        // mlpeprocess writes data in mlpEnsemble and is therefore not thread-safe
     125        lock (mlpEnsemble) {
     126          alglib.mlpeprocess(mlpEnsemble, x, ref y);
     127        }
    122128        // find the class with the largest probability value
    123129        int maxProbClassIndex = 0;
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleRegression.cs

    r15583 r16158  
    125125    public NeuralNetworkEnsembleRegression()
    126126      : base() {
    127       var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 
    128         (IntValue)new IntValue(0).AsReadOnly(), 
    129         (IntValue)new IntValue(1).AsReadOnly(), 
     127      var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] {
     128        (IntValue)new IntValue(0).AsReadOnly(),
     129        (IntValue)new IntValue(1).AsReadOnly(),
    130130        (IntValue)new IntValue(2).AsReadOnly() });
    131131      var selectedHiddenLayerValue = (from v in validHiddenLayerValues
     
    170170      IEnumerable<int> rows = problemData.TrainingIndices;
    171171      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    172       if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
     172      if (inputMatrix.ContainsNanOrInfinity())
    173173        throw new NotSupportedException("Neural network ensemble regression does not support NaN or infinity values in the input dataset.");
    174174
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkModel.cs

    r15583 r16158  
    106106          x[column] = inputData[row, column];
    107107        }
    108         alglib.mlpprocess(multiLayerPerceptron, x, ref y);
     108        // NOTE: mlpprocess changes data in multiLayerPerceptron and is therefore not thread-safe!
     109        lock (multiLayerPerceptron) {
     110          alglib.mlpprocess(multiLayerPerceptron, x, ref y);
     111        }
    109112        yield return y[0];
    110113      }
     
    112115
    113116    public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    114       double[,] inputData = dataset.ToArray( allowedInputVariables, rows);
     117      double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
    115118
    116119      int n = inputData.GetLength(0);
     
    123126          x[column] = inputData[row, column];
    124127        }
    125         alglib.mlpprocess(multiLayerPerceptron, x, ref y);
     128        // NOTE: mlpprocess changes data in multiLayerPerceptron and is therefore not thread-safe!
     129        lock (multiLayerPerceptron) {
     130          alglib.mlpprocess(multiLayerPerceptron, x, ref y);
     131        }
    126132        // find the class with the largest probability value
    127133        int maxProbClassIndex = 0;
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegression.cs

    r15583 r16158  
    115115    public NeuralNetworkRegression()
    116116      : base() {
    117       var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 
    118         (IntValue)new IntValue(0).AsReadOnly(), 
    119         (IntValue)new IntValue(1).AsReadOnly(), 
     117      var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] {
     118        (IntValue)new IntValue(0).AsReadOnly(),
     119        (IntValue)new IntValue(1).AsReadOnly(),
    120120        (IntValue)new IntValue(2).AsReadOnly() });
    121121      var selectedHiddenLayerValue = (from v in validHiddenLayerValues
     
    186186      IEnumerable<int> rows = problemData.TrainingIndices;
    187187      double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    188       if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
     188      if (inputMatrix.ContainsNanOrInfinity())
    189189        throw new NotSupportedException("Neural network regression does not support NaN or infinity values in the input dataset.");
    190190
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NonlinearRegression/NonlinearRegression.cs

    r15583 r16158  
    186186        qualityTable.Rows.Add(testRMSERow);
    187187        Results.Add(new Result(qualityTable.Name, qualityTable.Name + " for all restarts", qualityTable));
    188         if (SetSeedRandomly) Seed = (new System.Random()).Next();
     188        if (SetSeedRandomly) Seed = RandomSeedGenerator.GetSeed();
    189189        var rand = new MersenneTwister((uint)Seed);
    190190        bestSolution = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, ApplyLinearScaling, rand);
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs

    r15583 r16158  
    135135    protected override void Run(CancellationToken cancellationToken) {
    136136      double rmsError, relClassificationError, outOfBagRmsError, outOfBagRelClassificationError;
    137       if (SetSeedRandomly) Seed = new System.Random().Next();
     137      if (SetSeedRandomly) Seed = Random.RandomSeedGenerator.GetSeed();
    138138
    139139      var model = CreateRandomForestClassificationModel(Problem.ProblemData, NumberOfTrees, R, M, Seed, out rmsError, out relClassificationError, out outOfBagRmsError, out outOfBagRelClassificationError);
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs

    r15583 r16158  
    310310    public static RandomForestModel CreateClassificationModel(IClassificationProblemData problemData, int nTrees, double r, double m, int seed,
    311311      out double rmsError, out double outOfBagRmsError, out double relClassificationError, out double outOfBagRelClassificationError) {
    312       return CreateClassificationModel(problemData, problemData.TrainingIndices, nTrees, r, m, seed, 
     312      return CreateClassificationModel(problemData, problemData.TrainingIndices, nTrees, r, m, seed,
    313313        out rmsError, out outOfBagRmsError, out relClassificationError, out outOfBagRelClassificationError);
    314314    }
     
    370370
    371371    private static void AssertInputMatrix(double[,] inputMatrix) {
    372       if (inputMatrix.Cast<double>().Any(x => Double.IsNaN(x) || Double.IsInfinity(x)))
     372      if (inputMatrix.ContainsNanOrInfinity())
    373373        throw new NotSupportedException("Random forest modeling does not support NaN or infinity values in the input dataset.");
    374374    }
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs

    r15583 r16158  
    134134    protected override void Run(CancellationToken cancellationToken) {
    135135      double rmsError, avgRelError, outOfBagRmsError, outOfBagAvgRelError;
    136       if (SetSeedRandomly) Seed = new System.Random().Next();
     136      if (SetSeedRandomly) Seed = Random.RandomSeedGenerator.GetSeed();
    137137      var model = CreateRandomForestRegressionModel(Problem.ProblemData, NumberOfTrees, R, M, Seed,
    138138        out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError);
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineModel.cs

    r15583 r16158  
    155155    private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {
    156156      // calculate predictions for the currently requested rows
    157       svm_problem problem = SupportVectorMachineUtil.CreateSvmProblem(dataset, TargetVariable, allowedInputVariables, rows);
     157      svm_problem problem = SupportVectorMachineUtil.CreateSvmProblem(dataset, allowedInputVariables, rows);
    158158      svm_problem scaledProblem = rangeTransform.Scale(problem);
    159159
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineUtil.cs

    r15583 r16158  
    3535  public class SupportVectorMachineUtil {
    3636    /// <summary>
    37     /// Transforms <paramref name="problemData"/> into a data structure as needed by libSVM.
    38     /// </summary>
    39     /// <param name="problemData">The problem data to transform</param>
     37    /// Transforms <paramref name="dataset"/> into a data structure as needed by libSVM.
     38    /// </summary>
     39    /// <param name="dataset">The source dataset</param>
     40    /// <param name="targetVariable">The target variable</param>
     41    /// <param name="inputVariables">The selected input variables to include in the svm_problem.</param>
    4042    /// <param name="rowIndices">The rows of the dataset that should be contained in the resulting SVM-problem</param>
    4143    /// <returns>A problem data type that can be used to train a support vector machine.</returns>
    4244    public static svm_problem CreateSvmProblem(IDataset dataset, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<int> rowIndices) {
    43       double[] targetVector = dataset.GetDoubleValues(targetVariable, rowIndices).ToArray();
    44       svm_node[][] nodes = new svm_node[targetVector.Length][];
     45      double[] targetVector ;
     46      var nRows = rowIndices.Count();
     47      if (string.IsNullOrEmpty(targetVariable)) {
     48        // if the target variable is not set (e.g. for prediction of a trained model) we just use a zero vector
     49        targetVector = new double[nRows];
     50      } else {
     51        targetVector = dataset.GetDoubleValues(targetVariable, rowIndices).ToArray();
     52      }
     53      svm_node[][] nodes = new svm_node[nRows][];
    4554      int maxNodeIndex = 0;
    4655      int svmProblemRowIndex = 0;
     
    6675
    6776    /// <summary>
     77    /// Transforms <paramref name="dataset"/> into a data structure as needed by libSVM for prediction.
     78    /// </summary>
     79    /// <param name="dataset">The problem data to transform</param>
     80    /// <param name="inputVariables">The selected input variables to include in the svm_problem.</param>
     81    /// <param name="rowIndices">The rows of the dataset that should be contained in the resulting SVM-problem</param>
     82    /// <returns>A problem data type that can be used for prediction with a trained support vector machine.</returns>
     83    public static svm_problem CreateSvmProblem(IDataset dataset, IEnumerable<string> inputVariables, IEnumerable<int> rowIndices) {
     84      // for prediction we don't need a target variable
     85      return CreateSvmProblem(dataset, string.Empty, inputVariables, rowIndices);
     86    }
     87
     88    /// <summary>
    6889    /// Instantiate and return a svm_parameter object with default values.
    6990    /// </summary>
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAlgorithm.cs

    r15583 r16158  
    275275      if (wdist != null) wdist.Initialize(problemData);
    276276      if (state == null) {
    277         if (SetSeedRandomly) Seed = new System.Random().Next();
     277        if (SetSeedRandomly) Seed = RandomSeedGenerator.GetSeed();
    278278        var random = new MersenneTwister((uint)Seed);
    279279        var dataset = problemData.Dataset;
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/TimeSeries/AutoregressiveModeling.cs

    r15583 r16158  
    2626using HeuristicLab.Core;
    2727using HeuristicLab.Data;
    28 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    2928using HeuristicLab.Optimization;
    3029using HeuristicLab.Parameters;
     
    9796        inputMatrix[i, timeOffset] = targetValues[i + problemData.TrainingPartition.Start];
    9897
    99       if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
     98      if (inputMatrix.ContainsNanOrInfinity())
    10099        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
    101100
  • branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs

    r15583 r16158  
    9191      int[] xyc;
    9292      double[,] inputMatrix = dataset.ToArray(allowedInputVariables, rows);
    93       if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
     93      if (inputMatrix.ContainsNanOrInfinity())
    9494        throw new NotSupportedException("k-Means clustering does not support NaN or infinity values in the input dataset.");
    9595
Note: See TracChangeset for help on using the changeset viewer.