Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
06/25/15 18:21:19 (10 years ago)
Author:
dglaser
Message:

#2388: Merged trunk into HiveStatistics branch

Location:
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis
Files:
36 edited

Legend:

Unmodified
Added
Removed
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis

  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/CrossValidation.cs

    r12012 r12515  
    3636namespace HeuristicLab.Algorithms.DataAnalysis {
    3737  [Item("Cross Validation", "Cross-validation wrapper for data analysis algorithms.")]
    38   [Creatable("Data Analysis")]
     38  [Creatable(CreatableAttribute.Categories.DataAnalysis, Priority = 100)]
    3939  [StorableClass]
    4040  public sealed class CrossValidation : ParameterizedNamedItem, IAlgorithm, IStorableContent {
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessClassification.cs

    r12012 r12515  
    2323using System;
    2424using System.Linq;
    25 using HeuristicLab.Algorithms.GradientDescent;
    2625using HeuristicLab.Common;
    2726using HeuristicLab.Core;
    28 using HeuristicLab.Data;
    29 using HeuristicLab.Operators;
    3027using HeuristicLab.Optimization;
    3128using HeuristicLab.Parameters;
     
    3936  /// </summary>
    4037  [Item("Gaussian Process Least-Squares Classification", "Gaussian process least-squares classification data analysis algorithm.")]
    41   [Creatable("Data Analysis")]
     38  [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 160)]
    4239  [StorableClass]
    4340  public sealed class GaussianProcessClassification : GaussianProcessBase, IStorableContent {
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs

    r12012 r12515  
    117117      this.x = original.x;
    118118    }
    119     public GaussianProcessModel(Dataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
     119    public GaussianProcessModel(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,
    120120      IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction)
    121121      : base() {
     
    141141    }
    142142
    143     private void CalculateModel(Dataset ds, IEnumerable<int> rows) {
     143    private void CalculateModel(IDataset ds, IEnumerable<int> rows) {
    144144      inputScaling = new Scaling(ds, allowedInputVariables, rows);
    145145      x = AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputVariables, rows, inputScaling);
     
    245245
    246246    #region IRegressionModel Members
    247     public IEnumerable<double> GetEstimatedValues(Dataset dataset, IEnumerable<int> rows) {
     247    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    248248      return GetEstimatedValuesHelper(dataset, rows);
    249249    }
     
    257257
    258258
    259     private IEnumerable<double> GetEstimatedValuesHelper(Dataset dataset, IEnumerable<int> rows) {
     259    private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {
    260260      var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling);
    261261      int newN = newX.GetLength(0);
     
    277277    }
    278278
    279     public IEnumerable<double> GetEstimatedVariance(Dataset dataset, IEnumerable<int> rows) {
     279    public IEnumerable<double> GetEstimatedVariance(IDataset dataset, IEnumerable<int> rows) {
    280280      var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling);
    281281      int newN = newX.GetLength(0);
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessRegression.cs

    r12012 r12515  
    2323using System;
    2424using System.Linq;
    25 using HeuristicLab.Algorithms.GradientDescent;
    2625using HeuristicLab.Common;
    2726using HeuristicLab.Core;
    28 using HeuristicLab.Data;
    29 using HeuristicLab.Operators;
    3027using HeuristicLab.Optimization;
    3128using HeuristicLab.Parameters;
     
    3936  /// </summary>
    4037  [Item("Gaussian Process Regression", "Gaussian process regression data analysis algorithm.")]
    41   [Creatable("Data Analysis")]
     38  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 160)]
    4239  [StorableClass]
    4340  public sealed class GaussianProcessRegression : GaussianProcessBase, IStorableContent {
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/IGaussianProcessModel.cs

    r12012 r12515  
    3434    double[] HyperparameterGradients { get; }
    3535
    36     IEnumerable<double> GetEstimatedVariance(Dataset ds, IEnumerable<int> rows);
     36    IEnumerable<double> GetEstimatedVariance(IDataset ds, IEnumerable<int> rows);
    3737    void FixParameters();
    3838  }
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/INcaModel.cs

    r12012 r12515  
    2727    new INcaClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData);
    2828
    29     double[,] Reduce(Dataset dataset, IEnumerable<int> rows);
     29    double[,] Reduce(IDataset dataset, IEnumerable<int> rows);
    3030  }
    3131}
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/AlglibUtil.cs

    r12012 r12515  
    2626namespace HeuristicLab.Algorithms.DataAnalysis {
    2727  public static class AlglibUtil {
    28     public static double[,] PrepareInputMatrix(Dataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) {
     28    public static double[,] PrepareInputMatrix(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) {
    2929      List<string> variablesList = variables.ToList();
    3030      List<int> rowsList = rows.ToList();
     
    4545      return matrix;
    4646    }
    47     public static double[,] PrepareAndScaleInputMatrix(Dataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, Scaling scaling) {
     47    public static double[,] PrepareAndScaleInputMatrix(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, Scaling scaling) {
    4848      List<string> variablesList = variables.ToList();
    4949      List<int> rowsList = rows.ToList();
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs

    r12012 r12515  
    3737  /// </summary>
    3838  [Item("Linear Discriminant Analysis", "Linear discriminant analysis classification algorithm (wrapper for ALGLIB).")]
    39   [Creatable("Data Analysis")]
     39  [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 100)]
    4040  [StorableClass]
    4141  public sealed class LinearDiscriminantAnalysis : FixedDataAnalysisAlgorithm<IClassificationProblem> {
     
    6565
    6666    public static IClassificationSolution CreateLinearDiscriminantAnalysisSolution(IClassificationProblemData problemData) {
    67       Dataset dataset = problemData.Dataset;
     67      var dataset = problemData.Dataset;
    6868      string targetVariable = problemData.TargetVariable;
    6969      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r12012 r12515  
    3838  /// </summary>
    3939  [Item("Linear Regression", "Linear regression data analysis algorithm (wrapper for ALGLIB).")]
    40   [Creatable("Data Analysis")]
     40  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 100)]
    4141  [StorableClass]
    4242  public sealed class LinearRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> {
     
    6969
    7070    public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) {
    71       Dataset dataset = problemData.Dataset;
     71      var dataset = problemData.Dataset;
    7272      string targetVariable = problemData.TargetVariable;
    7373      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs

    r12012 r12515  
    3535  /// </summary>
    3636  [Item("Multinomial Logit Classification", "Multinomial logit classification data analysis algorithm (wrapper for ALGLIB).")]
    37   [Creatable("Data Analysis")]
     37  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 180)]
    3838  [StorableClass]
    3939  public sealed class MultiNomialLogitClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> {
     
    6666
    6767    public static IClassificationSolution CreateLogitClassificationSolution(IClassificationProblemData problemData, out double rmsError, out double relClassError) {
    68       Dataset dataset = problemData.Dataset;
     68      var dataset = problemData.Dataset;
    6969      string targetVariable = problemData.TargetVariable;
    7070      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitModel.cs

    r12012 r12515  
    8282    }
    8383
    84     public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
     84    public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    8585      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    8686
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/Scaling.cs

    r12012 r12515  
    4141        scalingParameters.Add(pair.Key, Tuple.Create(pair.Value.Item1, pair.Value.Item2));
    4242    }
    43     public Scaling(Dataset ds, IEnumerable<string> variables, IEnumerable<int> rows) {
     43    public Scaling(IDataset ds, IEnumerable<string> variables, IEnumerable<int> rows) {
    4444      foreach (var variable in variables) {
    4545        var values = ds.GetDoubleValues(variable, rows);
     
    5454    }
    5555
    56     public IEnumerable<double> GetScaledValues(Dataset ds, string variable, IEnumerable<int> rows) {
     56    public IEnumerable<double> GetScaledValues(IDataset ds, string variable, IEnumerable<int> rows) {
    5757      double min = scalingParameters[variable].Item1;
    5858      double max = scalingParameters[variable].Item2;
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaAlgorithm.cs

    r12012 r12515  
    4343with additional regularizations described in Z. Yang, J. Laaksonen. 2007.
    4444Regularized Neighborhood Component Analysis. Lecture Notes in Computer Science, 4522. pp. 253-262.")]
    45   [Creatable("Data Analysis")]
     45  [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 170)]
    4646  [StorableClass]
    4747  public sealed class NcaAlgorithm : EngineAlgorithm {
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs

    r12012 r12515  
    5656      this.classValues = (double[])original.classValues.Clone();
    5757    }
    58     public NcaModel(int k, double[,] transformationMatrix, Dataset dataset, IEnumerable<int> rows, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues) {
     58    public NcaModel(int k, double[,] transformationMatrix, IDataset dataset, IEnumerable<int> rows, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues) {
    5959      Name = ItemName;
    6060      Description = ItemDescription;
     
    7272    }
    7373
    74     public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
     74    public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    7575      var ds = ReduceDataset(dataset, rows);
    7676      return nnModel.GetEstimatedClassValues(ds, Enumerable.Range(0, ds.Rows));
     
    8585    }
    8686
    87     public double[,] Reduce(Dataset dataset, IEnumerable<int> rows) {
     87    public double[,] Reduce(IDataset dataset, IEnumerable<int> rows) {
    8888      var data = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    8989
     
    100100    }
    101101
    102     public Dataset ReduceDataset(Dataset dataset, IEnumerable<int> rows) {
     102    public Dataset ReduceDataset(IDataset dataset, IEnumerable<int> rows) {
    103103      return new Dataset(Enumerable
    104104          .Range(0, transformationMatrix.GetLength(1))
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs

    r12012 r12515  
    3535  /// </summary>
    3636  [Item("Nearest Neighbour Classification", "Nearest neighbour classification data analysis algorithm (wrapper for ALGLIB).")]
    37   [Creatable("Data Analysis")]
     37  [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 150)]
    3838  [StorableClass]
    3939  public sealed class NearestNeighbourClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> {
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs

    r12012 r12515  
    9696        this.classValues = (double[])original.classValues.Clone();
    9797    }
    98     public NearestNeighbourModel(Dataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null) {
     98    public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null) {
    9999      Name = ItemName;
    100100      Description = ItemDescription;
     
    135135    }
    136136
    137     public IEnumerable<double> GetEstimatedValues(Dataset dataset, IEnumerable<int> rows) {
     137    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    138138      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    139139
     
    163163    }
    164164
    165     public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
     165    public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    166166      if (classValues == null) throw new InvalidOperationException("No class values are defined.");
    167167      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs

    r12012 r12515  
    3434  /// </summary>
    3535  [Item("Nearest Neighbour Regression", "Nearest neighbour regression data analysis algorithm (wrapper for ALGLIB).")]
    36   [Creatable("Data Analysis")]
     36  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 150)]
    3737  [StorableClass]
    3838  public sealed class NearestNeighbourRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> {
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassification.cs

    r12012 r12515  
    3636  /// </summary>
    3737  [Item("Neural Network Classification", "Neural network classification data analysis algorithm (wrapper for ALGLIB). Further documentation: http://www.alglib.net/dataanalysis/neuralnetworks.php")]
    38   [Creatable("Data Analysis")]
     38  [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 130)]
    3939  [StorableClass]
    4040  public sealed class NeuralNetworkClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> {
     
    179179    public static IClassificationSolution CreateNeuralNetworkClassificationSolution(IClassificationProblemData problemData, int nLayers, int nHiddenNodes1, int nHiddenNodes2, double decay, int restarts,
    180180      out double rmsError, out double avgRelError, out double relClassError) {
    181       Dataset dataset = problemData.Dataset;
     181      var dataset = problemData.Dataset;
    182182      string targetVariable = problemData.TargetVariable;
    183183      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleClassification.cs

    r12012 r12515  
    3636  /// </summary>
    3737  [Item("Neural Network Ensemble Classification", "Neural network ensemble classification data analysis algorithm (wrapper for ALGLIB). Further documentation: http://www.alglib.net/dataanalysis/mlpensembles.php")]
    38   [Creatable("Data Analysis")]
     38  [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 140)]
    3939  [StorableClass]
    4040  public sealed class NeuralNetworkEnsembleClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> {
     
    124124    public NeuralNetworkEnsembleClassification()
    125125      : base() {
    126         var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] {
     126      var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] {
    127127        (IntValue)new IntValue(0).AsReadOnly(),
    128128        (IntValue)new IntValue(1).AsReadOnly(),
     
    165165    public static IClassificationSolution CreateNeuralNetworkEnsembleClassificationSolution(IClassificationProblemData problemData, int ensembleSize, int nLayers, int nHiddenNodes1, int nHiddenNodes2, double decay, int restarts,
    166166      out double rmsError, out double avgRelError, out double relClassError) {
    167       Dataset dataset = problemData.Dataset;
     167      var dataset = problemData.Dataset;
    168168      string targetVariable = problemData.TargetVariable;
    169169      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleModel.cs

    r12012 r12515  
    8686    }
    8787
    88     public IEnumerable<double> GetEstimatedValues(Dataset dataset, IEnumerable<int> rows) {
     88    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    8989      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    9090
     
    103103    }
    104104
    105     public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
     105    public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    106106      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    107107
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleRegression.cs

    r12012 r12515  
    3636  /// </summary>
    3737  [Item("Neural Network Ensemble Regression", "Neural network ensemble regression data analysis algorithm (wrapper for ALGLIB). Further documentation: http://www.alglib.net/dataanalysis/mlpensembles.php")]
    38   [Creatable("Data Analysis")]
     38  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 140)]
    3939  [StorableClass]
    4040  public sealed class NeuralNetworkEnsembleRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> {
     
    124124    public NeuralNetworkEnsembleRegression()
    125125      : base() {
    126         var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] {
     126      var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] {
    127127        (IntValue)new IntValue(0).AsReadOnly(),
    128128        (IntValue)new IntValue(1).AsReadOnly(),
     
    164164    public static IRegressionSolution CreateNeuralNetworkEnsembleRegressionSolution(IRegressionProblemData problemData, int ensembleSize, int nLayers, int nHiddenNodes1, int nHiddenNodes2, double decay, int restarts,
    165165      out double rmsError, out double avgRelError) {
    166       Dataset dataset = problemData.Dataset;
     166      var dataset = problemData.Dataset;
    167167      string targetVariable = problemData.TargetVariable;
    168168      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkModel.cs

    r12012 r12515  
    9494    }
    9595
    96     public IEnumerable<double> GetEstimatedValues(Dataset dataset, IEnumerable<int> rows) {
     96    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    9797      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    9898
     
    111111    }
    112112
    113     public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
     113    public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    114114      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    115115
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegression.cs

    r12012 r12515  
    3636  /// </summary>
    3737  [Item("Neural Network Regression", "Neural network regression data analysis algorithm (wrapper for ALGLIB). Further documentation: http://www.alglib.net/dataanalysis/neuralnetworks.php")]
    38   [Creatable("Data Analysis")]
     38  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 130)]
    3939  [StorableClass]
    4040  public sealed class NeuralNetworkRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> {
     
    180180    public static IRegressionSolution CreateNeuralNetworkRegressionSolution(IRegressionProblemData problemData, int nLayers, int nHiddenNodes1, int nHiddenNodes2, double decay, int restarts,
    181181      out double rmsError, out double avgRelError) {
    182       Dataset dataset = problemData.Dataset;
     182      var dataset = problemData.Dataset;
    183183      string targetVariable = problemData.TargetVariable;
    184184      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs

    r12012 r12515  
    2020#endregion
    2121
    22 using System;
    23 using System.Collections.Generic;
    24 using System.Linq;
    2522using HeuristicLab.Common;
    2623using HeuristicLab.Core;
     
    3633  /// </summary>
    3734  [Item("Random Forest Classification", "Random forest classification data analysis algorithm (wrapper for ALGLIB).")]
    38   [Creatable("Data Analysis")]
     35  [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 120)]
    3936  [StorableClass]
    4037  public sealed class RandomForestClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> {
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs

    r12012 r12515  
    129129    }
    130130
    131     public IEnumerable<double> GetEstimatedValues(Dataset dataset, IEnumerable<int> rows) {
     131    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    132132      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
    133133      AssertInputMatrix(inputData);
     
    147147    }
    148148
    149     public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
     149    public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    150150      double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
    151151      AssertInputMatrix(inputData);
     
    205205      outOfBagRmsError = rep.oobrmserror;
    206206
    207       return new RandomForestModel(dForest,seed, problemData,nTrees, r, m);
     207      return new RandomForestModel(dForest, seed, problemData, nTrees, r, m);
    208208    }
    209209
     
    242242      outOfBagRelClassificationError = rep.oobrelclserror;
    243243
    244       return new RandomForestModel(dForest,seed, problemData,nTrees, r, m, classValues);
     244      return new RandomForestModel(dForest, seed, problemData, nTrees, r, m, classValues);
    245245    }
    246246
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs

    r12012 r12515  
    2020#endregion
    2121
    22 using System;
    23 using System.Collections.Generic;
    24 using System.Linq;
    2522using HeuristicLab.Common;
    2623using HeuristicLab.Core;
     
    3633  /// </summary>
    3734  [Item("Random Forest Regression", "Random forest regression data analysis algorithm (wrapper for ALGLIB).")]
    38   [Creatable("Data Analysis")]
     35  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 120)]
    3936  [StorableClass]
    4037  public sealed class RandomForestRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> {
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestUtil.cs

    r12012 r12515  
    9090
    9191  public static class RandomForestUtil {
    92     private static readonly object locker = new object();
    93 
    9492    private static void CrossValidate(IRegressionProblemData problemData, Tuple<IEnumerable<int>, IEnumerable<int>>[] partitions, int nTrees, double r, double m, int seed, out double avgTestMse) {
    9593      avgTestMse = 0;
     
    132130    }
    133131
    134     // grid search without cross-validation since in the case of random forests, the out-of-bag estimate is unbiased
     132    /// <summary>
     133    /// Grid search without crossvalidation (since for random forests the out-of-bag estimate is unbiased)
     134    /// </summary>
     135    /// <param name="problemData">The regression problem data</param>
     136    /// <param name="parameterRanges">The ranges for each parameter in the grid search</param>
     137    /// <param name="seed">The random seed (required by the random forest model)</param>
     138    /// <param name="maxDegreeOfParallelism">The maximum allowed number of threads (to parallelize the grid search)</param>
    135139    public static RFParameter GridSearch(IRegressionProblemData problemData, Dictionary<string, IEnumerable<double>> parameterRanges, int seed = 12345, int maxDegreeOfParallelism = 1) {
    136140      var setters = parameterRanges.Keys.Select(GenerateSetter).ToList();
     
    139143      RFParameter bestParameters = new RFParameter();
    140144
     145      var locker = new object();
    141146      Parallel.ForEach(crossProduct, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }, parameterCombination => {
    142147        var parameterValues = parameterCombination.ToList();
     
    156161    }
    157162
     163    /// <summary>
     164    /// Grid search without crossvalidation (since for random forests the out-of-bag estimate is unbiased)
     165    /// </summary>
     166    /// <param name="problemData">The classification problem data</param>
     167    /// <param name="parameterRanges">The ranges for each parameter in the grid search</param>
     168    /// <param name="seed">The random seed (required by the random forest model)</param>
     169    /// <param name="maxDegreeOfParallelism">The maximum allowed number of threads (to parallelize the grid search)</param>
    158170    public static RFParameter GridSearch(IClassificationProblemData problemData, Dictionary<string, IEnumerable<double>> parameterRanges, int seed = 12345, int maxDegreeOfParallelism = 1) {
    159171      var setters = parameterRanges.Keys.Select(GenerateSetter).ToList();
     
    163175      RFParameter bestParameters = new RFParameter();
    164176
     177      var locker = new object();
    165178      Parallel.ForEach(crossProduct, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }, parameterCombination => {
    166179        var parameterValues = parameterCombination.ToList();
     
    181194    }
    182195
     196    /// <summary>
     197    /// Grid search with crossvalidation
     198    /// </summary>
     199    /// <param name="problemData">The regression problem data</param>
     200    /// <param name="numberOfFolds">The number of folds for crossvalidation</param>
     201    /// <param name="shuffleFolds">Specifies whether the folds should be shuffled</param>
     202    /// <param name="parameterRanges">The ranges for each parameter in the grid search</param>
     203    /// <param name="seed">The random seed (required by the random forest model)</param>
     204    /// <param name="maxDegreeOfParallelism">The maximum allowed number of threads (to parallelize the grid search)</param>
     205    /// <returns>The best parameter values found by the grid search</returns>
    183206    public static RFParameter GridSearch(IRegressionProblemData problemData, int numberOfFolds, bool shuffleFolds, Dictionary<string, IEnumerable<double>> parameterRanges, int seed = 12345, int maxDegreeOfParallelism = 1) {
    184207      DoubleValue mse = new DoubleValue(Double.MaxValue);
     
    189212      var crossProduct = parameterRanges.Values.CartesianProduct();
    190213
     214      var locker = new object();
    191215      Parallel.ForEach(crossProduct, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }, parameterCombination => {
    192216        var parameterValues = parameterCombination.ToList();
     
    208232    }
    209233
     234    /// <summary>
     235    /// Grid search with crossvalidation
     236    /// </summary>
     237    /// <param name="problemData">The classification problem data</param>
     238    /// <param name="numberOfFolds">The number of folds for crossvalidation</param>
     239    /// <param name="shuffleFolds">Specifies whether the folds should be shuffled</param>
     240    /// <param name="parameterRanges">The ranges for each parameter in the grid search</param>
     241    /// <param name="seed">The random seed (for shuffling)</param>
     242    /// <param name="maxDegreeOfParallelism">The maximum allowed number of threads (to parallelize the grid search)</param>
    210243    public static RFParameter GridSearch(IClassificationProblemData problemData, int numberOfFolds, bool shuffleFolds, Dictionary<string, IEnumerable<double>> parameterRanges, int seed = 12345, int maxDegreeOfParallelism = 1) {
    211244      DoubleValue accuracy = new DoubleValue(0);
     
    216249      var partitions = GenerateRandomForestPartitions(problemData, numberOfFolds, shuffleFolds);
    217250
     251      var locker = new object();
    218252      Parallel.ForEach(crossProduct, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }, parameterCombination => {
    219253        var parameterValues = parameterCombination.ToList();
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorClassification.cs

    r12012 r12515  
    3737  /// </summary>
    3838  [Item("Support Vector Classification", "Support vector machine classification data analysis algorithm (wrapper for libSVM).")]
    39   [Creatable("Data Analysis")]
     39  [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 110)]
    4040  [StorableClass]
    4141  public sealed class SupportVectorClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> {
     
    149149    public static SupportVectorClassificationSolution CreateSupportVectorClassificationSolution(IClassificationProblemData problemData, IEnumerable<string> allowedInputVariables,
    150150      int svmType, int kernelType, double cost, double nu, double gamma, int degree, out double trainingAccuracy, out double testAccuracy, out int nSv) {
    151       Dataset dataset = problemData.Dataset;
     151      var dataset = problemData.Dataset;
    152152      string targetVariable = problemData.TargetVariable;
    153153      IEnumerable<int> rows = problemData.TrainingIndices;
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineModel.cs

    r12012 r12515  
    120120
    121121    #region IRegressionModel Members
    122     public IEnumerable<double> GetEstimatedValues(Dataset dataset, IEnumerable<int> rows) {
     122    public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
    123123      return GetEstimatedValuesHelper(dataset, rows);
    124124    }
     
    132132
    133133    #region IClassificationModel Members
    134     public IEnumerable<double> GetEstimatedClassValues(Dataset dataset, IEnumerable<int> rows) {
     134    public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
    135135      if (classValues == null) throw new NotSupportedException();
    136136      // return the original class value instead of the predicted value of the model
     
    159159    }
    160160    #endregion
    161     private IEnumerable<double> GetEstimatedValuesHelper(Dataset dataset, IEnumerable<int> rows) {
     161    private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) {
    162162      // calculate predictions for the currently requested rows
    163163      svm_problem problem = SupportVectorMachineUtil.CreateSvmProblem(dataset, targetVariable, allowedInputVariables, rows);
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineUtil.cs

    r12012 r12515  
    4040    /// <param name="rowIndices">The rows of the dataset that should be contained in the resulting SVM-problem</param>
    4141    /// <returns>A problem data type that can be used to train a support vector machine.</returns>
    42     public static svm_problem CreateSvmProblem(Dataset dataset, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<int> rowIndices) {
     42    public static svm_problem CreateSvmProblem(IDataset dataset, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<int> rowIndices) {
    4343      double[] targetVector = dataset.GetDoubleValues(targetVariable, rowIndices).ToArray();
    4444      svm_node[][] nodes = new svm_node[targetVector.Length][];
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorRegression.cs

    r12012 r12515  
    3737  /// </summary>
    3838  [Item("Support Vector Regression", "Support vector machine regression data analysis algorithm (wrapper for libSVM).")]
    39   [Creatable("Data Analysis")]
     39  [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 110)]
    4040  [StorableClass]
    4141  public sealed class SupportVectorRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> {
     
    152152      string svmType, string kernelType, double cost, double nu, double gamma, double epsilon, int degree,
    153153      out double trainingR2, out double testR2, out int nSv) {
    154       Dataset dataset = problemData.Dataset;
     154      var dataset = problemData.Dataset;
    155155      string targetVariable = problemData.TargetVariable;
    156156      IEnumerable<int> rows = problemData.TrainingIndices;
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TimeSeries/AutoregressiveModeling.cs

    r12012 r12515  
    3535namespace HeuristicLab.Algorithms.DataAnalysis.TimeSeries {
    3636  [Item("Autoregressive Modeling", "Timeseries modeling algorithm that creates AR-N models.")]
    37   [Creatable("Data Analysis")]
     37  [Creatable(CreatableAttribute.Categories.DataAnalysis, Priority = 130)]
    3838  [StorableClass]
    3939  public class AutoregressiveModeling : FixedDataAnalysisAlgorithm<ITimeSeriesPrognosisProblem> {
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs

    r12012 r12515  
    3636  /// </summary>
    3737  [Item("k-Means", "The k-Means clustering algorithm (wrapper for ALGLIB).")]
    38   [Creatable("Data Analysis")]
     38  [Creatable(CreatableAttribute.Categories.DataAnalysis, Priority = 10)]
    3939  [StorableClass]
    4040  public sealed class KMeansClustering : FixedDataAnalysisAlgorithm<IClusteringProblem> {
     
    8383
    8484    public static KMeansClusteringSolution CreateKMeansSolution(IClusteringProblemData problemData, int k, int restarts) {
    85       Dataset dataset = problemData.Dataset;
     85      var dataset = problemData.Dataset;
    8686      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    8787      IEnumerable<int> rows = problemData.TrainingIndices;
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClusteringModel.cs

    r12012 r12515  
    8181
    8282
    83     public IEnumerable<int> GetClusterValues(Dataset dataset, IEnumerable<int> rows) {
     83    public IEnumerable<int> GetClusterValues(IDataset dataset, IEnumerable<int> rows) {
    8484      return KMeansClusteringUtil.FindClosestCenters(centers, dataset, allowedInputVariables, rows);
    8585    }
  • branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClusteringUtil.cs

    r12012 r12515  
    2727namespace HeuristicLab.Algorithms.DataAnalysis {
    2828  public static class KMeansClusteringUtil {
    29     public static IEnumerable<int> FindClosestCenters(IEnumerable<double[]> centers, Dataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows) {
     29    public static IEnumerable<int> FindClosestCenters(IEnumerable<double[]> centers, IDataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows) {
    3030      int nRows = rows.Count();
    3131      int nCols = allowedInputVariables.Count();
     
    5858    }
    5959
    60     public static double CalculateIntraClusterSumOfSquares(KMeansClusteringModel model, Dataset dataset, IEnumerable<int> rows) {
     60    public static double CalculateIntraClusterSumOfSquares(KMeansClusteringModel model, IDataset dataset, IEnumerable<int> rows) {
    6161      List<int> clusterValues = model.GetClusterValues(dataset, rows).ToList();
    6262      List<string> allowedInputVariables = model.AllowedInputVariables.ToList();
Note: See TracChangeset for help on using the changeset viewer.