- Timestamp: 06/25/15 18:21:19 (10 years ago)
- Location: branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis
- Files: 36 edited
Legend:
- Unmodified
- Added
- Removed
-
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed:
  /trunk/sources/HeuristicLab.Algorithms.DataAnalysis (added) merged: 12504,12509
-
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/CrossValidation.cs
r12012 r12515 36 36 namespace HeuristicLab.Algorithms.DataAnalysis { 37 37 [Item("Cross Validation", "Cross-validation wrapper for data analysis algorithms.")] 38 [Creatable( "Data Analysis")]38 [Creatable(CreatableAttribute.Categories.DataAnalysis, Priority = 100)] 39 39 [StorableClass] 40 40 public sealed class CrossValidation : ParameterizedNamedItem, IAlgorithm, IStorableContent { -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessClassification.cs
r12012 r12515 23 23 using System; 24 24 using System.Linq; 25 using HeuristicLab.Algorithms.GradientDescent;26 25 using HeuristicLab.Common; 27 26 using HeuristicLab.Core; 28 using HeuristicLab.Data;29 using HeuristicLab.Operators;30 27 using HeuristicLab.Optimization; 31 28 using HeuristicLab.Parameters; … … 39 36 /// </summary> 40 37 [Item("Gaussian Process Least-Squares Classification", "Gaussian process least-squares classification data analysis algorithm.")] 41 [Creatable( "Data Analysis")]38 [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 160)] 42 39 [StorableClass] 43 40 public sealed class GaussianProcessClassification : GaussianProcessBase, IStorableContent { -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs
r12012 r12515 117 117 this.x = original.x; 118 118 } 119 public GaussianProcessModel( Dataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows,119 public GaussianProcessModel(IDataset ds, string targetVariable, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows, 120 120 IEnumerable<double> hyp, IMeanFunction meanFunction, ICovarianceFunction covarianceFunction) 121 121 : base() { … … 141 141 } 142 142 143 private void CalculateModel( Dataset ds, IEnumerable<int> rows) {143 private void CalculateModel(IDataset ds, IEnumerable<int> rows) { 144 144 inputScaling = new Scaling(ds, allowedInputVariables, rows); 145 145 x = AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputVariables, rows, inputScaling); … … 245 245 246 246 #region IRegressionModel Members 247 public IEnumerable<double> GetEstimatedValues( Dataset dataset, IEnumerable<int> rows) {247 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 248 248 return GetEstimatedValuesHelper(dataset, rows); 249 249 } … … 257 257 258 258 259 private IEnumerable<double> GetEstimatedValuesHelper( Dataset dataset, IEnumerable<int> rows) {259 private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) { 260 260 var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling); 261 261 int newN = newX.GetLength(0); … … 277 277 } 278 278 279 public IEnumerable<double> GetEstimatedVariance( Dataset dataset, IEnumerable<int> rows) {279 public IEnumerable<double> GetEstimatedVariance(IDataset dataset, IEnumerable<int> rows) { 280 280 var newX = AlglibUtil.PrepareAndScaleInputMatrix(dataset, allowedInputVariables, rows, inputScaling); 281 281 int newN = newX.GetLength(0); -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessRegression.cs
r12012 r12515 23 23 using System; 24 24 using System.Linq; 25 using HeuristicLab.Algorithms.GradientDescent;26 25 using HeuristicLab.Common; 27 26 using HeuristicLab.Core; 28 using HeuristicLab.Data;29 using HeuristicLab.Operators;30 27 using HeuristicLab.Optimization; 31 28 using HeuristicLab.Parameters; … … 39 36 /// </summary> 40 37 [Item("Gaussian Process Regression", "Gaussian process regression data analysis algorithm.")] 41 [Creatable( "Data Analysis")]38 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 160)] 42 39 [StorableClass] 43 40 public sealed class GaussianProcessRegression : GaussianProcessBase, IStorableContent { -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/IGaussianProcessModel.cs
r12012 r12515 34 34 double[] HyperparameterGradients { get; } 35 35 36 IEnumerable<double> GetEstimatedVariance( Dataset ds, IEnumerable<int> rows);36 IEnumerable<double> GetEstimatedVariance(IDataset ds, IEnumerable<int> rows); 37 37 void FixParameters(); 38 38 } -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Interfaces/INcaModel.cs
r12012 r12515 27 27 new INcaClassificationSolution CreateClassificationSolution(IClassificationProblemData problemData); 28 28 29 double[,] Reduce( Dataset dataset, IEnumerable<int> rows);29 double[,] Reduce(IDataset dataset, IEnumerable<int> rows); 30 30 } 31 31 } -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/AlglibUtil.cs
r12012 r12515 26 26 namespace HeuristicLab.Algorithms.DataAnalysis { 27 27 public static class AlglibUtil { 28 public static double[,] PrepareInputMatrix( Dataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) {28 public static double[,] PrepareInputMatrix(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) { 29 29 List<string> variablesList = variables.ToList(); 30 30 List<int> rowsList = rows.ToList(); … … 45 45 return matrix; 46 46 } 47 public static double[,] PrepareAndScaleInputMatrix( Dataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, Scaling scaling) {47 public static double[,] PrepareAndScaleInputMatrix(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, Scaling scaling) { 48 48 List<string> variablesList = variables.ToList(); 49 49 List<int> rowsList = rows.ToList(); -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs
r12012 r12515 37 37 /// </summary> 38 38 [Item("Linear Discriminant Analysis", "Linear discriminant analysis classification algorithm (wrapper for ALGLIB).")] 39 [Creatable( "Data Analysis")]39 [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 100)] 40 40 [StorableClass] 41 41 public sealed class LinearDiscriminantAnalysis : FixedDataAnalysisAlgorithm<IClassificationProblem> { … … 65 65 66 66 public static IClassificationSolution CreateLinearDiscriminantAnalysisSolution(IClassificationProblemData problemData) { 67 Datasetdataset = problemData.Dataset;67 var dataset = problemData.Dataset; 68 68 string targetVariable = problemData.TargetVariable; 69 69 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
r12012 r12515 38 38 /// </summary> 39 39 [Item("Linear Regression", "Linear regression data analysis algorithm (wrapper for ALGLIB).")] 40 [Creatable( "Data Analysis")]40 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 100)] 41 41 [StorableClass] 42 42 public sealed class LinearRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> { … … 69 69 70 70 public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) { 71 Datasetdataset = problemData.Dataset;71 var dataset = problemData.Dataset; 72 72 string targetVariable = problemData.TargetVariable; 73 73 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs
r12012 r12515 35 35 /// </summary> 36 36 [Item("Multinomial Logit Classification", "Multinomial logit classification data analysis algorithm (wrapper for ALGLIB).")] 37 [Creatable( "Data Analysis")]37 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 180)] 38 38 [StorableClass] 39 39 public sealed class MultiNomialLogitClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> { … … 66 66 67 67 public static IClassificationSolution CreateLogitClassificationSolution(IClassificationProblemData problemData, out double rmsError, out double relClassError) { 68 Datasetdataset = problemData.Dataset;68 var dataset = problemData.Dataset; 69 69 string targetVariable = problemData.TargetVariable; 70 70 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitModel.cs
r12012 r12515 82 82 } 83 83 84 public IEnumerable<double> GetEstimatedClassValues( Dataset dataset, IEnumerable<int> rows) {84 public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 85 85 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 86 86 -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/Scaling.cs
r12012 r12515 41 41 scalingParameters.Add(pair.Key, Tuple.Create(pair.Value.Item1, pair.Value.Item2)); 42 42 } 43 public Scaling( Dataset ds, IEnumerable<string> variables, IEnumerable<int> rows) {43 public Scaling(IDataset ds, IEnumerable<string> variables, IEnumerable<int> rows) { 44 44 foreach (var variable in variables) { 45 45 var values = ds.GetDoubleValues(variable, rows); … … 54 54 } 55 55 56 public IEnumerable<double> GetScaledValues( Dataset ds, string variable, IEnumerable<int> rows) {56 public IEnumerable<double> GetScaledValues(IDataset ds, string variable, IEnumerable<int> rows) { 57 57 double min = scalingParameters[variable].Item1; 58 58 double max = scalingParameters[variable].Item2; -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaAlgorithm.cs
r12012 r12515 43 43 with additional regularizations described in Z. Yang, J. Laaksonen. 2007. 44 44 Regularized Neighborhood Component Analysis. Lecture Notes in Computer Science, 4522. pp. 253-262.")] 45 [Creatable( "Data Analysis")]45 [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 170)] 46 46 [StorableClass] 47 47 public sealed class NcaAlgorithm : EngineAlgorithm { -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs
r12012 r12515 56 56 this.classValues = (double[])original.classValues.Clone(); 57 57 } 58 public NcaModel(int k, double[,] transformationMatrix, Dataset dataset, IEnumerable<int> rows, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues) {58 public NcaModel(int k, double[,] transformationMatrix, IDataset dataset, IEnumerable<int> rows, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues) { 59 59 Name = ItemName; 60 60 Description = ItemDescription; … … 72 72 } 73 73 74 public IEnumerable<double> GetEstimatedClassValues( Dataset dataset, IEnumerable<int> rows) {74 public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 75 75 var ds = ReduceDataset(dataset, rows); 76 76 return nnModel.GetEstimatedClassValues(ds, Enumerable.Range(0, ds.Rows)); … … 85 85 } 86 86 87 public double[,] Reduce( Dataset dataset, IEnumerable<int> rows) {87 public double[,] Reduce(IDataset dataset, IEnumerable<int> rows) { 88 88 var data = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 89 89 … … 100 100 } 101 101 102 public Dataset ReduceDataset( Dataset dataset, IEnumerable<int> rows) {102 public Dataset ReduceDataset(IDataset dataset, IEnumerable<int> rows) { 103 103 return new Dataset(Enumerable 104 104 .Range(0, transformationMatrix.GetLength(1)) -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourClassification.cs
r12012 r12515 35 35 /// </summary> 36 36 [Item("Nearest Neighbour Classification", "Nearest neighbour classification data analysis algorithm (wrapper for ALGLIB).")] 37 [Creatable( "Data Analysis")]37 [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 150)] 38 38 [StorableClass] 39 39 public sealed class NearestNeighbourClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> { -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs
r12012 r12515 96 96 this.classValues = (double[])original.classValues.Clone(); 97 97 } 98 public NearestNeighbourModel( Dataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null) {98 public NearestNeighbourModel(IDataset dataset, IEnumerable<int> rows, int k, string targetVariable, IEnumerable<string> allowedInputVariables, double[] classValues = null) { 99 99 Name = ItemName; 100 100 Description = ItemDescription; … … 135 135 } 136 136 137 public IEnumerable<double> GetEstimatedValues( Dataset dataset, IEnumerable<int> rows) {137 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 138 138 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 139 139 … … 163 163 } 164 164 165 public IEnumerable<double> GetEstimatedClassValues( Dataset dataset, IEnumerable<int> rows) {165 public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 166 166 if (classValues == null) throw new InvalidOperationException("No class values are defined."); 167 167 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourRegression.cs
r12012 r12515 34 34 /// </summary> 35 35 [Item("Nearest Neighbour Regression", "Nearest neighbour regression data analysis algorithm (wrapper for ALGLIB).")] 36 [Creatable( "Data Analysis")]36 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 150)] 37 37 [StorableClass] 38 38 public sealed class NearestNeighbourRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> { -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassification.cs
r12012 r12515 36 36 /// </summary> 37 37 [Item("Neural Network Classification", "Neural network classification data analysis algorithm (wrapper for ALGLIB). Further documentation: http://www.alglib.net/dataanalysis/neuralnetworks.php")] 38 [Creatable( "Data Analysis")]38 [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 130)] 39 39 [StorableClass] 40 40 public sealed class NeuralNetworkClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> { … … 179 179 public static IClassificationSolution CreateNeuralNetworkClassificationSolution(IClassificationProblemData problemData, int nLayers, int nHiddenNodes1, int nHiddenNodes2, double decay, int restarts, 180 180 out double rmsError, out double avgRelError, out double relClassError) { 181 Datasetdataset = problemData.Dataset;181 var dataset = problemData.Dataset; 182 182 string targetVariable = problemData.TargetVariable; 183 183 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleClassification.cs
r12012 r12515 36 36 /// </summary> 37 37 [Item("Neural Network Ensemble Classification", "Neural network ensemble classification data analysis algorithm (wrapper for ALGLIB). Further documentation: http://www.alglib.net/dataanalysis/mlpensembles.php")] 38 [Creatable( "Data Analysis")]38 [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 140)] 39 39 [StorableClass] 40 40 public sealed class NeuralNetworkEnsembleClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> { … … 124 124 public NeuralNetworkEnsembleClassification() 125 125 : base() { 126 126 var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 127 127 (IntValue)new IntValue(0).AsReadOnly(), 128 128 (IntValue)new IntValue(1).AsReadOnly(), … … 165 165 public static IClassificationSolution CreateNeuralNetworkEnsembleClassificationSolution(IClassificationProblemData problemData, int ensembleSize, int nLayers, int nHiddenNodes1, int nHiddenNodes2, double decay, int restarts, 166 166 out double rmsError, out double avgRelError, out double relClassError) { 167 Datasetdataset = problemData.Dataset;167 var dataset = problemData.Dataset; 168 168 string targetVariable = problemData.TargetVariable; 169 169 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleModel.cs
r12012 r12515 86 86 } 87 87 88 public IEnumerable<double> GetEstimatedValues( Dataset dataset, IEnumerable<int> rows) {88 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 89 89 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 90 90 … … 103 103 } 104 104 105 public IEnumerable<double> GetEstimatedClassValues( Dataset dataset, IEnumerable<int> rows) {105 public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 106 106 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 107 107 -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleRegression.cs
r12012 r12515 36 36 /// </summary> 37 37 [Item("Neural Network Ensemble Regression", "Neural network ensemble regression data analysis algorithm (wrapper for ALGLIB). Further documentation: http://www.alglib.net/dataanalysis/mlpensembles.php")] 38 [Creatable( "Data Analysis")]38 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 140)] 39 39 [StorableClass] 40 40 public sealed class NeuralNetworkEnsembleRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> { … … 124 124 public NeuralNetworkEnsembleRegression() 125 125 : base() { 126 126 var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 127 127 (IntValue)new IntValue(0).AsReadOnly(), 128 128 (IntValue)new IntValue(1).AsReadOnly(), … … 164 164 public static IRegressionSolution CreateNeuralNetworkEnsembleRegressionSolution(IRegressionProblemData problemData, int ensembleSize, int nLayers, int nHiddenNodes1, int nHiddenNodes2, double decay, int restarts, 165 165 out double rmsError, out double avgRelError) { 166 Datasetdataset = problemData.Dataset;166 var dataset = problemData.Dataset; 167 167 string targetVariable = problemData.TargetVariable; 168 168 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkModel.cs
r12012 r12515 94 94 } 95 95 96 public IEnumerable<double> GetEstimatedValues( Dataset dataset, IEnumerable<int> rows) {96 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 97 97 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 98 98 … … 111 111 } 112 112 113 public IEnumerable<double> GetEstimatedClassValues( Dataset dataset, IEnumerable<int> rows) {113 public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 114 114 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 115 115 -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegression.cs
r12012 r12515 36 36 /// </summary> 37 37 [Item("Neural Network Regression", "Neural network regression data analysis algorithm (wrapper for ALGLIB). Further documentation: http://www.alglib.net/dataanalysis/neuralnetworks.php")] 38 [Creatable( "Data Analysis")]38 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 130)] 39 39 [StorableClass] 40 40 public sealed class NeuralNetworkRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> { … … 180 180 public static IRegressionSolution CreateNeuralNetworkRegressionSolution(IRegressionProblemData problemData, int nLayers, int nHiddenNodes1, int nHiddenNodes2, double decay, int restarts, 181 181 out double rmsError, out double avgRelError) { 182 Datasetdataset = problemData.Dataset;182 var dataset = problemData.Dataset; 183 183 string targetVariable = problemData.TargetVariable; 184 184 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs
r12012 r12515 20 20 #endregion 21 21 22 using System;23 using System.Collections.Generic;24 using System.Linq;25 22 using HeuristicLab.Common; 26 23 using HeuristicLab.Core; … … 36 33 /// </summary> 37 34 [Item("Random Forest Classification", "Random forest classification data analysis algorithm (wrapper for ALGLIB).")] 38 [Creatable( "Data Analysis")]35 [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 120)] 39 36 [StorableClass] 40 37 public sealed class RandomForestClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> { -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs
r12012 r12515 129 129 } 130 130 131 public IEnumerable<double> GetEstimatedValues( Dataset dataset, IEnumerable<int> rows) {131 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 132 132 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows); 133 133 AssertInputMatrix(inputData); … … 147 147 } 148 148 149 public IEnumerable<double> GetEstimatedClassValues( Dataset dataset, IEnumerable<int> rows) {149 public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 150 150 double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows); 151 151 AssertInputMatrix(inputData); … … 205 205 outOfBagRmsError = rep.oobrmserror; 206 206 207 return new RandomForestModel(dForest, seed, problemData,nTrees, r, m);207 return new RandomForestModel(dForest, seed, problemData, nTrees, r, m); 208 208 } 209 209 … … 242 242 outOfBagRelClassificationError = rep.oobrelclserror; 243 243 244 return new RandomForestModel(dForest, seed, problemData,nTrees, r, m, classValues);244 return new RandomForestModel(dForest, seed, problemData, nTrees, r, m, classValues); 245 245 } 246 246 -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs
r12012 r12515 20 20 #endregion 21 21 22 using System;23 using System.Collections.Generic;24 using System.Linq;25 22 using HeuristicLab.Common; 26 23 using HeuristicLab.Core; … … 36 33 /// </summary> 37 34 [Item("Random Forest Regression", "Random forest regression data analysis algorithm (wrapper for ALGLIB).")] 38 [Creatable( "Data Analysis")]35 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 120)] 39 36 [StorableClass] 40 37 public sealed class RandomForestRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> { -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestUtil.cs
r12012 r12515 90 90 91 91 public static class RandomForestUtil { 92 private static readonly object locker = new object();93 94 92 private static void CrossValidate(IRegressionProblemData problemData, Tuple<IEnumerable<int>, IEnumerable<int>>[] partitions, int nTrees, double r, double m, int seed, out double avgTestMse) { 95 93 avgTestMse = 0; … … 132 130 } 133 131 134 // grid search without cross-validation since in the case of random forests, the out-of-bag estimate is unbiased 132 /// <summary> 133 /// Grid search without crossvalidation (since for random forests the out-of-bag estimate is unbiased) 134 /// </summary> 135 /// <param name="problemData">The regression problem data</param> 136 /// <param name="parameterRanges">The ranges for each parameter in the grid search</param> 137 /// <param name="seed">The random seed (required by the random forest model)</param> 138 /// <param name="maxDegreeOfParallelism">The maximum allowed number of threads (to parallelize the grid search)</param> 135 139 public static RFParameter GridSearch(IRegressionProblemData problemData, Dictionary<string, IEnumerable<double>> parameterRanges, int seed = 12345, int maxDegreeOfParallelism = 1) { 136 140 var setters = parameterRanges.Keys.Select(GenerateSetter).ToList(); … … 139 143 RFParameter bestParameters = new RFParameter(); 140 144 145 var locker = new object(); 141 146 Parallel.ForEach(crossProduct, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }, parameterCombination => { 142 147 var parameterValues = parameterCombination.ToList(); … … 156 161 } 157 162 163 /// <summary> 164 /// Grid search without crossvalidation (since for random forests the out-of-bag estimate is unbiased) 165 /// </summary> 166 /// <param name="problemData">The classification problem data</param> 167 /// <param name="parameterRanges">The ranges for each parameter in the grid search</param> 168 /// <param name="seed">The random seed (required by the random forest model)</param> 169 
/// <param name="maxDegreeOfParallelism">The maximum allowed number of threads (to parallelize the grid search)</param> 158 170 public static RFParameter GridSearch(IClassificationProblemData problemData, Dictionary<string, IEnumerable<double>> parameterRanges, int seed = 12345, int maxDegreeOfParallelism = 1) { 159 171 var setters = parameterRanges.Keys.Select(GenerateSetter).ToList(); … … 163 175 RFParameter bestParameters = new RFParameter(); 164 176 177 var locker = new object(); 165 178 Parallel.ForEach(crossProduct, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }, parameterCombination => { 166 179 var parameterValues = parameterCombination.ToList(); … … 181 194 } 182 195 196 /// <summary> 197 /// Grid search with crossvalidation 198 /// </summary> 199 /// <param name="problemData">The regression problem data</param> 200 /// <param name="numberOfFolds">The number of folds for crossvalidation</param> 201 /// <param name="shuffleFolds">Specifies whether the folds should be shuffled</param> 202 /// <param name="parameterRanges">The ranges for each parameter in the grid search</param> 203 /// <param name="seed">The random seed (required by the random forest model)</param> 204 /// <param name="maxDegreeOfParallelism">The maximum allowed number of threads (to parallelize the grid search)</param> 205 /// <returns>The best parameter values found by the grid search</returns> 183 206 public static RFParameter GridSearch(IRegressionProblemData problemData, int numberOfFolds, bool shuffleFolds, Dictionary<string, IEnumerable<double>> parameterRanges, int seed = 12345, int maxDegreeOfParallelism = 1) { 184 207 DoubleValue mse = new DoubleValue(Double.MaxValue); … … 189 212 var crossProduct = parameterRanges.Values.CartesianProduct(); 190 213 214 var locker = new object(); 191 215 Parallel.ForEach(crossProduct, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }, parameterCombination => { 192 216 var parameterValues = 
parameterCombination.ToList(); … … 208 232 } 209 233 234 /// <summary> 235 /// Grid search with crossvalidation 236 /// </summary> 237 /// <param name="problemData">The classification problem data</param> 238 /// <param name="numberOfFolds">The number of folds for crossvalidation</param> 239 /// <param name="shuffleFolds">Specifies whether the folds should be shuffled</param> 240 /// <param name="parameterRanges">The ranges for each parameter in the grid search</param> 241 /// <param name="seed">The random seed (for shuffling)</param> 242 /// <param name="maxDegreeOfParallelism">The maximum allowed number of threads (to parallelize the grid search)</param> 210 243 public static RFParameter GridSearch(IClassificationProblemData problemData, int numberOfFolds, bool shuffleFolds, Dictionary<string, IEnumerable<double>> parameterRanges, int seed = 12345, int maxDegreeOfParallelism = 1) { 211 244 DoubleValue accuracy = new DoubleValue(0); … … 216 249 var partitions = GenerateRandomForestPartitions(problemData, numberOfFolds, shuffleFolds); 217 250 251 var locker = new object(); 218 252 Parallel.ForEach(crossProduct, new ParallelOptions { MaxDegreeOfParallelism = maxDegreeOfParallelism }, parameterCombination => { 219 253 var parameterValues = parameterCombination.ToList(); -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorClassification.cs
r12012 r12515 37 37 /// </summary> 38 38 [Item("Support Vector Classification", "Support vector machine classification data analysis algorithm (wrapper for libSVM).")] 39 [Creatable( "Data Analysis")]39 [Creatable(CreatableAttribute.Categories.DataAnalysisClassification, Priority = 110)] 40 40 [StorableClass] 41 41 public sealed class SupportVectorClassification : FixedDataAnalysisAlgorithm<IClassificationProblem> { … … 149 149 public static SupportVectorClassificationSolution CreateSupportVectorClassificationSolution(IClassificationProblemData problemData, IEnumerable<string> allowedInputVariables, 150 150 int svmType, int kernelType, double cost, double nu, double gamma, int degree, out double trainingAccuracy, out double testAccuracy, out int nSv) { 151 Datasetdataset = problemData.Dataset;151 var dataset = problemData.Dataset; 152 152 string targetVariable = problemData.TargetVariable; 153 153 IEnumerable<int> rows = problemData.TrainingIndices; -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineModel.cs
r12012 r12515 120 120 121 121 #region IRegressionModel Members 122 public IEnumerable<double> GetEstimatedValues( Dataset dataset, IEnumerable<int> rows) {122 public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) { 123 123 return GetEstimatedValuesHelper(dataset, rows); 124 124 } … … 132 132 133 133 #region IClassificationModel Members 134 public IEnumerable<double> GetEstimatedClassValues( Dataset dataset, IEnumerable<int> rows) {134 public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 135 135 if (classValues == null) throw new NotSupportedException(); 136 136 // return the original class value instead of the predicted value of the model … … 159 159 } 160 160 #endregion 161 private IEnumerable<double> GetEstimatedValuesHelper( Dataset dataset, IEnumerable<int> rows) {161 private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) { 162 162 // calculate predictions for the currently requested rows 163 163 svm_problem problem = SupportVectorMachineUtil.CreateSvmProblem(dataset, targetVariable, allowedInputVariables, rows); -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineUtil.cs
r12012 r12515 40 40 /// <param name="rowIndices">The rows of the dataset that should be contained in the resulting SVM-problem</param> 41 41 /// <returns>A problem data type that can be used to train a support vector machine.</returns> 42 public static svm_problem CreateSvmProblem( Dataset dataset, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<int> rowIndices) {42 public static svm_problem CreateSvmProblem(IDataset dataset, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<int> rowIndices) { 43 43 double[] targetVector = dataset.GetDoubleValues(targetVariable, rowIndices).ToArray(); 44 44 svm_node[][] nodes = new svm_node[targetVector.Length][]; -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorRegression.cs
r12012 r12515 37 37 /// </summary> 38 38 [Item("Support Vector Regression", "Support vector machine regression data analysis algorithm (wrapper for libSVM).")] 39 [Creatable( "Data Analysis")]39 [Creatable(CreatableAttribute.Categories.DataAnalysisRegression, Priority = 110)] 40 40 [StorableClass] 41 41 public sealed class SupportVectorRegression : FixedDataAnalysisAlgorithm<IRegressionProblem> { … … 152 152 string svmType, string kernelType, double cost, double nu, double gamma, double epsilon, int degree, 153 153 out double trainingR2, out double testR2, out int nSv) { 154 Datasetdataset = problemData.Dataset;154 var dataset = problemData.Dataset; 155 155 string targetVariable = problemData.TargetVariable; 156 156 IEnumerable<int> rows = problemData.TrainingIndices; -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TimeSeries/AutoregressiveModeling.cs
r12012 r12515 35 35 namespace HeuristicLab.Algorithms.DataAnalysis.TimeSeries { 36 36 [Item("Autoregressive Modeling", "Timeseries modeling algorithm that creates AR-N models.")] 37 [Creatable( "Data Analysis")]37 [Creatable(CreatableAttribute.Categories.DataAnalysis, Priority = 130)] 38 38 [StorableClass] 39 39 public class AutoregressiveModeling : FixedDataAnalysisAlgorithm<ITimeSeriesPrognosisProblem> { -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs
r12012 r12515 36 36 /// </summary> 37 37 [Item("k-Means", "The k-Means clustering algorithm (wrapper for ALGLIB).")] 38 [Creatable( "Data Analysis")]38 [Creatable(CreatableAttribute.Categories.DataAnalysis, Priority = 10)] 39 39 [StorableClass] 40 40 public sealed class KMeansClustering : FixedDataAnalysisAlgorithm<IClusteringProblem> { … … 83 83 84 84 public static KMeansClusteringSolution CreateKMeansSolution(IClusteringProblemData problemData, int k, int restarts) { 85 Datasetdataset = problemData.Dataset;85 var dataset = problemData.Dataset; 86 86 IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables; 87 87 IEnumerable<int> rows = problemData.TrainingIndices; -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClusteringModel.cs
r12012 r12515 81 81 82 82 83 public IEnumerable<int> GetClusterValues( Dataset dataset, IEnumerable<int> rows) {83 public IEnumerable<int> GetClusterValues(IDataset dataset, IEnumerable<int> rows) { 84 84 return KMeansClusteringUtil.FindClosestCenters(centers, dataset, allowedInputVariables, rows); 85 85 } -
branches/HiveStatistics/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClusteringUtil.cs
r12012 r12515 27 27 namespace HeuristicLab.Algorithms.DataAnalysis { 28 28 public static class KMeansClusteringUtil { 29 public static IEnumerable<int> FindClosestCenters(IEnumerable<double[]> centers, Dataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows) {29 public static IEnumerable<int> FindClosestCenters(IEnumerable<double[]> centers, IDataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows) { 30 30 int nRows = rows.Count(); 31 31 int nCols = allowedInputVariables.Count(); … … 58 58 } 59 59 60 public static double CalculateIntraClusterSumOfSquares(KMeansClusteringModel model, Dataset dataset, IEnumerable<int> rows) {60 public static double CalculateIntraClusterSumOfSquares(KMeansClusteringModel model, IDataset dataset, IEnumerable<int> rows) { 61 61 List<int> clusterValues = model.GetClusterValues(dataset, rows).ToList(); 62 62 List<string> allowedInputVariables = model.AllowedInputVariables.ToList();
Note: See TracChangeset for help on using the changeset viewer.