Changeset 16158 for branches/2883_GBTModelStorage
- Timestamp:
- 09/20/18 13:12:17 (6 years ago)
- Location:
- branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis
- Files:
-
- 1 deleted
- 25 edited
- 1 copied
Legend:
- Unmodified
- Added
- Removed
-
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis
- Property svn:mergeinfo changed
-
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4
- Property svn:ignore
-
old new 1 *.user 2 *.vs10x 3 HeuristicLab.Algorithms.DataAnalysis-3.4.csproj.user 4 HeuristicLabAlgorithmsDataAnalysisPlugin.cs 5 Plugin.cs 1 6 bin 2 7 obj 3 HeuristicLabAlgorithmsDataAnalysisPlugin.cs 4 HeuristicLab.Algorithms.DataAnalysis-3.4.csproj.user 5 *.vs10x 6 Plugin.cs 7 *.user
-
- Property svn:mergeinfo changed
- Property svn:ignore
-
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/GBM/GradientBoostingRegressionAlgorithm.cs
r15583 r16158 210 210 protected override void Run(CancellationToken cancellationToken) { 211 211 // Set up the algorithm 212 if (SetSeedRandomly) Seed = new System.Random().Next(); 212 if (SetSeedRandomly) Seed = RandomSeedGenerator.GetSeed(); 213 213 var rand = new MersenneTwister((uint)Seed); 214 214 … … 258 258 259 259 modifiableDataset.RemoveVariable(targetVarName); 260 modifiableDataset.AddVariable(targetVarName, curY.Concat(curYTest)); 260 modifiableDataset.AddVariable(targetVarName, curY.Concat(curYTest).ToList()); 261 261 262 262 SampleTrainingData(rand, modifiableDataset, rRows, problemData.Dataset, curY, problemData.TargetVariable, problemData.TrainingIndices); // all training indices from the original problem data are allowed -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/GradientBoostedTrees/GradientBoostedTreesAlgorithm.cs
r15732 r16158 223 223 protected override void Run(CancellationToken cancellationToken) { 224 224 // Set up the algorithm 225 if (SetSeedRandomly) Seed = new System.Random().Next(); 225 if (SetSeedRandomly) Seed = Random.RandomSeedGenerator.GetSeed(); 226 226 227 227 // Set up the results display -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r15687 r16158 223 223 <SubType>Code</SubType> 224 224 </Compile> 225 <Compile Include="DoubleArrayExtensions.cs" /> 225 226 <Compile Include="FixedDataAnalysisAlgorithm.cs" /> 226 227 <Compile Include="GaussianProcess\CovarianceFunctions\CovarianceSpectralMixture.cs" /> -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs
r15583 r16158 80 80 inputMatrix = factorMatrix.HorzCat(inputMatrix); 81 81 82 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 82 if (inputMatrix.ContainsNanOrInfinity()) 83 83 throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset."); 84 84 -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
r15583 r16158 80 80 var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix); 81 81 82 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 82 if (inputMatrix.ContainsNanOrInfinity()) 83 83 throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset."); 84 84 … … 100 100 int nVarCoeff = doubleVariables.Count(); 101 101 var tree = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(), 102 doubleVariables.ToArray(), coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(), 102 doubleVariables.ToArray(), coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(), 103 103 @const: coefficients[nFeatures]); 104 104 105 105 SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone()); 106 106 solution.Model.Name = "Linear Regression Model"; -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs
r15583 r16158 78 78 inputMatrix = factorMatrix.HorzCat(inputMatrix); 79 79 80 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 80 if (inputMatrix.ContainsNanOrInfinity()) 81 81 throw new NotSupportedException("Multinomial logit classification does not support NaN or infinity values in the input dataset."); 82 82 -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs
r15583 r16158 65 65 66 66 var ds = ReduceDataset(dataset, rows); 67 nnModel = new NearestNeighbourModel(ds, Enumerable.Range(0, ds.Rows), k, ds.VariableNames.Last(), ds.VariableNames.Take(transformationMatrix.GetLength(1)), classValues );67 nnModel = new NearestNeighbourModel(ds, Enumerable.Range(0, ds.Rows), k, ds.VariableNames.Last(), ds.VariableNames.Take(transformationMatrix.GetLength(1)), classValues: classValues); 68 68 } 69 69 -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs
r15583 r16158 130 130 // automatic determination of weights (all features should have variance = 1) 131 131 this.weights = this.allowedInputVariables 132 .Select(name => 1.0 / dataset.GetDoubleValues(name, rows).StandardDeviationPop()) 132 .Select(name => { 133 var pop = dataset.GetDoubleValues(name, rows).StandardDeviationPop(); 134 return pop.IsAlmost(0) ? 1.0 : 1.0/pop; 135 }) 133 136 .Concat(new double[] { 1.0 }) // no scaling for target variable 134 137 .ToArray(); … … 142 145 } 143 146 144 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 147 if (inputMatrix.ContainsNanOrInfinity()) 145 148 throw new NotSupportedException( 146 149 "Nearest neighbour model does not support NaN or infinity values in the input dataset."); -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassification.cs
r15583 r16158 115 115 public NeuralNetworkClassification() 116 116 : base() { 117 var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 118 (IntValue)new IntValue(0).AsReadOnly(), 119 (IntValue)new IntValue(1).AsReadOnly(), 117 var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 118 (IntValue)new IntValue(0).AsReadOnly(), 119 (IntValue)new IntValue(1).AsReadOnly(), 120 120 (IntValue)new IntValue(2).AsReadOnly() }); 121 121 var selectedHiddenLayerValue = (from v in validHiddenLayerValues … … 185 185 IEnumerable<int> rows = problemData.TrainingIndices; 186 186 double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows); 187 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 187 if (inputMatrix.ContainsNanOrInfinity()) 188 188 throw new NotSupportedException("Neural network classification does not support NaN or infinity values in the input dataset."); 189 189 -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleClassification.cs
r15583 r16158 171 171 IEnumerable<int> rows = problemData.TrainingIndices; 172 172 double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows); 173 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 173 if (inputMatrix.ContainsNanOrInfinity()) 174 174 throw new NotSupportedException("Neural network ensemble classification does not support NaN or infinity values in the input dataset."); 175 175 -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleModel.cs
r15583 r16158 102 102 x[column] = inputData[row, column]; 103 103 } 104 alglib.mlpeprocess(mlpEnsemble, x, ref y); 104 // mlpeprocess writes data in mlpEnsemble and is therefore not thread-safe 105 lock (mlpEnsemble) { 106 alglib.mlpeprocess(mlpEnsemble, x, ref y); 107 } 105 108 yield return y[0]; 106 109 } … … 119 122 x[column] = inputData[row, column]; 120 123 } 121 alglib.mlpeprocess(mlpEnsemble, x, ref y); 124 // mlpeprocess writes data in mlpEnsemble and is therefore not thread-safe 125 lock (mlpEnsemble) { 126 alglib.mlpeprocess(mlpEnsemble, x, ref y); 127 } 122 128 // find class for with the largest probability value 123 129 int maxProbClassIndex = 0; -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleRegression.cs
r15583 r16158 125 125 public NeuralNetworkEnsembleRegression() 126 126 : base() { 127 var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 128 (IntValue)new IntValue(0).AsReadOnly(), 129 (IntValue)new IntValue(1).AsReadOnly(), 127 var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 128 (IntValue)new IntValue(0).AsReadOnly(), 129 (IntValue)new IntValue(1).AsReadOnly(), 130 130 (IntValue)new IntValue(2).AsReadOnly() }); 131 131 var selectedHiddenLayerValue = (from v in validHiddenLayerValues … … 170 170 IEnumerable<int> rows = problemData.TrainingIndices; 171 171 double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows); 172 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 172 if (inputMatrix.ContainsNanOrInfinity()) 173 173 throw new NotSupportedException("Neural network ensemble regression does not support NaN or infinity values in the input dataset."); 174 174 -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkModel.cs
r15583 r16158 106 106 x[column] = inputData[row, column]; 107 107 } 108 alglib.mlpprocess(multiLayerPerceptron, x, ref y); 108 // NOTE: mlpprocess changes data in multiLayerPerceptron and is therefore not thread-save! 109 lock (multiLayerPerceptron) { 110 alglib.mlpprocess(multiLayerPerceptron, x, ref y); 111 } 109 112 yield return y[0]; 110 113 } … … 112 115 113 116 public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) { 114 double[,] inputData = dataset.ToArray( 117 double[,] inputData = dataset.ToArray(allowedInputVariables, rows); 115 118 116 119 int n = inputData.GetLength(0); … … 123 126 x[column] = inputData[row, column]; 124 127 } 125 alglib.mlpprocess(multiLayerPerceptron, x, ref y); 128 // NOTE: mlpprocess changes data in multiLayerPerceptron and is therefore not thread-save! 129 lock (multiLayerPerceptron) { 130 alglib.mlpprocess(multiLayerPerceptron, x, ref y); 131 } 126 132 // find class for with the largest probability value 127 133 int maxProbClassIndex = 0; -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegression.cs
r15583 r16158 115 115 public NeuralNetworkRegression() 116 116 : base() { 117 var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 118 (IntValue)new IntValue(0).AsReadOnly(), 119 (IntValue)new IntValue(1).AsReadOnly(), 117 var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] { 118 (IntValue)new IntValue(0).AsReadOnly(), 119 (IntValue)new IntValue(1).AsReadOnly(), 120 120 (IntValue)new IntValue(2).AsReadOnly() }); 121 121 var selectedHiddenLayerValue = (from v in validHiddenLayerValues … … 186 186 IEnumerable<int> rows = problemData.TrainingIndices; 187 187 double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows); 188 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 188 if (inputMatrix.ContainsNanOrInfinity()) 189 189 throw new NotSupportedException("Neural network regression does not support NaN or infinity values in the input dataset."); 190 190 -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/NonlinearRegression/NonlinearRegression.cs
r15583 r16158 186 186 qualityTable.Rows.Add(testRMSERow); 187 187 Results.Add(new Result(qualityTable.Name, qualityTable.Name + " for all restarts", qualityTable)); 188 if (SetSeedRandomly) Seed = (new System.Random()).Next(); 188 if (SetSeedRandomly) Seed = RandomSeedGenerator.GetSeed(); 189 189 var rand = new MersenneTwister((uint)Seed); 190 190 bestSolution = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, ApplyLinearScaling, rand); -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestClassification.cs
r15583 r16158 135 135 protected override void Run(CancellationToken cancellationToken) { 136 136 double rmsError, relClassificationError, outOfBagRmsError, outOfBagRelClassificationError; 137 if (SetSeedRandomly) Seed = new System.Random().Next(); 137 if (SetSeedRandomly) Seed = Random.RandomSeedGenerator.GetSeed(); 138 138 139 139 var model = CreateRandomForestClassificationModel(Problem.ProblemData, NumberOfTrees, R, M, Seed, out rmsError, out relClassificationError, out outOfBagRmsError, out outOfBagRelClassificationError); -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs
r15583 r16158 310 310 public static RandomForestModel CreateClassificationModel(IClassificationProblemData problemData, int nTrees, double r, double m, int seed, 311 311 out double rmsError, out double outOfBagRmsError, out double relClassificationError, out double outOfBagRelClassificationError) { 312 return CreateClassificationModel(problemData, problemData.TrainingIndices, nTrees, r, m, seed, 312 return CreateClassificationModel(problemData, problemData.TrainingIndices, nTrees, r, m, seed, 313 313 out rmsError, out outOfBagRmsError, out relClassificationError, out outOfBagRelClassificationError); 314 314 } … … 370 370 371 371 private static void AssertInputMatrix(double[,] inputMatrix) { 372 if (inputMatrix.Cast<double>().Any(x => Double.IsNaN(x) || Double.IsInfinity(x))) 372 if (inputMatrix.ContainsNanOrInfinity()) 373 373 throw new NotSupportedException("Random forest modeling does not support NaN or infinity values in the input dataset."); 374 374 } -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestRegression.cs
r15583 r16158 134 134 protected override void Run(CancellationToken cancellationToken) { 135 135 double rmsError, avgRelError, outOfBagRmsError, outOfBagAvgRelError; 136 if (SetSeedRandomly) Seed = new System.Random().Next(); 136 if (SetSeedRandomly) Seed = Random.RandomSeedGenerator.GetSeed(); 137 137 var model = CreateRandomForestRegressionModel(Problem.ProblemData, NumberOfTrees, R, M, Seed, 138 138 out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError); -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineModel.cs
r15583 r16158 155 155 private IEnumerable<double> GetEstimatedValuesHelper(IDataset dataset, IEnumerable<int> rows) { 156 156 // calculate predictions for the currently requested rows 157 svm_problem problem = SupportVectorMachineUtil.CreateSvmProblem(dataset, TargetVariable, allowedInputVariables, rows); 157 svm_problem problem = SupportVectorMachineUtil.CreateSvmProblem(dataset, allowedInputVariables, rows); 158 158 svm_problem scaledProblem = rangeTransform.Scale(problem); 159 159 -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineUtil.cs
r15583 r16158 35 35 public class SupportVectorMachineUtil { 36 36 /// <summary> 37 /// Transforms <paramref name="problemData"/> into a data structure as needed by libSVM. 38 /// </summary> 39 /// <param name="problemData">The problem data to transform</param> 37 /// Transforms <paramref name="dataset"/> into a data structure as needed by libSVM. 38 /// </summary> 39 /// <param name="dataset">The source dataset</param> 40 /// <param name="targetVariable">The target variable</param> 41 /// <param name="inputVariables">The selected input variables to include in the svm_problem.</param> 40 42 /// <param name="rowIndices">The rows of the dataset that should be contained in the resulting SVM-problem</param> 41 43 /// <returns>A problem data type that can be used to train a support vector machine.</returns> 42 44 public static svm_problem CreateSvmProblem(IDataset dataset, string targetVariable, IEnumerable<string> inputVariables, IEnumerable<int> rowIndices) { 43 double[] targetVector = dataset.GetDoubleValues(targetVariable, rowIndices).ToArray(); 44 svm_node[][] nodes = new svm_node[targetVector.Length][]; 45 double[] targetVector ; 46 var nRows = rowIndices.Count(); 47 if (string.IsNullOrEmpty(targetVariable)) { 48 // if the target variable is not set (e.g. for prediction of a trained model) we just use a zero vector 49 targetVector = new double[nRows]; 50 } else { 51 targetVector = dataset.GetDoubleValues(targetVariable, rowIndices).ToArray(); 52 } 53 svm_node[][] nodes = new svm_node[nRows][]; 45 54 int maxNodeIndex = 0; 46 55 int svmProblemRowIndex = 0; … … 66 75 67 76 /// <summary> 77 /// Transforms <paramref name="dataset"/> into a data structure as needed by libSVM for prediction. 
78 /// </summary> 79 /// <param name="dataset">The problem data to transform</param> 80 /// <param name="inputVariables">The selected input variables to include in the svm_problem.</param> 81 /// <param name="rowIndices">The rows of the dataset that should be contained in the resulting SVM-problem</param> 82 /// <returns>A problem data type that can be used for prediction with a trained support vector machine.</returns> 83 public static svm_problem CreateSvmProblem(IDataset dataset, IEnumerable<string> inputVariables, IEnumerable<int> rowIndices) { 84 // for prediction we don't need a target variable 85 return CreateSvmProblem(dataset, string.Empty, inputVariables, rowIndices); 86 } 87 88 /// <summary> 68 89 /// Instantiate and return a svm_parameter object with default values. 69 90 /// </summary> -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/TSNE/TSNEAlgorithm.cs
r15583 r16158 275 275 if (wdist != null) wdist.Initialize(problemData); 276 276 if (state == null) { 277 if (SetSeedRandomly) Seed = new System.Random().Next(); 277 if (SetSeedRandomly) Seed = RandomSeedGenerator.GetSeed(); 278 278 var random = new MersenneTwister((uint)Seed); 279 279 var dataset = problemData.Dataset; -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/TimeSeries/AutoregressiveModeling.cs
r15583 r16158 26 26 using HeuristicLab.Core; 27 27 using HeuristicLab.Data; 28 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; 29 28 using HeuristicLab.Optimization; 30 29 using HeuristicLab.Parameters; … … 97 96 inputMatrix[i, timeOffset] = targetValues[i + problemData.TrainingPartition.Start]; 98 97 99 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 98 if (inputMatrix.ContainsNanOrInfinity()) 100 99 throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset."); 101 100 -
branches/2883_GBTModelStorage/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs
r15583 r16158 91 91 int[] xyc; 92 92 double[,] inputMatrix = dataset.ToArray(allowedInputVariables, rows); 93 if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) 93 if (inputMatrix.ContainsNanOrInfinity()) 94 94 throw new NotSupportedException("k-Means clustering does not support NaN or infinity values in the input dataset."); 95 95
Note: See TracChangeset for help on using the changeset viewer.