Changeset 14400 for trunk/sources/HeuristicLab.Algorithms.DataAnalysis
- Timestamp: 11/17/16 15:41:33 (8 years ago)
- Location: trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files: 21 edited, 1 copied
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/GaussianProcess/GaussianProcessModel.cs
(r14393 → r14400)

      try {
        CalculateModel(ds, rows, scaleInputs);
-     } catch (alglib.alglibexception ae) {
+     }
+     catch (alglib.alglibexception ae) {
        // wrap exception so that calling code doesn't have to know about alglib implementation
        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
…
      private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling) {
        if (scaling != null) {
-         // TODO: completely remove Scaling class
-         List<ITransformation<double>> transformations = new List<ITransformation<double>>();
-
-         foreach (var varName in allowedInputs) {
-           double min;
-           double max;
-           scaling.GetScalingParameters(varName, out min, out max);
-           var add = -min / (max - min);
-           var mult = 1.0 / (max - min);
-           transformations.Add(new LinearTransformation(allowedInputs) { Addend = add, Multiplier = mult });
-         }
-         return ds.ToArray(allowedInputs, transformations, rows);
+         return AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling);
        } else {
-         return ds.ToArray(allowedInputs, rows);
+         return AlglibUtil.PrepareInputMatrix(ds, allowedInputs, rows);
        }
      }
…
        return Enumerable.Range(0, newN)
          .Select(i => ms[i] + Util.ScalarProd(Ks[i], alpha));
-     } catch (alglib.alglibexception ae) {
+     }
+     catch (alglib.alglibexception ae) {
        // wrap exception so that calling code doesn't have to know about alglib implementation
        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
…
        }
        return kss;
-     } catch (alglib.alglibexception ae) {
+     }
+     catch (alglib.alglibexception ae) {
        // wrap exception so that calling code doesn't have to know about alglib implementation
        throw new ArgumentException("There was a problem in the calculation of the Gaussian process model", ae);
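Both branches of GetData perform the same per-variable min-max scaling: the LinearTransformation block removed here spells it out as x * mult + add with mult = 1 / (max - min) and add = -min / (max - min), and the restored AlglibUtil.PrepareAndScaleInputMatrix is assumed to apply the equivalent mapping via the Scaling class. A minimal sketch of that mapping (MinMaxScaleDemo is an illustrative helper, not part of the changeset):

    // Illustration only: the min-max scaling both code paths express.
    // Assumes min and max are the bounds of a variable on the training partition.
    using System;

    public static class MinMaxScaleDemo {
      // Same mapping the removed LinearTransformation encoded as x * mult + add.
      public static double Scale(double x, double min, double max) {
        double mult = 1.0 / (max - min);
        double add = -min / (max - min);
        return x * mult + add;  // == (x - min) / (max - min), i.e. [min, max] -> [0, 1]
      }

      public static void Main() {
        Console.WriteLine(Scale(7.5, 5.0, 10.0)); // 0.5
      }
    }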
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
(r14393 → r14400)

        <SubType>Code</SubType>
      </Compile>
+     <Compile Include="Linear\AlglibUtil.cs" />
+     <Compile Include="Linear\Scaling.cs" />
      <Compile Include="Linear\LinearDiscriminantAnalysis.cs" />
      <Compile Include="Linear\LinearRegression.cs">
…
      <Compile Include="Linear\MultinomialLogitClassificationSolution.cs" />
      <Compile Include="Linear\MultinomialLogitModel.cs" />
-     <Compile Include="Linear\Scaling.cs" />
      <Compile Include="MctsSymbolicRegression\Automaton.cs" />
      <Compile Include="MctsSymbolicRegression\CodeGenerator.cs" />
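AlglibUtil.cs is re-added to the project here, but its body is not shown in this view. The following is therefore only a sketch of what AlglibUtil.PrepareInputMatrix is assumed to do, based on how it is called throughout the changeset: copy the selected variables and rows of a dataset into the double[,] layout the alglib wrappers expect, one column per variable (with the target appended as the last column by the callers). IDatasetLike is a stand-in for the single HeuristicLab IDataset member used, not the real interface.

    // Sketch under assumptions; not the actual contents of Linear\AlglibUtil.cs.
    using System.Collections.Generic;
    using System.Linq;

    // Stand-in for the one IDataset member this sketch needs.
    public interface IDatasetLike {
      IEnumerable<double> GetDoubleValues(string variableName, IEnumerable<int> rows);
    }

    public static class AlglibUtilSketch {
      // Builds a [rowCount, variableCount] matrix; no scaling here, and the
      // NaN/infinity checks remain at the call sites, as the diffs show.
      public static double[,] PrepareInputMatrix(IDatasetLike dataset, IEnumerable<string> variables, IEnumerable<int> rows) {
        var rowList = rows.ToList();
        var varList = variables.ToList();
        var matrix = new double[rowList.Count, varList.Count];
        for (int col = 0; col < varList.Count; col++) {
          int row = 0;
          foreach (var value in dataset.GetDoubleValues(varList[col], rowList)) {
            matrix[row, col] = value;
            row++;
          }
        }
        return matrix;
      }
    }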
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs
(r14393 → r14400)

      IEnumerable<int> rows = problemData.TrainingIndices;
      int nClasses = problemData.ClassNames.Count();
-     double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
+     double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
        throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset.");
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
(r14393 → r14400)

      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
      IEnumerable<int> rows = problemData.TrainingIndices;
-     double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
+     double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
…
      int nRows = inputMatrix.GetLength(0);
      int nFeatures = inputMatrix.GetLength(1) - 1;
-     double[] coefficients;
+     double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant

      int retVal = 1;
…
      alglib.lrunpack(lm, out coefficients, out nFeatures);

-     var tree = LinearModelToTreeConverter.CreateTree(allowedInputVariables.ToArray(),
-       coefficients.Take(nFeatures).ToArray(), @const: coefficients[nFeatures]);
+     ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
+     ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
+     tree.Root.AddSubtree(startNode);
+     ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
+     startNode.AddSubtree(addition);
+
+     int col = 0;
+     foreach (string column in allowedInputVariables) {
+       VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
+       vNode.VariableName = column;
+       vNode.Weight = coefficients[col];
+       addition.AddSubtree(vNode);
+       col++;
+     }
+
+     ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
+     cNode.Value = coefficients[coefficients.Length - 1];
+     addition.AddSubtree(cNode);

      SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), (IRegressionProblemData)problemData.Clone());
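The hand-built tree restored on the right encodes the same model as the removed LinearModelToTreeConverter.CreateTree call: a weighted sum of the input variables plus a constant, with alglib.lrunpack returning the intercept as the last coefficient. A minimal sketch of the function such a tree evaluates (EvaluateLinearModel is a hypothetical helper, not part of the changeset):

    // Illustration: y = w1*x1 + ... + wn*xn + c, where c = coefficients[nFeatures]
    // (the last entry returned by alglib.lrunpack).
    public static class LinearModelSketch {
      public static double EvaluateLinearModel(double[] coefficients, double[] inputs) {
        int nFeatures = coefficients.Length - 1;   // last coefficient is the constant
        double result = coefficients[nFeatures];
        for (int i = 0; i < nFeatures; i++)
          result += coefficients[i] * inputs[i];
        return result;
      }
    }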
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitClassification.cs
(r14393 → r14400)

      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
      IEnumerable<int> rows = problemData.TrainingIndices;
-     double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
+     double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
        throw new NotSupportedException("Multinomial logit classification does not support NaN or infinity values in the input dataset.");
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/MultinomialLogitModel.cs
(r14393 → r14400)

      public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
-       double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
+       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);

        int n = inputData.GetLength(0);
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/Scaling.cs
(r14393 → r14400)

      namespace HeuristicLab.Algorithms.DataAnalysis {
-       [Obsolete("Use transformation classes in Problems.DataAnalysis instead")]
        [StorableClass]
        [Item(Name = "Scaling", Description = "Contains information about scaling of variables for data-analysis algorithms.")]
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/LdaInitializer.cs
(r14393 → r14400)

      var attributes = data.AllowedInputVariables.Count();

-     var ldaDs = data.Dataset.ToArray(
-       data.AllowedInputVariables.Concat(data.TargetVariable.ToEnumerable()),
-       data.TrainingIndices);
+     var ldaDs = AlglibUtil.PrepareInputMatrix(data.Dataset,
+       data.AllowedInputVariables.Concat(data.TargetVariable.ToEnumerable()),
+       data.TrainingIndices);

      // map class values to sequential natural numbers (required by alglib)
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/Initialization/PcaInitializer.cs
(r14393 → r14400)

      var attributes = data.AllowedInputVariables.Count();

-     var pcaDs = data.Dataset.ToArray(data.AllowedInputVariables, data.TrainingIndices);
+     var pcaDs = AlglibUtil.PrepareInputMatrix(data.Dataset, data.AllowedInputVariables, data.TrainingIndices);

      int info;
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaGradientCalculator.cs
(r14393 → r14400)

      }

-     var data = problemData.Dataset.ToArray(problemData.AllowedInputVariables,
-       problemData.TrainingIndices);
+     var data = AlglibUtil.PrepareInputMatrix(problemData.Dataset, problemData.AllowedInputVariables,
+       problemData.TrainingIndices);
      var classes = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Nca/NcaModel.cs
(r14393 → r14400)

      public double[,] Reduce(IDataset dataset, IEnumerable<int> rows) {
-       var data = dataset.ToArray(allowedInputVariables, rows);
+       var data = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);

        var targets = dataset.GetDoubleValues(TargetVariable, rows).ToArray();
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NearestNeighbour/NearestNeighbourModel.cs
(r14393 → r14400)

      if (IsCompatibilityLoaded) {
        // no scaling
-       inputMatrix = dataset.ToArray(
+       inputMatrix = AlglibUtil.PrepareInputMatrix(dataset,
          this.allowedInputVariables.Concat(new string[] { targetVariable }),
          rows);
…
      private static double[,] CreateScaledData(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, double[] offsets, double[] factors) {
-       var transforms =
-         variables.Select(
-           (_, colIdx) =>
-             new LinearTransformation(variables) { Addend = offsets[colIdx] * factors[colIdx], Multiplier = factors[colIdx] });
-       return dataset.ToArray(variables, transforms, rows);
+       var x = new double[rows.Count(), variables.Count()];
+       var colIdx = 0;
+       foreach (var variableName in variables) {
+         var rowIdx = 0;
+         foreach (var val in dataset.GetDoubleValues(variableName, rows)) {
+           x[rowIdx, colIdx] = (val + offsets[colIdx]) * factors[colIdx];
+           rowIdx++;
+         }
+         colIdx++;
+       }
+       return x;
      }
…
      double[,] inputData;
      if (IsCompatibilityLoaded) {
-       inputData = dataset.ToArray(allowedInputVariables, rows);
+       inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
      } else {
        inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
…
      double[,] inputData;
      if (IsCompatibilityLoaded) {
-       inputData = dataset.ToArray(allowedInputVariables, rows);
+       inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
      } else {
        inputData = CreateScaledData(dataset, allowedInputVariables, rows, offsets, weights);
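The restored loop and the removed LinearTransformation path compute the same scaled cell value, since (val + offset) * factor equals val * factor + offset * factor; that identity is why the removed code used Multiplier = factors[colIdx] and Addend = offsets[colIdx] * factors[colIdx]. A small sketch of the equivalence (ScaledDataEquivalence is illustrative only, not part of the changeset):

    // Illustration: both versions of CreateScaledData produce identical values.
    public static class ScaledDataEquivalence {
      public static double ExplicitLoopForm(double val, double offset, double factor) {
        return (val + offset) * factor;     // restored r14400 code path
      }
      public static double TransformationForm(double val, double offset, double factor) {
        double multiplier = factor;
        double addend = offset * factor;
        return val * multiplier + addend;   // removed r14393 code path
      }
    }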
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkClassification.cs
(r14393 → r14400)

      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
      IEnumerable<int> rows = problemData.TrainingIndices;
-     double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
+     double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
        throw new NotSupportedException("Neural network classification does not support NaN or infinity values in the input dataset.");
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleClassification.cs
(r14393 → r14400)

      public NeuralNetworkEnsembleClassification()
        : base() {
-       var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] {
-         (IntValue)new IntValue(0).AsReadOnly(),
-         (IntValue)new IntValue(1).AsReadOnly(),
+       var validHiddenLayerValues = new ItemSet<IntValue>(new IntValue[] {
+         (IntValue)new IntValue(0).AsReadOnly(),
+         (IntValue)new IntValue(1).AsReadOnly(),
          (IntValue)new IntValue(2).AsReadOnly() });
        var selectedHiddenLayerValue = (from v in validHiddenLayerValues
…
      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
      IEnumerable<int> rows = problemData.TrainingIndices;
-     double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
+     double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
        throw new NotSupportedException("Neural network ensemble classification does not support NaN or infinity values in the input dataset.");
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleModel.cs
(r14393 → r14400)

      public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
-       double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
+       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);

        int n = inputData.GetLength(0);
…
      public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
-       double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
+       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);

        int n = inputData.GetLength(0);
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkEnsembleRegression.cs
(r14393 → r14400)

      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
      IEnumerable<int> rows = problemData.TrainingIndices;
-     double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
+     double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
        throw new NotSupportedException("Neural network ensemble regression does not support NaN or infinity values in the input dataset.");
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkModel.cs
(r14393 → r14400)

      public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
-       double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
+       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);

        int n = inputData.GetLength(0);
…
      public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
-       double[,] inputData = dataset.ToArray(allowedInputVariables, rows);
+       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);

        int n = inputData.GetLength(0);
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/NeuralNetwork/NeuralNetworkRegression.cs
(r14393 → r14400)

      IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
      IEnumerable<int> rows = problemData.TrainingIndices;
-     double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
+     double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
        throw new NotSupportedException("Neural network regression does not support NaN or infinity values in the input dataset.");
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/RandomForest/RandomForestModel.cs
(r14393 → r14400)

      public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
-       double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
+       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
        AssertInputMatrix(inputData);
…
      public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
-       double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
+       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
        AssertInputMatrix(inputData);
…
      public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
-       double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
+       double[,] inputData = AlglibUtil.PrepareInputMatrix(dataset, AllowedInputVariables, rows);
        AssertInputMatrix(inputData);
…
        out double rmsError, out double outOfBagRmsError, out double avgRelError, out double outOfBagAvgRelError) {
        var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
-       double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices);
+       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset, variables, trainingIndices);

        alglib.dfreport rep;
…
        var variables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
-       double[,] inputMatrix = problemData.Dataset.ToArray(variables, trainingIndices);
+       double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(problemData.Dataset, variables, trainingIndices);

        var classValues = problemData.ClassValues.ToArray();
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/TimeSeries/AutoregressiveModeling.cs
(r14391 → r14400)

      alglib.lrunpack(lm, out coefficients, out nFeatures);

-     var tree = LinearModelToTreeConverter.CreateTree(
-       variableNames: Enumerable.Repeat(problemData.TargetVariable, nFeatures).ToArray(),
-       lags: Enumerable.Range(0, timeOffset).Select(i => (i + 1) * -1).ToArray(),
-       coefficients: coefficients.Take(nFeatures).ToArray(),
-       @const: coefficients[nFeatures]
-     );
+
+     ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
+     ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
+     tree.Root.AddSubtree(startNode);
+     ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
+     startNode.AddSubtree(addition);
+
+     for (int i = 0; i < timeOffset; i++) {
+       LaggedVariableTreeNode node = (LaggedVariableTreeNode)new LaggedVariable().CreateTreeNode();
+       node.VariableName = targetVariable;
+       node.Weight = coefficients[i];
+       node.Lag = (i + 1) * -1;
+       addition.AddSubtree(node);
+     }
+
+     ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
+     cNode.Value = coefficients[coefficients.Length - 1];
+     addition.AddSubtree(cNode);

      var interpreter = new SymbolicTimeSeriesPrognosisExpressionTreeInterpreter(problemData.TargetVariable);
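The lagged-variable tree assembled on the right encodes an autoregressive model of order timeOffset, y(t) = w1*y(t-1) + ... + wp*y(t-p) + c, with the constant once more taken from the last coefficient returned by alglib.lrunpack. A minimal evaluation sketch (PredictNext is a hypothetical helper, not part of the changeset):

    // Illustration only: the function represented by the lagged-variable tree.
    // Assumes coefficients holds timeOffset lag weights followed by the constant,
    // and that history contains at least timeOffset values before index t.
    public static class AutoregressiveSketch {
      public static double PredictNext(double[] coefficients, double[] history, int t) {
        int timeOffset = coefficients.Length - 1;      // number of lagged terms
        double prediction = coefficients[timeOffset];  // constant term (last coefficient)
        for (int i = 0; i < timeOffset; i++)
          prediction += coefficients[i] * history[t - (i + 1)];  // lag = -(i + 1)
        return prediction;
      }
    }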
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs
(r14393 → r14400)

      double[,] centers;
      int[] xyc;
-     double[,] inputMatrix = dataset.ToArray(allowedInputVariables, rows);
+     double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
        throw new NotSupportedException("k-Means clustering does not support NaN or infinity values in the input dataset.");
Note: See TracChangeset for help on using the changeset viewer.