Free cookie consent management tool by TermsFeed Policy Generator

# Changeset 6002

Ignore:
Timestamp:
04/11/11 18:41:03 (13 years ago)
Message:

#790 Fixed minor issues in LDA, LR, SVC and SVR to make sure everything works correctly in the presence of NaN and infinity values.

Location:
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
5 edited

Unmodified
Removed
• ## trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/AlglibUtil.cs

 r5809 public static class AlglibUtil { public static double[,] PrepareInputMatrix(Dataset dataset, IEnumerable variables, IEnumerable rows) { List allowedRows = CalculateAllowedRows(dataset, variables, rows).ToList(); List variablesList = variables.ToList(); List rowsList = rows.ToList(); double[,] matrix = new double[allowedRows.Count, variables.Count()]; for (int row = 0; row < allowedRows.Count; row++) { double[,] matrix = new double[rowsList.Count, variablesList.Count]; for (int row = 0; row < rowsList.Count; row++) { int col = 0; foreach (string column in variables) { matrix[row, col] = dataset[column, row]; matrix[row, col] = dataset[column, rowsList[row]]; col++; } return matrix; } private static IEnumerable CalculateAllowedRows(Dataset dataset, IEnumerable variables, IEnumerable rows) { // return only rows that contain no infinity or NaN values return from row in rows where (from variable in variables let x = dataset[variable, row] where double.IsInfinity(x) || double.IsNaN(x) select 1) .Any() == false select row; } } }
• ## trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs

 r5809 int nClasses = problemData.ClassNames.Count(); double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); if (inputMatrix.Cast().Any(x => double.IsNaN(x) || double.IsInfinity(x))) throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset."); // change class values into class index
• ## trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

 r5809 IEnumerable rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart); double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); if (inputMatrix.Cast().Any(x => double.IsNaN(x) || double.IsInfinity(x))) throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset."); alglib.linearmodel lm = new alglib.linearmodel();
• ## trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineUtil.cs

 r5809 int maxNodeIndex = 0; int svmProblemRowIndex = 0; List inputVariablesList = inputVariables.ToList(); foreach (int row in rowIndices) { tempRow = new List(); foreach (var inputVariable in inputVariables) { int col = dataset.GetVariableIndex(inputVariable); double value = dataset[row, col]; int colIndex = 1; // make sure the smallest node index for SVM = 1 foreach (var inputVariable in inputVariablesList) { double value = dataset[row, dataset.GetVariableIndex(inputVariable)]; // SVM also works with missing values // => don't add NaN values in the dataset to the sparse SVM matrix representation if (!double.IsNaN(value)) { int nodeIndex = col + 1; // make sure the smallest nodeIndex is 1 (libSVM convention) tempRow.Add(new SVM.Node(nodeIndex, value)); if (nodeIndex > maxNodeIndex) maxNodeIndex = nodeIndex; tempRow.Add(new SVM.Node(colIndex, value)); // nodes must be sorted in ascending ordered by column index if (colIndex > maxNodeIndex) maxNodeIndex = colIndex; } colIndex++; } nodes[svmProblemRowIndex++] = tempRow.OrderBy(x => x.Index).ToArray(); // make sure the values are sorted by node index nodes[svmProblemRowIndex++] = tempRow.ToArray(); }
• ## trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs

 r5914 int[] xyc; double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); if (inputMatrix.Cast().Any(x => double.IsNaN(x) || double.IsInfinity(x))) throw new NotSupportedException("k-Means clustering does not support NaN or infinity values in the input dataset."); alglib.kmeansgenerate(inputMatrix, inputMatrix.GetLength(0), inputMatrix.GetLength(1), k, restarts + 1, out info, out centers, out xyc); if (info != 1) throw new ArgumentException("Error in calculation of k-Means clustering solution");
Note: See TracChangeset for help on using the changeset viewer.