Changeset 6002


Ignore:
Timestamp:
04/11/11 18:41:03 (8 years ago)
Author:
gkronber
Message:

#790 Fixed minor issues in LDA, LR, SVC and SVR to make sure everything works correctly in the presence of NaN and infinity values.

Location:
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/AlglibUtil.cs

    r5809 r6002  
    2727  public static class AlglibUtil {
    2828    public static double[,] PrepareInputMatrix(Dataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) {
    29       List<int> allowedRows = CalculateAllowedRows(dataset, variables, rows).ToList();
     29      List<string> variablesList = variables.ToList();
     30      List<int> rowsList = rows.ToList();
    3031
    31       double[,] matrix = new double[allowedRows.Count, variables.Count()];
    32       for (int row = 0; row < allowedRows.Count; row++) {
     32      double[,] matrix = new double[rowsList.Count, variablesList.Count];
     33      for (int row = 0; row < rowsList.Count; row++) {
    3334        int col = 0;
    3435        foreach (string column in variables) {
    35           matrix[row, col] = dataset[column, row];
     36          matrix[row, col] = dataset[column, rowsList[row]];
    3637          col++;
    3738        }
     
    3940      return matrix;
    4041    }
    41 
    42     private static IEnumerable<int> CalculateAllowedRows(Dataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) {
    43       // return only rows that contain no infinity or NaN values
    44       return from row in rows
    45              where (from variable in variables
    46                     let x = dataset[variable, row]
    47                     where double.IsInfinity(x) || double.IsNaN(x)
    48                     select 1)
    49                     .Any() == false
    50              select row;
    51     }
    5242  }
    5343}
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearDiscriminantAnalysis.cs

    r5809 r6002  
    7373      int nClasses = problemData.ClassNames.Count();
    7474      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     75      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
     76        throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset.");
    7577
    7678      // change class values into class index
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r5809 r6002  
    7676      IEnumerable<int> rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart);
    7777      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
     78      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
     79        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
    7880
    7981      alglib.linearmodel lm = new alglib.linearmodel();
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/SupportVectorMachine/SupportVectorMachineUtil.cs

    r5809 r6002  
    4141      int maxNodeIndex = 0;
    4242      int svmProblemRowIndex = 0;
     43      List<string> inputVariablesList = inputVariables.ToList();
    4344      foreach (int row in rowIndices) {
    4445        tempRow = new List<SVM.Node>();
    45         foreach (var inputVariable in inputVariables) {
    46           int col = dataset.GetVariableIndex(inputVariable);
    47           double value = dataset[row, col];
     46        int colIndex = 1; // make sure the smallest node index for SVM = 1
     47        foreach (var inputVariable in inputVariablesList) {
     48          double value = dataset[row, dataset.GetVariableIndex(inputVariable)];
     49          // SVM also works with missing values
     50          // => don't add NaN values in the dataset to the sparse SVM matrix representation
    4851          if (!double.IsNaN(value)) {
    49             int nodeIndex = col + 1; // make sure the smallest nodeIndex is 1 (libSVM convention)
    50             tempRow.Add(new SVM.Node(nodeIndex, value));
    51             if (nodeIndex > maxNodeIndex) maxNodeIndex = nodeIndex;
     52            tempRow.Add(new SVM.Node(colIndex, value)); // nodes must be sorted in ascending order by column index
     53            if (colIndex > maxNodeIndex) maxNodeIndex = colIndex;
    5254          }
     55          colIndex++;
    5356        }
    54         nodes[svmProblemRowIndex++] = tempRow.OrderBy(x => x.Index).ToArray(); // make sure the values are sorted by node index
     57        nodes[svmProblemRowIndex++] = tempRow.ToArray();
    5558      }
    5659
  • trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs

    r5914 r6002  
    9292      int[] xyc;
    9393      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     94      if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
     95        throw new NotSupportedException("k-Means clustering does not support NaN or infinity values in the input dataset.");
     96
    9497      alglib.kmeansgenerate(inputMatrix, inputMatrix.GetLength(0), inputMatrix.GetLength(1), k, restarts + 1, out info, out centers, out xyc);
    9598      if (info != 1) throw new ArgumentException("Error in calculation of k-Means clustering solution");
Note: See TracChangeset for help on using the changeset viewer.