Free cookie consent management tool by TermsFeed Policy Generator

Changeset 5658


Ignore:
Timestamp:
03/10/11 12:38:43 (14 years ago)
Author:
gkronber
Message:

#1418 implemented a wrapper for the LDA (linear discriminant analysis) implementation in alglib.

Location:
branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4
Files:
1 added
5 edited
1 moved

Legend:

Unmodified
Added
Removed
  • branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj

    r5651 r5658  
    116116    <Compile Include="kMeans\KMeansClusteringModel.cs" />
    117117    <Compile Include="kMeans\KMeansClusteringSolution.cs" />
    118     <Compile Include="kMeans\KMeansClusteringUtil.cs" />
     118    <Compile Include="kMeans\KMeansClusteringUtil.cs">
     119      <SubType>Code</SubType>
     120    </Compile>
     121    <Compile Include="Linear\AlglibUtil.cs" />
     122    <Compile Include="Linear\LinearDiscriminantAnalysis.cs" />
    119123    <Compile Include="Linear\LinearRegression.cs">
    120       <SubType>Code</SubType>
    121     </Compile>
    122     <Compile Include="Linear\LinearRegressionUtil.cs">
    123124      <SubType>Code</SubType>
    124125    </Compile>
  • branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/AlglibUtil.cs

    r5642 r5658  
    2525
    2626namespace HeuristicLab.Algorithms.DataAnalysis {
    27   public static class LinearRegressionUtil {
    28     public static double[,] PrepareInputMatrix(Dataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, int start, int end) {
    29       List<int> allowedRows = CalculateAllowedRows(dataset, targetVariable, allowedInputVariables, start, end);
     27  public static class AlglibUtil {
     28    public static double[,] PrepareInputMatrix(Dataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) {
     29      List<int> allowedRows = CalculateAllowedRows(dataset, variables, rows).ToList();
    3030
    31       double[,] matrix = new double[allowedRows.Count, allowedInputVariables.Count() + 1];
     31      double[,] matrix = new double[allowedRows.Count, variables.Count()];
    3232      for (int row = 0; row < allowedRows.Count; row++) {
    3333        int col = 0;
    34         foreach (string column in allowedInputVariables) {
     34        foreach (string column in variables) {
    3535          matrix[row, col] = dataset[column, row];
    3636          col++;
    3737        }
    38         matrix[row, allowedInputVariables.Count()] = dataset[targetVariable, row];
    3938      }
    4039      return matrix;
    4140    }
    4241
    43     private static List<int> CalculateAllowedRows(Dataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, int start, int end) {
    44       List<int> allowedRows = new List<int>();
    45       bool add = false;
    46 
    47       for (int row = start; row < end; row++) {
    48         add = true;
    49         foreach (string column in allowedInputVariables) {
    50           double value = dataset[column, row];
    51           if (double.IsInfinity(value) ||
    52             double.IsNaN(value))
    53             add = false;
    54         }
    55         if (double.IsNaN(dataset[targetVariable, row]))
    56           add = false;
    57         if (add)
    58           allowedRows.Add(row);
    59         add = true;
    60       }
    61       return allowedRows;
     42    private static IEnumerable<int> CalculateAllowedRows(Dataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) {
     43      // return only rows that contain no infinity or NaN values
     44      return from row in rows
     45             where (from variable in variables
     46                    let x = dataset[variable, row]
     47                    where double.IsInfinity(x) || double.IsNaN(x)
     48                    select 1)
     49                    .Any() == false
     50             select row;
    6251    }
    6352  }
  • branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs

    r5649 r5658  
    7575      int samplesStart = problemData.TrainingPartitionStart.Value;
    7676      int samplesEnd = problemData.TrainingPartitionEnd.Value;
    77 
    78       double[,] inputMatrix = LinearRegressionUtil.PrepareInputMatrix(dataset, targetVariable, allowedInputVariables, samplesStart, samplesEnd);
     77      IEnumerable<int> rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart);
     78      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
    7979
    8080      alglib.linearmodel lm = new alglib.linearmodel();
  • branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs

    r5651 r5658  
    9494      double[,] centers;
    9595      int[] xyc;
    96       double[,] inputMatrix = KMeansClusteringUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
     96      double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
    9797      alglib.kmeansgenerate(inputMatrix, inputMatrix.GetLength(0), inputMatrix.GetLength(1), k, restarts + 1, out info, out centers, out xyc);
    9898      if (info != 1) throw new ArgumentException("Error in calculation of k-Means clustering solution");
  • branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClusteringModel.cs

    r5651 r5658  
    6464    public KMeansClusteringModel(double[,] centers, IEnumerable<string> allowedInputVariables)
    6565      : base() {
     66      this.name = ItemName;
     67      this.description = ItemDescription;
    6668      // dissect center matrix into list of double[]
    6769      // centers are given as double matrix where number of rows = dimensions and number of columns = clusters
  • branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClusteringUtil.cs

    r5651 r5658  
    2727namespace HeuristicLab.Algorithms.DataAnalysis {
    2828  public static class KMeansClusteringUtil {
    29     public static double[,] PrepareInputMatrix(Dataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows) {
    30       List<int> allowedRows = CalculateAllowedRows(dataset, allowedInputVariables, rows).ToList();
    31 
    32       double[,] matrix = new double[allowedRows.Count, allowedInputVariables.Count()];
    33       for (int row = 0; row < allowedRows.Count; row++) {
    34         int col = 0;
    35         foreach (string column in allowedInputVariables) {
    36           matrix[row, col] = dataset[column, row];
    37           col++;
    38         }
    39       }
    40       return matrix;
    41     }
    42 
    43     private static IEnumerable<int> CalculateAllowedRows(Dataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows) {
    44       // return only rows that contain no infinity or NaN values
    45       return from row in rows
    46              where (from inputVariable in allowedInputVariables
    47                     let x = dataset[inputVariable, row]
    48                     where double.IsInfinity(x) || double.IsNaN(x)
    49                     select 1)
    50                     .Any() == false
    51              select row;
    52     }
    53 
    5429    public static IEnumerable<int> FindClosestCenters(IEnumerable<double[]> centers, Dataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows) {
    5530      int nRows = rows.Count();
Note: See TracChangeset for help on using the changeset viewer.