Changeset 5658
- Timestamp: 03/10/11 12:38:43 (14 years ago)
- Location: branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4
- Files: 1 added, 5 edited, 1 moved
Legend:
- Unmodified
- Added
- Removed
branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/HeuristicLab.Algorithms.DataAnalysis-3.4.csproj
r5651 r5658 116 116 <Compile Include="kMeans\KMeansClusteringModel.cs" /> 117 117 <Compile Include="kMeans\KMeansClusteringSolution.cs" /> 118 <Compile Include="kMeans\KMeansClusteringUtil.cs" /> 118 <Compile Include="kMeans\KMeansClusteringUtil.cs"> 119 <SubType>Code</SubType> 120 </Compile> 121 <Compile Include="Linear\AlglibUtil.cs" /> 122 <Compile Include="Linear\LinearDiscriminantAnalysis.cs" /> 119 123 <Compile Include="Linear\LinearRegression.cs"> 120 <SubType>Code</SubType>121 </Compile>122 <Compile Include="Linear\LinearRegressionUtil.cs">123 124 <SubType>Code</SubType> 124 125 </Compile> -
branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/AlglibUtil.cs
r5642 r5658 25 25 26 26 namespace HeuristicLab.Algorithms.DataAnalysis { 27 public static class LinearRegressionUtil {28 public static double[,] PrepareInputMatrix(Dataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, int start, int end) {29 List<int> allowedRows = CalculateAllowedRows(dataset, targetVariable, allowedInputVariables, start, end);27 public static class AlglibUtil { 28 public static double[,] PrepareInputMatrix(Dataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) { 29 List<int> allowedRows = CalculateAllowedRows(dataset, variables, rows).ToList(); 30 30 31 double[,] matrix = new double[allowedRows.Count, allowedInputVariables.Count() + 1];31 double[,] matrix = new double[allowedRows.Count, variables.Count()]; 32 32 for (int row = 0; row < allowedRows.Count; row++) { 33 33 int col = 0; 34 foreach (string column in allowedInputVariables) {34 foreach (string column in variables) { 35 35 matrix[row, col] = dataset[column, row]; 36 36 col++; 37 37 } 38 matrix[row, allowedInputVariables.Count()] = dataset[targetVariable, row];39 38 } 40 39 return matrix; 41 40 } 42 41 43 private static List<int> CalculateAllowedRows(Dataset dataset, string targetVariable, IEnumerable<string> allowedInputVariables, int start, int end) { 44 List<int> allowedRows = new List<int>(); 45 bool add = false; 46 47 for (int row = start; row < end; row++) { 48 add = true; 49 foreach (string column in allowedInputVariables) { 50 double value = dataset[column, row]; 51 if (double.IsInfinity(value) || 52 double.IsNaN(value)) 53 add = false; 54 } 55 if (double.IsNaN(dataset[targetVariable, row])) 56 add = false; 57 if (add) 58 allowedRows.Add(row); 59 add = true; 60 } 61 return allowedRows; 42 private static IEnumerable<int> CalculateAllowedRows(Dataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) { 43 // return only rows that contain no infinity or NaN values 44 return from row in rows 45 where (from variable in variables 
46 let x = dataset[variable, row] 47 where double.IsInfinity(x) || double.IsNaN(x) 48 select 1) 49 .Any() == false 50 select row; 62 51 } 63 52 } -
branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/LinearRegression.cs
r5649 r5658 75 75 int samplesStart = problemData.TrainingPartitionStart.Value; 76 76 int samplesEnd = problemData.TrainingPartitionEnd.Value; 77 78 double[,] inputMatrix = LinearRegressionUtil.PrepareInputMatrix(dataset, targetVariable, allowedInputVariables, samplesStart, samplesEnd);77 IEnumerable<int> rows = Enumerable.Range(samplesStart, samplesEnd - samplesStart); 78 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); 79 79 80 80 alglib.linearmodel lm = new alglib.linearmodel(); -
branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClustering.cs
r5651 r5658 94 94 double[,] centers; 95 95 int[] xyc; 96 double[,] inputMatrix = KMeansClusteringUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);96 double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows); 97 97 alglib.kmeansgenerate(inputMatrix, inputMatrix.GetLength(0), inputMatrix.GetLength(1), k, restarts + 1, out info, out centers, out xyc); 98 98 if (info != 1) throw new ArgumentException("Error in calculation of k-Means clustering solution"); -
branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClusteringModel.cs
r5651 r5658 64 64 public KMeansClusteringModel(double[,] centers, IEnumerable<string> allowedInputVariables) 65 65 : base() { 66 this.name = ItemName; 67 this.description = ItemDescription; 66 68 // disect center matrix into list of double[] 67 69 // centers are given as double matrix where number of rows = dimensions and number of columns = clusters -
branches/DataAnalysis Refactoring/HeuristicLab.Algorithms.DataAnalysis/3.4/kMeans/KMeansClusteringUtil.cs
r5651 r5658 27 27 namespace HeuristicLab.Algorithms.DataAnalysis { 28 28 public static class KMeansClusteringUtil { 29 public static double[,] PrepareInputMatrix(Dataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows) {30 List<int> allowedRows = CalculateAllowedRows(dataset, allowedInputVariables, rows).ToList();31 32 double[,] matrix = new double[allowedRows.Count, allowedInputVariables.Count()];33 for (int row = 0; row < allowedRows.Count; row++) {34 int col = 0;35 foreach (string column in allowedInputVariables) {36 matrix[row, col] = dataset[column, row];37 col++;38 }39 }40 return matrix;41 }42 43 private static IEnumerable<int> CalculateAllowedRows(Dataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows) {44 // return only rows that contain no infinity or NaN values45 return from row in rows46 where (from inputVariable in allowedInputVariables47 let x = dataset[inputVariable, row]48 where double.IsInfinity(x) || double.IsNaN(x)49 select 1)50 .Any() == false51 select row;52 }53 54 29 public static IEnumerable<int> FindClosestCenters(IEnumerable<double[]> centers, Dataset dataset, IEnumerable<string> allowedInputVariables, IEnumerable<int> rows) { 55 30 int nRows = rows.Count();
Note: See TracChangeset for help on using the changeset viewer.