- Timestamp:
- 04/04/17 17:52:44 (7 years ago)
- Location:
- trunk/sources
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources
- Property svn:mergeinfo changed
-
trunk/sources/HeuristicLab.Algorithms.DataAnalysis
-
trunk/sources/HeuristicLab.Algorithms.DataAnalysis/3.4/Linear/AlglibUtil.cs
r14400 r14826 20 20 #endregion 21 21 22 using System; 22 23 using System.Collections.Generic; 23 24 using System.Linq; … … 27 28 public static class AlglibUtil { 28 29 public static double[,] PrepareInputMatrix(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows) { 29 List<string> variablesList = variables.ToList(); 30 // check input variables. Only double variables are allowed. 31 var invalidInputs = 32 variables.Where(name => !dataset.VariableHasType<double>(name)); 33 if (invalidInputs.Any()) 34 throw new NotSupportedException("Unsupported inputs: " + string.Join(", ", invalidInputs)); 35 30 36 List<int> rowsList = rows.ToList(); 31 32 double[,] matrix = new double[rowsList.Count, variablesList.Count]; 37 double[,] matrix = new double[rowsList.Count, variables.Count()]; 33 38 34 39 int col = 0; … … 45 50 return matrix; 46 51 } 52 47 53 public static double[,] PrepareAndScaleInputMatrix(IDataset dataset, IEnumerable<string> variables, IEnumerable<int> rows, Scaling scaling) { 54 // check input variables. Only double variables are allowed. 55 var invalidInputs = 56 variables.Where(name => !dataset.VariableHasType<double>(name)); 57 if (invalidInputs.Any()) 58 throw new NotSupportedException("Unsupported inputs: " + string.Join(", ", invalidInputs)); 59 48 60 List<string> variablesList = variables.ToList(); 49 61 List<int> rowsList = rows.ToList(); … … 64 76 return matrix; 65 77 } 78 79 /// <summary> 80 /// Prepares a binary data matrix from a number of factors and specified factor values 81 /// </summary> 82 /// <param name="dataset">A dataset that contains the variable values</param> 83 /// <param name="factorVariables">An enumerable of categorical variables (factors). 
For each variable an enumerable of values must be specified.</param> 84 /// <param name="rows">An enumerable of row indices for the dataset</param> 85 /// <returns></returns> 86 /// <remarks>Factor variables (categorical variables) are split up into multiple binary variables one for each specified value.</remarks> 87 public static double[,] PrepareInputMatrix( 88 IDataset dataset, 89 IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables, 90 IEnumerable<int> rows) { 91 // check input variables. Only string variables are allowed. 92 var invalidInputs = 93 factorVariables.Select(kvp => kvp.Key).Where(name => !dataset.VariableHasType<string>(name)); 94 if (invalidInputs.Any()) 95 throw new NotSupportedException("Unsupported inputs: " + string.Join(", ", invalidInputs)); 96 97 int numBinaryColumns = factorVariables.Sum(kvp => kvp.Value.Count()); 98 99 List<int> rowsList = rows.ToList(); 100 double[,] matrix = new double[rowsList.Count, numBinaryColumns]; 101 102 int col = 0; 103 foreach (var kvp in factorVariables) { 104 var varName = kvp.Key; 105 var cats = kvp.Value; 106 if (!cats.Any()) continue; 107 foreach (var cat in cats) { 108 var values = dataset.GetStringValues(varName, rows); 109 int row = 0; 110 foreach (var value in values) { 111 matrix[row, col] = value == cat ? 1 : 0; 112 row++; 113 } 114 col++; 115 } 116 } 117 return matrix; 118 } 119 120 public static IEnumerable<KeyValuePair<string, IEnumerable<string>>> GetFactorVariableValues(IDataset ds, IEnumerable<string> factorVariables, IEnumerable<int> rows) { 121 return from factor in factorVariables 122 let distinctValues = ds.GetStringValues(factor, rows).Distinct().ToArray() 123 // 1 distinct value => skip (constant) 124 // 2 distinct values => only take one of the two values 125 // >=3 distinct values => create a binary value for each value 126 let reducedValues = distinctValues.Length <= 2 127 ? 
distinctValues.Take(distinctValues.Length - 1) 128 : distinctValues 129 select new KeyValuePair<string, IEnumerable<string>>(factor, reducedValues); 130 } 66 131 } 67 132 }
Note: See TracChangeset for help on using the changeset viewer.