Changeset 2367 for trunk/sources/HeuristicLab.LinearRegression
- Timestamp: 09/17/09 10:15:56 (15 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.LinearRegression/3.2/LinearRegressionOperator.cs
r2360 r2367 56 56 int minTimeOffset = minTimeOffsetData == null ? 0 : minTimeOffsetData.Data; 57 57 58 List<int> allowedRows = CalculateAllowedRows(dataset, targetVariable, start, end, minTimeOffset, maxTimeOffset);59 58 List<int> allowedColumns = CalculateAllowedColumns(dataset, targetVariable, start, end); 59 List<int> allowedRows = CalculateAllowedRows(dataset, targetVariable, allowedColumns, start, end, minTimeOffset, maxTimeOffset); 60 60 61 61 double[,] inputMatrix = PrepareInputMatrix(dataset, allowedColumns, allowedRows, minTimeOffset, maxTimeOffset); … … 110 110 111 111 //returns list of valid row indexes (rows without NaN values) 112 private List<int> CalculateAllowedRows(Dataset dataset, int targetVariable, int start, int end, int minTimeOffset, int maxTimeOffset) {112 private List<int> CalculateAllowedRows(Dataset dataset, int targetVariable, IList<int> allowedColumns, int start, int end, int minTimeOffset, int maxTimeOffset) { 113 113 List<int> allowedRows = new List<int>(); 114 114 bool add; 115 115 for (int row = start; row < end; row++) { 116 116 add = true; 117 for (int col = 0; col < dataset.Columns && add == true; col++) {117 for (int colIndex = 0; colIndex < allowedColumns.Count && add == true; colIndex++) { 118 118 for (int timeOffset = minTimeOffset; timeOffset <= maxTimeOffset; timeOffset++) { 119 119 if ( 120 120 row + timeOffset < 0 || 121 121 row + timeOffset > dataset.Rows || 122 double.IsNaN(dataset.GetValue(row + timeOffset, col)) || 122 double.IsNaN(dataset.GetValue(row + timeOffset, allowedColumns[colIndex])) || 123 double.IsInfinity(dataset.GetValue(row + timeOffset, allowedColumns[colIndex])) || 123 124 double.IsNaN(dataset.GetValue(row + timeOffset, targetVariable))) { 124 125 add = false; … … 133 134 } 134 135 135 //returns list of valid column indexes (columns which contain at least one non-zero value)136 //returns list of valid column indexes (columns which contain max. 
10% NaN (or infinity) and contain at least two different values) 136 137 private List<int> CalculateAllowedColumns(Dataset dataset, int targetVariable, int start, int end) { 137 138 List<int> allowedColumns = new List<int>(); 139 double n = end - start; 138 140 for (int i = 0; i < dataset.Columns; i++) { 139 if (i == targetVariable) continue; 140 if (!dataset.GetMinimum(i, start, end).IsAlmost(0.0) || 141 !dataset.GetMaximum(i, start, end).IsAlmost(0.0)) 141 double nanRatio = CountNaN(dataset, i, start, end) / n; 142 if (i != targetVariable && nanRatio < 0.1 && dataset.GetRange(i, start, end) > 0.0) { 142 143 allowedColumns.Add(i); 144 } 143 145 } 144 146 return allowedColumns; 145 147 } 148 149 private double CountNaN(Dataset dataset, int column, int start, int end) { 150 double n = 0; 151 for (int i = start; i < end; i++) { 152 if (double.IsNaN(dataset.GetValue(i, column)) || double.IsInfinity(dataset.GetValue(i, column))) 153 n++; 154 } 155 return n; 156 } 157 146 158 147 159 private double[,] PrepareInputMatrix(Dataset dataset, List<int> allowedColumns, List<int> allowedRows, int minTimeOffset, int maxTimeOffset) {
Note: See TracChangeset for help on using the changeset viewer.