- Timestamp: 09/17/09 13:32:19 (15 years ago)
- Location: trunk/sources
- Files: 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/sources/HeuristicLab.DataAnalysis/3.2/Dataset.cs
--- r2367
+++ r2368
@@ -308,4 +308,25 @@
       return min;
     }
+
+    public int CountMissingValues(string variableName) {
+      return CountMissingValues(this.GetVariableIndex(variableName));
+    }
+    public int CountMissingValues(int column) {
+      return CountMissingValues(column, 0, Rows);
+    }
+
+    public int CountMissingValues(string variableName, int start, int end) {
+      return CountMissingValues(this.GetVariableIndex(variableName), start, end);
+    }
+
+    public int CountMissingValues(int column, int start, int end) {
+      int n = 0;
+      for (int i = start; i < end; i++) {
+        double val = GetValue(i, column);
+        if (double.IsNaN(val)) n++;
+      }
+      return n;
+    }
+
     #endregion
 
trunk/sources/HeuristicLab.LinearRegression/3.2/LinearRegressionOperator.cs
--- r2367
+++ r2368
@@ -139,5 +139,5 @@
       double n = end - start;
       for (int i = 0; i < dataset.Columns; i++) {
-        double nanRatio = CountNaN(dataset,i, start, end) / n;
+        double nanRatio = dataset.CountMissingValues(i, start, end) / n;
         if (i != targetVariable && nanRatio < 0.1 && dataset.GetRange(i, start, end) > 0.0) {
           allowedColumns.Add(i);
@@ -146,14 +146,4 @@
       return allowedColumns;
     }
-
-    private double CountNaN(Dataset dataset, int column, int start, int end) {
-      double n = 0;
-      for (int i = start; i < end; i++) {
-        if (double.IsNaN(dataset.GetValue(i, column)) || double.IsInfinity(dataset.GetValue(i, column)))
-          n++;
-      }
-      return n;
-    }
-
 
     private double[,] PrepareInputMatrix(Dataset dataset, List<int> allowedColumns, List<int> allowedRows, int minTimeOffset, int maxTimeOffset) {
trunk/sources/HeuristicLab.Modeling/3.2/VariableEvaluationImpactCalculator.cs
--- r2330
+++ r2368
@@ -97,9 +97,13 @@
       foreach (string variableName in variables) {
         if (variableName != targetVariableName) {
-          mean = dataset.GetMean(variableName, start, end);
-          oldValues = dirtyDataset.ReplaceVariableValues(variableName, Enumerable.Repeat(mean, end - start), start, end);
-          newValues = predictor.Predict(dirtyDataset, start, end);
-          evaluationImpacts[variableName] = 1 - CalculateVAF(referenceValues, newValues);
-          dirtyDataset.ReplaceVariableValues(variableName, oldValues, start, end);
+          if (dataset.CountMissingValues(variableName, start, end) < (end - start) && dataset.GetRange(variableName, start, end) > 0.0) {
+            mean = dataset.GetMean(variableName, start, end);
+            oldValues = dirtyDataset.ReplaceVariableValues(variableName, Enumerable.Repeat(mean, end - start), start, end);
+            newValues = predictor.Predict(dirtyDataset, start, end);
+            evaluationImpacts[variableName] = 1 - CalculateVAF(referenceValues, newValues);
+            dirtyDataset.ReplaceVariableValues(variableName, oldValues, start, end);
+          } else {
+            evaluationImpacts[variableName] = 0.0;
+          }
         }
       }
trunk/sources/HeuristicLab.Modeling/3.2/VariableQualityImpactCalculator.cs
--- r2357
+++ r2368
@@ -99,5 +99,7 @@
 
       foreach (string variableName in variables) {
-        if (variableName != targetVariableName) {
+        if (dataset.CountMissingValues(variableName, start, end) < (end - start) &&
+          dataset.GetRange(variableName, start, end) > 0.0 &&
+          variableName != targetVariableName) {
           mean = dataset.GetMean(variableName, start, end);
           oldValues = dirtyDataset.ReplaceVariableValues(variableName, Enumerable.Repeat(mean, end - start), start, end);
@@ -106,4 +108,6 @@
           evaluationImpacts[variableName] = newMSE / oldMSE;
           dirtyDataset.ReplaceVariableValues(variableName, oldValues, start, end);
+        } else {
+          evaluationImpacts[variableName] = 1.0;
         }
       }
Note: See TracChangeset for help on using the changeset viewer.