Free cookie consent management tool by TermsFeed Policy Generator

Changeset 2368


Timestamp: 09/17/09 13:32:19 (15 years ago)
Author:
gkronber
Message:

Fixed a bug in the variable impact calculation operators (ticket #750).

Location:
trunk/sources
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/sources/HeuristicLab.DataAnalysis/3.2/Dataset.cs

    r2367 r2368  
    308308      return min;
    309309    }
     310
     311    public int CountMissingValues(string variableName) {
     312      return CountMissingValues(this.GetVariableIndex(variableName));
     313    }
     314    public int CountMissingValues(int column) {
     315      return CountMissingValues(column, 0, Rows);
     316    }
     317
     318    public int CountMissingValues(string variableName, int start, int end) {
     319      return CountMissingValues(this.GetVariableIndex(variableName), start, end);
     320    }
     321
     322    public int CountMissingValues(int column, int start, int end) {
     323      int n = 0;
     324      for (int i = start; i < end; i++) {
     325        double val = GetValue(i, column);
     326        if (double.IsNaN(val)) n++;
     327      }
     328      return n;
     329    }
     330
    310331    #endregion
    311332
  • trunk/sources/HeuristicLab.LinearRegression/3.2/LinearRegressionOperator.cs

    r2367 r2368  
    139139      double n = end - start;
    140140      for (int i = 0; i < dataset.Columns; i++) {
    141         double nanRatio = CountNaN(dataset, i, start, end) / n;
     141        double nanRatio = dataset.CountMissingValues(i, start, end) / n;
    142142        if (i != targetVariable && nanRatio < 0.1 && dataset.GetRange(i, start, end) > 0.0) {
    143143          allowedColumns.Add(i);
     
    146146      return allowedColumns;
    147147    }
    148 
    149     private double CountNaN(Dataset dataset, int column, int start, int end) {
    150       double n = 0;
    151       for (int i = start; i < end; i++) {
    152         if (double.IsNaN(dataset.GetValue(i, column)) || double.IsInfinity(dataset.GetValue(i, column)))
    153           n++;
    154       }
    155       return n;
    156     }
    157 
    158148
    159149    private double[,] PrepareInputMatrix(Dataset dataset, List<int> allowedColumns, List<int> allowedRows, int minTimeOffset, int maxTimeOffset) {
  • trunk/sources/HeuristicLab.Modeling/3.2/VariableEvaluationImpactCalculator.cs

    r2330 r2368  
    9797      foreach (string variableName in variables) {
    9898        if (variableName != targetVariableName) {
    99           mean = dataset.GetMean(variableName, start, end);
    100           oldValues = dirtyDataset.ReplaceVariableValues(variableName, Enumerable.Repeat(mean, end - start), start, end);
    101           newValues = predictor.Predict(dirtyDataset, start, end);
    102           evaluationImpacts[variableName] = 1 - CalculateVAF(referenceValues, newValues);
    103           dirtyDataset.ReplaceVariableValues(variableName, oldValues, start, end);
     99          if (dataset.CountMissingValues(variableName, start, end) < (end - start) && dataset.GetRange(variableName, start, end) > 0.0) {
     100            mean = dataset.GetMean(variableName, start, end);
     101            oldValues = dirtyDataset.ReplaceVariableValues(variableName, Enumerable.Repeat(mean, end - start), start, end);
     102            newValues = predictor.Predict(dirtyDataset, start, end);
     103            evaluationImpacts[variableName] = 1 - CalculateVAF(referenceValues, newValues);
     104            dirtyDataset.ReplaceVariableValues(variableName, oldValues, start, end);
     105          } else {
     106            evaluationImpacts[variableName] = 0.0;
     107          }
    104108        }
    105109      }
  • trunk/sources/HeuristicLab.Modeling/3.2/VariableQualityImpactCalculator.cs

    r2357 r2368  
    9999
    100100      foreach (string variableName in variables) {
    101         if (variableName != targetVariableName) {
     101        if (dataset.CountMissingValues(variableName, start, end) < (end - start) &&
     102          dataset.GetRange(variableName, start, end) > 0.0 &&
     103          variableName != targetVariableName) {
    102104          mean = dataset.GetMean(variableName, start, end);
    103105          oldValues = dirtyDataset.ReplaceVariableValues(variableName, Enumerable.Repeat(mean, end - start), start, end);
     
    106108          evaluationImpacts[variableName] = newMSE / oldMSE;
    107109          dirtyDataset.ReplaceVariableValues(variableName, oldValues, start, end);
     110        } else {
     111          evaluationImpacts[variableName] = 1.0;
    108112        }
    109113      }
Note: See TracChangeset for help on using the changeset viewer.