Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
07/09/14 10:37:01 (10 years ago)
Author:
gkronber
Message:

#2206: made several changes / improvements to the data-preprocessing code while reviewing the code

Location:
trunk/sources/HeuristicLab.DataPreprocessing/3.4
Files:
12 edited

Legend:

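Unmodified (both line numbers are shown, run together: old revision first, then new revision; e.g. "9595" is line 95 in both, "6674" is old line 66 and new line 74)
Added (only the new-revision line number is shown, in the second column)
Removed (only the old-revision line number is shown, in the first column)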
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/Implementations/Filter/ComparisonFilter.cs

    r10999 r11156  
    9595      for (int row = 0; row < ConstrainedValue.Rows; ++row) {
    9696        object item = null;
    97         if (ConstrainedValue.IsType<double>(constraintColumn)) {
     97        if (ConstrainedValue.VariableHasType<double>(constraintColumn)) {
    9898          item = new HeuristicLab.Data.DoubleValue(ConstrainedValue.GetCell<double>(ConstraintColumn, row));
    99         } else if (ConstrainedValue.IsType<DateTime>(constraintColumn)) {
     99        } else if (ConstrainedValue.VariableHasType<DateTime>(constraintColumn)) {
    100100          item = new HeuristicLab.Data.DateTimeValue(ConstrainedValue.GetCell<DateTime>(ConstraintColumn, row));
    101101        } else {
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/Implementations/FilteredPreprocessingData.cs

    r11068 r11156  
    133133    }
    134134
    135     public bool IsType<T>(int columnIndex) {
    136       return originalData.IsType<T>(columnIndex);
     135    public bool VariableHasType<T>(int columnIndex) {
     136      return originalData.VariableHasType<T>(columnIndex);
    137137    }
    138138
     
    156156      originalData.InTransaction(() => {
    157157        for (int i = 0; i < filteredData.Columns; ++i) {
    158           if (filteredData.IsType<double>(i)) {
     158          if (filteredData.VariableHasType<double>(i)) {
    159159            originalData.SetValues<double>(i, filteredData.GetValues<double>(i));
    160           } else if (filteredData.IsType<string>(i)) {
     160          } else if (filteredData.VariableHasType<string>(i)) {
    161161            originalData.SetValues<string>(i, filteredData.GetValues<string>(i));
    162           } else if (filteredData.IsType<DateTime>(i)) {
     162          } else if (filteredData.VariableHasType<DateTime>(i)) {
    163163            originalData.SetValues<DateTime>(i, filteredData.GetValues<DateTime>(i));
    164164          } else {
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/Implementations/HistogramContent.cs

    r10992 r11156  
    6161      //only return variable names from type double
    6262      for (int i = 0; i < PreprocessingData.Columns; ++i) {
    63         if (PreprocessingData.IsType<double>(i)) {
     63        if (PreprocessingData.VariableHasType<double>(i)) {
    6464          double distinctValueCount = PreprocessingData.GetValues<double>(i).GroupBy(x => x).Count();
    6565          bool distinctValuesOk = distinctValueCount <= MAX_DISTINCT_VALUES_FOR_CLASSIFCATION;
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/Implementations/ManipulationLogic.cs

    r11070 r11156  
    5454      preprocessingData.InTransaction(() => {
    5555        foreach (var column in cells) {
    56           if (preprocessingData.IsType<double>(column.Key)) {
     56          if (preprocessingData.VariableHasType<double>(column.Key)) {
    5757            double average = statisticsLogic.GetAverage(column.Key, considerSelection);
    5858            ReplaceIndicesByValue<double>(column.Key, column.Value, average);
    59           } else if (preprocessingData.IsType<DateTime>(column.Key)) {
     59          } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) {
    6060            DateTime average = statisticsLogic.GetAverageDateTime(column.Key, considerSelection);
    6161            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, average);
     
    6868      preprocessingData.InTransaction(() => {
    6969        foreach (var column in cells) {
    70           if (preprocessingData.IsType<double>(column.Key)) {
     70          if (preprocessingData.VariableHasType<double>(column.Key)) {
    7171            double median = statisticsLogic.GetMedian(column.Key, considerSelection);
    7272            ReplaceIndicesByValue<double>(column.Key, column.Value, median);
    73           } else if (preprocessingData.IsType<DateTime>(column.Key)) {
     73          } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) {
    7474            DateTime median = statisticsLogic.GetMedianDateTime(column.Key, considerSelection);
    7575            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, median);
     
    8484
    8585        foreach (var column in cells) {
    86           if (preprocessingData.IsType<double>(column.Key)) {
     86          if (preprocessingData.VariableHasType<double>(column.Key)) {
    8787            double max = statisticsLogic.GetMax<double>(column.Key, considerSelection);
    8888            double min = statisticsLogic.GetMin<double>(column.Key, considerSelection);
     
    9292              preprocessingData.SetCell<double>(column.Key, index, rand);
    9393            }
    94           } else if (preprocessingData.IsType<DateTime>(column.Key)) {
     94          } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) {
    9595            DateTime min = statisticsLogic.GetMin<DateTime>(column.Key, considerSelection);
    9696            DateTime max = statisticsLogic.GetMax<DateTime>(column.Key, considerSelection);
     
    109109        foreach (var column in cells) {
    110110          int countValues = 0;
    111           if (preprocessingData.IsType<double>(column.Key)) {
     111          if (preprocessingData.VariableHasType<double>(column.Key)) {
    112112            countValues = preprocessingData.GetValues<double>(column.Key).Count();
    113           } else if (preprocessingData.IsType<DateTime>(column.Key)) {
     113          } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) {
    114114            countValues = preprocessingData.GetValues<DateTime>(column.Key).Count();
    115115          }
     
    170170      int valuesToInterpolate = nextIndex - prevIndex;
    171171
    172       if (preprocessingData.IsType<double>(column.Key)) {
     172      if (preprocessingData.VariableHasType<double>(column.Key)) {
    173173        double prev = preprocessingData.GetCell<double>(column.Key, prevIndex);
    174174        double next = preprocessingData.GetCell<double>(column.Key, nextIndex);
     
    179179          preprocessingData.SetCell<double>(column.Key, i, interpolated);
    180180        }
    181       } else if (preprocessingData.IsType<DateTime>(column.Key)) {
     181      } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) {
    182182        DateTime prev = preprocessingData.GetCell<DateTime>(column.Key, prevIndex);
    183183        DateTime next = preprocessingData.GetCell<DateTime>(column.Key, nextIndex);
     
    212212      preprocessingData.InTransaction(() => {
    213213        foreach (var column in cells) {
    214           if (preprocessingData.IsType<double>(column.Key)) {
     214          if (preprocessingData.VariableHasType<double>(column.Key)) {
    215215            ReplaceIndicesByValue<double>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<double>(column.Key, considerSelection));
    216           } else if (preprocessingData.IsType<string>(column.Key)) {
     216          } else if (preprocessingData.VariableHasType<string>(column.Key)) {
    217217            ReplaceIndicesByValue<string>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<string>(column.Key, considerSelection));
    218           } else if (preprocessingData.IsType<DateTime>(column.Key)) {
     218          } else if (preprocessingData.VariableHasType<DateTime>(column.Key)) {
    219219            ReplaceIndicesByValue<DateTime>(column.Key, column.Value, statisticsLogic.GetMostCommonValue<DateTime>(column.Key, considerSelection));
    220220          } else {
     
    265265      preprocessingData.InTransaction(() => {
    266266        for (int i = 0; i < preprocessingData.Columns; ++i) {
    267           if (preprocessingData.IsType<double>(i)) {
     267          if (preprocessingData.VariableHasType<double>(i)) {
    268268            reOrderToIndices<double>(i, indices);
    269           } else if (preprocessingData.IsType<string>(i)) {
     269          } else if (preprocessingData.VariableHasType<string>(i)) {
    270270            reOrderToIndices<string>(i, indices);
    271           } else if (preprocessingData.IsType<DateTime>(i)) {
     271          } else if (preprocessingData.VariableHasType<DateTime>(i)) {
    272272            reOrderToIndices<DateTime>(i, indices);
    273273          }
     
    279279      preprocessingData.InTransaction(() => {
    280280        for (int i = 0; i < preprocessingData.Columns; ++i) {
    281           if (preprocessingData.IsType<double>(i)) {
     281          if (preprocessingData.VariableHasType<double>(i)) {
    282282            ShuffleToIndices<double>(i, indices);
    283           } else if (preprocessingData.IsType<string>(i)) {
     283          } else if (preprocessingData.VariableHasType<string>(i)) {
    284284            ShuffleToIndices<string>(i, indices);
    285           } else if (preprocessingData.IsType<DateTime>(i)) {
     285          } else if (preprocessingData.VariableHasType<DateTime>(i)) {
    286286            ShuffleToIndices<DateTime>(i, indices);
    287287          }
     
    360360      List<int> columns = new List<int>();
    361361      for (int i = 0; i < preprocessingData.Columns; ++i) {
    362         if (preprocessingData.IsType<double>(i) || preprocessingData.IsType<DateTime>(i)) {
     362        if (preprocessingData.VariableHasType<double>(i) || preprocessingData.VariableHasType<DateTime>(i)) {
    363363          double columnVariance = statisticsLogic.GetVariance(i);
    364364          if (columnVariance < variance) {
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/Implementations/PreprocessingData.cs

    r11068 r11156  
    5252      var doubleVariableNames = new List<string>();
    5353      for (int i = 0; i < Columns; ++i) {
    54         if (IsType<double>(i)) {
     54        if (VariableHasType<double>(i)) {
    5555          doubleVariableNames.Add(variableNames[i]);
    5656        }
     
    100100      variableValues = new List<IList>();
    101101      foreach (var variableName in problemData.Dataset.VariableNames) {
    102         if (dataset.IsType<double>(variableName)) {
     102        if (dataset.VariableHasType<double>(variableName)) {
    103103          variableValues.Insert(columnIndex, dataset.GetDoubleValues(variableName).ToList());
    104         } else if (dataset.IsType<string>(variableName)) {
     104        } else if (dataset.VariableHasType<string>(variableName)) {
    105105          variableValues.Insert(columnIndex, dataset.GetStringValues(variableName).ToList());
    106         } else if (dataset.IsType<DateTime>(variableName)) {
     106        } else if (dataset.VariableHasType<DateTime>(variableName)) {
    107107          variableValues.Insert(columnIndex, dataset.GetDateTimeValues(variableName).ToList());
    108108        } else {
     
    171171    public abstract int GetColumnIndex(string variableName);
    172172
    173     public abstract bool IsType<T>(int columnIndex);
     173    public abstract bool VariableHasType<T>(int columnIndex);
    174174
    175175    [Obsolete("use the index based variant, is faster")]
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/Implementations/SearchLogic.cs

    r11002 r11156  
    8585
    8686    public bool IsMissingValue(int columnIndex, int rowIndex) {
    87       if (preprocessingData.IsType<double>(columnIndex)) {
     87      if (preprocessingData.VariableHasType<double>(columnIndex)) {
    8888        return double.IsNaN(preprocessingData.GetCell<double>(columnIndex, rowIndex));
    89       } else if (preprocessingData.IsType<string>(columnIndex)) {
     89      } else if (preprocessingData.VariableHasType<string>(columnIndex)) {
    9090        return string.IsNullOrEmpty(preprocessingData.GetCell<string>(columnIndex, rowIndex));
    91       } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
     91      } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
    9292        return preprocessingData.GetCell<DateTime>(columnIndex, rowIndex).Equals(DateTime.MinValue);
    9393      } else {
     
    9898    public IList<int> GetMissingValueIndices(int columnIndex) {
    9999      if (!MissingValueIndicies.ContainsKey(columnIndex)){       
    100           if (preprocessingData.IsType<double>(columnIndex)) {
     100          if (preprocessingData.VariableHasType<double>(columnIndex)) {
    101101            MissingValueIndicies[columnIndex] = GetMissingValueIndices<double>(columnIndex);
    102           } else if (preprocessingData.IsType<string>(columnIndex)) {
     102          } else if (preprocessingData.VariableHasType<string>(columnIndex)) {
    103103            MissingValueIndicies[columnIndex] = GetMissingValueIndices<string>(columnIndex);
    104           } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
     104          } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
    105105            MissingValueIndicies[columnIndex] = GetMissingValueIndices<DateTime>(columnIndex);
    106106          } else {
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/Implementations/StatisticsLogic.cs

    r10812 r11156  
    4949
    5050      for (int i = 0; i < preprocessingData.Columns; ++i) {
    51         if (preprocessingData.IsType<double>(i)) {
     51        if (preprocessingData.VariableHasType<double>(i)) {
    5252          ++count;
    5353        }
     
    8282    public double GetMedian(int columnIndex, bool considerSelection) {
    8383      double median = double.NaN;
    84       if (preprocessingData.IsType<double>(columnIndex)) {
     84      if (preprocessingData.VariableHasType<double>(columnIndex)) {
    8585        median = GetValuesWithoutNaN<double>(columnIndex, considerSelection).Median();
    8686      }
     
    9090    public double GetAverage(int columnIndex, bool considerSelection) {
    9191      double avg = double.NaN;
    92       if (preprocessingData.IsType<double>(columnIndex)) {
     92      if (preprocessingData.VariableHasType<double>(columnIndex)) {
    9393        avg = GetValuesWithoutNaN<double>(columnIndex, considerSelection).Average();
    9494      }
     
    9898    public DateTime GetMedianDateTime(int columnIndex, bool considerSelection) {
    9999      DateTime median = new DateTime();
    100       if (preprocessingData.IsType<DateTime>(columnIndex)) {
     100      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
    101101        median = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex, considerSelection).Median());
    102102      }
     
    106106    public DateTime GetAverageDateTime(int columnIndex, bool considerSelection) {
    107107      DateTime avg = new DateTime();
    108       if (preprocessingData.IsType<DateTime>(columnIndex)) {
     108      if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
    109109        avg = GetSecondsAsDateTime(GetDateTimeAsSeconds(columnIndex, considerSelection).Average());
    110110      }
     
    127127    public double GetStandardDeviation(int columnIndex) {
    128128      double stdDev = double.NaN;
    129       if (preprocessingData.IsType<double>(columnIndex)) {
     129      if (preprocessingData.VariableHasType<double>(columnIndex)) {
    130130        stdDev = GetValuesWithoutNaN<double>(columnIndex).StandardDeviation();
    131       } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
     131      } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
    132132        stdDev = GetDateTimeAsSeconds(columnIndex).StandardDeviation();
    133133      }
     
    137137    public double GetVariance(int columnIndex) {
    138138      double variance = double.NaN;
    139       if (preprocessingData.IsType<double>(columnIndex)) {
     139      if (preprocessingData.VariableHasType<double>(columnIndex)) {
    140140        variance = GetValuesWithoutNaN<double>(columnIndex).Variance();
    141       } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
     141      } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
    142142        variance = GetDateTimeAsSeconds(columnIndex).Variance();
    143143      }
     
    163163    }
    164164
    165     public bool IsType<T>(int columnIndex) {
    166       return preprocessingData.IsType<T>(columnIndex);
     165    public bool VariableHasType<T>(int columnIndex) {
     166      return preprocessingData.VariableHasType<T>(columnIndex);
    167167    }
    168168
    169169    public string GetColumnTypeAsString(int columnIndex) {
    170       if (preprocessingData.IsType<double>(columnIndex)) {
     170      if (preprocessingData.VariableHasType<double>(columnIndex)) {
    171171        return "double";
    172       } else if (preprocessingData.IsType<string>(columnIndex)) {
     172      } else if (preprocessingData.VariableHasType<string>(columnIndex)) {
    173173        return "string";
    174       } else if (preprocessingData.IsType<DateTime>(columnIndex)) {
     174      } else if (preprocessingData.VariableHasType<DateTime>(columnIndex)) {
    175175        return "DateTime";
    176176      }
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/Implementations/TransactionalPreprocessingData.cs

    r11114 r11156  
    115115    }
    116116
    117     public override bool IsType<T>(int columnIndex) {
     117    public override bool VariableHasType<T>(int columnIndex) {
    118118      return variableValues[columnIndex] is List<T>;
    119119    }
     
    138138    public override void SetValues<T>(int columnIndex, IList<T> values) {
    139139      SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
    140       if (IsType<T>(columnIndex)) {
     140      if (VariableHasType<T>(columnIndex)) {
    141141        variableValues[columnIndex] = (IList)values;
    142142      } else {
     
    149149    public override bool SetValue(string value, int columnIndex, int rowIndex) {
    150150      bool valid = false;
    151       if (IsType<double>(columnIndex)) {
     151      if (VariableHasType<double>(columnIndex)) {
    152152        double val;
    153153        valid = double.TryParse(value, out val);
    154154        SetValueIfValid(columnIndex, rowIndex, valid, val);
    155       } else if (IsType<string>(columnIndex)) {
     155      } else if (VariableHasType<string>(columnIndex)) {
    156156        valid = value != null;
    157157        SetValueIfValid(columnIndex, rowIndex, valid, value);
    158       } else if (IsType<DateTime>(columnIndex)) {
     158      } else if (VariableHasType<DateTime>(columnIndex)) {
    159159        DateTime date;
    160160        valid = DateTime.TryParse(value, out date);
     
    177177      bool valid = false;
    178178      errorMessage = string.Empty;
    179       if (IsType<double>(columnIndex)) {
     179      if (VariableHasType<double>(columnIndex)) {
    180180        double val;
    181181        valid = double.TryParse(value, out val);
     
    183183          errorMessage = "Invalid Value (Valid Value Format: \"" + FormatPatterns.GetDoubleFormatPattern() + "\")";
    184184        }
    185       } else if (IsType<string>(columnIndex)) {
     185      } else if (VariableHasType<string>(columnIndex)) {
    186186        valid = value != null;
    187187        if (!valid) {
    188188          errorMessage = "Invalid Value (string must not be null)";
    189189        }
    190       } else if (IsType<DateTime>(columnIndex)) {
     190      } else if (VariableHasType<DateTime>(columnIndex)) {
    191191        DateTime date;
    192192        valid = DateTime.TryParse(value, out date);
     
    207207
    208208    public override bool AreAllStringColumns(IEnumerable<int> columnIndices) {
    209       return columnIndices.All(x => IsType<string>(x));
     209      return columnIndices.All(x => VariableHasType<string>(x));
    210210    }
    211211
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/Interfaces/IPreprocessingData.cs

    r11068 r11156  
    6262    int GetColumnIndex(string variableName);
    6363
    64     bool IsType<T>(int columnIndex);
     64    bool VariableHasType<T>(int columnIndex);
    6565
    6666    int Columns { get; }
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/Interfaces/IStatisticsLogic.cs

    r10811 r11156  
    4646    int GetDifferentValuesCount<T>(int columnIndex);
    4747
    48     bool IsType<T>(int columnIndex);
     48    bool VariableHasType<T>(int columnIndex);
    4949    string GetColumnTypeAsString(int columnIndex);
    5050    string GetVariableName(int columnIndex);
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/PreprocessingTransformator.cs

    r11068 r11156  
    4242
    4343    public bool ApplyTransformations(IEnumerable<ITransformation> transformations, bool preserveColumns, out string errorMsg) {
    44       bool success;
    45 
     44      bool success = false;
     45      errorMsg = string.Empty;
    4646      preprocessingData.BeginTransaction(DataPreprocessingChangedEventType.Transformation);
    4747
     
    6464          renamedColumns.Clear();
    6565        }
     66        // only accept changes if everything was successful
     67        if (!success) {
     68          preprocessingData.Undo();
     69        }
     70      }
     71      catch (Exception e) {
     72        preprocessingData.Undo();
     73        if (string.IsNullOrEmpty(errorMsg)) errorMsg = e.Message;
    6674      }
    6775      finally {
     
    103111    private IEnumerable<double> ApplyDoubleTransformation(Transformation<double> transformation, IList<double> data, out bool success, out string errorMsg) {
    104112      success = transformation.Check(data, out errorMsg);
    105       return transformation.Apply(data);
     113      // don't apply when the check fails
     114      if (success)
     115        return transformation.Apply(data);
     116      else
     117        return data;
    106118    }
    107119
  • trunk/sources/HeuristicLab.DataPreprocessing/3.4/ProblemDataCreator.cs

    r11098 r11156  
    9191      for (int i = 0; i < context.Data.Columns; ++i) {
    9292        var variableName = context.Data.GetVariableName(i);
    93         if (context.Data.IsType<double>(i)
     93        if (context.Data.VariableHasType<double>(i)
    9494          && variableName != targetVariable
    9595          && IsNotConstantInputVariable(context.Data.GetValues<double>(i))) {
Note: See TracChangeset for help on using the changeset viewer.