Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
10/25/17 12:38:12 (7 years ago)
Author:
pfleck
Message:

#2809: Removed experimental static-typed datacolumns. (reverse merge r15291, r15309)

Location:
branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Content
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Content/DataGridContent.cs

    r15309 r15431  
    9090
    9191    public void DeleteRows(IEnumerable<int> rows) {
    92       PreprocessingData.DeleteRows(rows);
     92      PreprocessingData.DeleteRowsWithIndices(rows);
    9393    }
    9494
     
    134134
    135135    #region Manipulations
     136    private void ReplaceIndicesByValue(IDictionary<int, IList<int>> cells, Func<int, double> doubleAggregator = null,
     137      Func<int, DateTime> dateTimeAggregator = null, Func<int, string> stringAggregator = null) {
     138      PreprocessingData.InTransaction(() => {
     139        foreach (var column in cells) {
     140          if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(column.Key)) {
     141            var value = doubleAggregator(column.Key);
     142            foreach (int index in column.Value)
     143              PreprocessingData.SetCell<double>(column.Key, index, value);
     144          } else if (dateTimeAggregator != null && PreprocessingData.VariableHasType<DateTime>(column.Key)) {
     145            var value = dateTimeAggregator(column.Key);
     146            foreach (int index in column.Value)
     147              PreprocessingData.SetCell<DateTime>(column.Key, index, value);
     148          } else if (stringAggregator != null && PreprocessingData.VariableHasType<string>(column.Key)) {
     149            var value = stringAggregator(column.Key);
     150            foreach (int index in column.Value)
     151              PreprocessingData.SetCell<string>(column.Key, index, value);
     152          }
     153        }
     154      });
     155    }
     156
     157    private void ReplaceIndicesByValues(IDictionary<int, IList<int>> cells, Func<int, IEnumerable<double>> doubleAggregator = null,
     158      Func<int, IEnumerable<DateTime>> dateTimeAggregator = null, Func<int, IEnumerable<string>> stringAggregator = null) {
     159      PreprocessingData.InTransaction(() => {
     160        foreach (var column in cells) {
     161          if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(column.Key)) {
     162            var values = doubleAggregator(column.Key);
     163            foreach (var pair in column.Value.Zip(values, (row, value) => new { row, value }))
     164              PreprocessingData.SetCell<double>(column.Key, pair.row, pair.value);
     165          } else if (dateTimeAggregator != null && PreprocessingData.VariableHasType<DateTime>(column.Key)) {
     166            var values = dateTimeAggregator(column.Key);
     167            foreach (var pair in column.Value.Zip(values, (row, value) => new { row, value }))
     168              PreprocessingData.SetCell<DateTime>(column.Key, pair.row, pair.value);
     169          } else if (stringAggregator != null && PreprocessingData.VariableHasType<string>(column.Key)) {
     170            var values = stringAggregator(column.Key);
     171            foreach (var pair in column.Value.Zip(values, (row, value) => new { row, value }))
     172              PreprocessingData.SetCell<string>(column.Key, pair.row, pair.value);
     173          }
     174        }
     175      });
     176    }
     177
    136178    public void ReplaceIndicesByMean(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
    137       PreprocessingData.InTransaction(() => {
    138         foreach (var column in cells) {
    139           PreprocessingData.DataColumns[column.Key].TypeSwitch(
    140             c => {
    141               var mean = c.GetMean(considerSelection ? column.Value : null);
    142               foreach (var index in column.Value) c[index] = mean;
    143             },
    144             dateTimeAction: c => {
    145               var mean = c.GetMean(considerSelection ? column.Value : null);
    146               foreach (var index in column.Value) c[index] = mean;
    147             });
    148         }
    149       });
     179      ReplaceIndicesByValue(cells,
     180        col => PreprocessingData.GetMean<double>(col, considerSelection),
     181        col => PreprocessingData.GetMean<DateTime>(col, considerSelection));
    150182    }
    151183
    152184    public void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
    153       PreprocessingData.InTransaction(() => {
    154         foreach (var column in cells) {
    155           PreprocessingData.DataColumns[column.Key].TypeSwitch(
    156             c => {
    157               var median = c.GetMedian(considerSelection ? column.Value : null);
    158               foreach (var index in column.Value) c[index] = median;
    159             },
    160             c => {
    161               var median = c.GetMedian(considerSelection ? column.Value : null);
    162               foreach (var index in column.Value) c[index] = median;
    163             },
    164             c => {
    165               var median = c.GetMedian(considerSelection ? column.Value : null);
    166               foreach (var index in column.Value) c[index] = median;
    167             });
    168         }
    169       });
     185      ReplaceIndicesByValue(cells,
     186        col => PreprocessingData.GetMedian<double>(col, considerSelection),
     187        col => PreprocessingData.GetMedian<DateTime>(col, considerSelection));
    170188    }
    171189
    172190    public void ReplaceIndicesByMode(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
    173       PreprocessingData.InTransaction(() => {
    174         foreach (var column in cells) {
    175           PreprocessingData.DataColumns[column.Key].TypeSwitch(
    176             c => {
    177               var mode = c.GetMode(considerSelection ? column.Value : null);
    178               foreach (var index in column.Value) c[index] = mode;
    179             },
    180             c => {
    181               var mode = c.GetMode(considerSelection ? column.Value : null);
    182               foreach (var index in column.Value) c[index] = mode;
    183             },
    184             c => {
    185               var mode = c.GetMode(considerSelection ? column.Value : null);
    186               foreach (var index in column.Value) c[index] = mode;
    187             });
    188         }
    189       });
     191      ReplaceIndicesByValue(cells,
     192        col => PreprocessingData.GetMode<double>(col, considerSelection),
     193        col => PreprocessingData.GetMode<DateTime>(col, considerSelection),
     194        col => PreprocessingData.GetMode<string>(col, considerSelection));
    190195    }
    191196
    192197    public void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
    193198      var rand = new FastRandom();
    194       PreprocessingData.InTransaction(() => {
    195         foreach (var column in cells) {
    196           PreprocessingData.DataColumns[column.Key].TypeSwitch(
    197             c => {
    198               double min = c.GetMin(considerSelection ? column.Value : null);
    199               double max = c.GetMax(considerSelection ? column.Value : null);
    200               double range = max - min;
    201               foreach (var index in column.Value) c[index] = min + rand.NextDouble() * range;
    202             },
    203             dateTimeAction: c => {
    204               var min = c.GetMin(considerSelection ? column.Value : null);
    205               var max = c.GetMax(considerSelection ? column.Value : null);
    206               double range = (max - min).TotalSeconds;
    207               foreach (var index in column.Value) c[index] = min + TimeSpan.FromSeconds(rand.NextDouble() * range);
    208             });
    209         }
    210       });
     199      ReplaceIndicesByValues(cells,
     200        col => {
     201          double min = PreprocessingData.GetMin<double>(col, considerSelection);
     202          double max = PreprocessingData.GetMax<double>(col, considerSelection);
     203          double range = max - min;
     204          return cells[col].Select(_ => rand.NextDouble() * range + min);
     205        },
     206        col => {
     207          var min = PreprocessingData.GetMin<DateTime>(col, considerSelection);
     208          var max = PreprocessingData.GetMax<DateTime>(col, considerSelection);
     209          double range = (max - min).TotalSeconds;
     210          return cells[col].Select(_ => min + TimeSpan.FromSeconds(rand.NextDouble() * range));
     211        });
    211212    }
    212213
     
    215216        foreach (var column in cells) {
    216217          foreach (var rowIdx in column.Value) {
    217             PreprocessingData.DataColumns[column.Key].SetValue(value, rowIdx);
     218            PreprocessingData.SetValue(value, column.Key, rowIdx);
    218219          }
    219220        }
     
    256257      int valuesToInterpolate = nextIndex - prevIndex;
    257258
    258       PreprocessingData.DataColumns[column.Key].TypeSwitch(
    259         c => {
    260           double prev = c[prevIndex];
    261           double next = c[nextIndex];
    262           double interpolationStep = (next - prev) / valuesToInterpolate;
    263           for (int i = prevIndex; i < nextIndex; i++) c[i] = prev + (interpolationStep * (i - prevIndex));
    264         },
    265         dateTimeAction: c => {
    266           var prev = c[prevIndex];
    267           var next = c[nextIndex];
    268           double interpolationStep = (next - prev).TotalSeconds / valuesToInterpolate;
    269           for (int i = prevIndex; i < nextIndex; i++) c[i] = prev.AddSeconds(interpolationStep * (i - prevIndex));
    270         }
    271       );
     259      if (PreprocessingData.VariableHasType<double>(column.Key)) {
     260        double prev = PreprocessingData.GetCell<double>(column.Key, prevIndex);
     261        double next = PreprocessingData.GetCell<double>(column.Key, nextIndex);
     262        double interpolationStep = (next - prev) / valuesToInterpolate;
     263
     264        for (int i = prevIndex; i < nextIndex; ++i) {
     265          double interpolated = prev + (interpolationStep * (i - prevIndex));
     266          PreprocessingData.SetCell<double>(column.Key, i, interpolated);
     267        }
     268      } else if (PreprocessingData.VariableHasType<DateTime>(column.Key)) {
     269        DateTime prev = PreprocessingData.GetCell<DateTime>(column.Key, prevIndex);
     270        DateTime next = PreprocessingData.GetCell<DateTime>(column.Key, nextIndex);
     271        double interpolationStep = (next - prev).TotalSeconds / valuesToInterpolate;
     272
     273        for (int i = prevIndex; i < nextIndex; ++i) {
     274          DateTime interpolated = prev.AddSeconds(interpolationStep * (i - prevIndex));
     275          PreprocessingData.SetCell<DateTime>(column.Key, i, interpolated);
     276        }
     277      }
    272278    }
    273279
    274280    private int IndexOfPrevPresentValue(int columnIndex, int start) {
    275       int index = start - 1;
    276       while (index >= 0 && PreprocessingData.IsCellEmpty(columnIndex, index))
    277         index--;
    278       return index;
     281      int offset = start - 1;
     282      while (offset >= 0 && PreprocessingData.IsCellEmpty(columnIndex, offset)) {
     283        offset--;
     284      }
     285
     286      return offset;
    279287    }
    280288
    281289    private int IndexOfNextPresentValue(int columnIndex, int start) {
    282       int index = start + 1;
    283       while (index < PreprocessingData.Rows && PreprocessingData.IsCellEmpty(columnIndex, index))
    284         index++;
    285       return index;
     290      int offset = start + 1;
     291      while (offset < PreprocessingData.Rows && PreprocessingData.IsCellEmpty(columnIndex, offset)) {
     292        offset++;
     293      }
     294
     295      return offset;
    286296    }
    287297
     
    293303        PreprocessingData.InTransaction(() => {
    294304          // process all given ranges - e.g. TrainingPartition, TestPartition
    295           foreach (var range in ranges) {
     305          foreach (IntRange range in ranges) {
    296306            var indices = Enumerable.Range(0, PreprocessingData.Rows).ToArray();
    297307            var shuffledIndices = Enumerable.Range(range.Start, range.Size).Shuffle(random).ToArray();
     
    314324    public void ReOrderToIndices(int[] indices) {
    315325      PreprocessingData.InTransaction(() => {
    316         foreach (var column in PreprocessingData.DataColumns) {
    317           column.TypeSwitch(
    318             c => {
    319               if (indices.Length != c.Values.Count) throw new InvalidOperationException("The number of provided indices does not match the values.");
    320               var originalData = new List<double>(c.Values);
    321               for (int i = 0; i < indices.Length; i++) c[i] = originalData[indices[i]];
    322             },
    323             c => {
    324               if (indices.Length != c.Values.Count) throw new InvalidOperationException("The number of provided indices does not match the values.");
    325               var originalData = new List<string>(c.Values);
    326               for (int i = 0; i < indices.Length; i++) c[i] = originalData[indices[i]];
    327             },
    328             c => {
    329               if (indices.Length != c.Values.Count) throw new InvalidOperationException("The number of provided indices does not match the values.");
    330               var originalData = new List<DateTime>(c.Values);
    331               for (int i = 0; i < indices.Length; i++) c[i] = originalData[indices[i]];
    332             });
    333         }
    334       });
     326        for (int i = 0; i < PreprocessingData.Columns; ++i) {
     327          if (PreprocessingData.VariableHasType<double>(i))
     328            ReOrderToIndices<double>(i, indices);
     329          else if (PreprocessingData.VariableHasType<string>(i))
     330            ReOrderToIndices<string>(i, indices);
     331          else if (PreprocessingData.VariableHasType<DateTime>(i))
     332            ReOrderToIndices<DateTime>(i, indices);
     333        }
     334      });
     335    }
     336
     337    private void ReOrderToIndices<T>(int columnIndex, int[] indices) {
     338      var originalData = new List<T>(PreprocessingData.GetValues<T>(columnIndex));
     339      if (indices.Length != originalData.Count) throw new InvalidOperationException("The number of provided indices does not match the values.");
     340
     341      for (int i = 0; i < indices.Length; i++) {
     342        T newValue = originalData[indices[i]];
     343        PreprocessingData.SetCell<T>(columnIndex, i, newValue);
     344      }
    335345    }
    336346    #endregion
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Content/ManipulationContent.cs

    r15309 r15431  
    5757
    5858      for (int i = 0; i < PreprocessingData.Rows; ++i) {
    59         int missingCount = 0;
    60         for (var col = 0; col < PreprocessingData.DataColumns.Count; col++) {
    61           if (!PreprocessingData.DataColumns[col].IsValidValue(i))
    62             missingCount++;
     59        int missingCount = PreprocessingData.GetRowMissingValueCount(i);
     60        if (100f / PreprocessingData.Columns * missingCount > percent) {
     61          rows.Add(i);
    6362        }
    64         if (100f / PreprocessingData.Columns * missingCount > percent)
    65           rows.Add(i);
    6663      }
    6764
     
    7269      List<int> columns = new List<int>();
    7370      for (int i = 0; i < PreprocessingData.Columns; ++i) {
    74         int missingCount = PreprocessingData.DataColumns[i].GetNumberOfMissingValues();
     71        int missingCount = PreprocessingData.GetMissingValueCount(i);
    7572        if (100f / PreprocessingData.Rows * missingCount > percent) {
    7673          columns.Add(i);
     
    8380    public List<int> ColumnsWithVarianceSmaller(double variance) {
    8481      List<int> columns = new List<int>();
    85 
    86       for (int i = 0; i < PreprocessingData.Columns; i++) {
    87         if (PreprocessingData.DataColumns[i].TypeSwitch<bool>(
    88           c => c.GetVariance() < variance,
    89           c => false,
    90           c => c.GetVariance().Ticks / TimeSpan.TicksPerSecond < variance
    91         ))
    92           columns.Add(i);
     82      for (int i = 0; i < PreprocessingData.Columns; ++i) {
     83        if (PreprocessingData.VariableHasType<double>(i)) {
     84          double columnVariance = PreprocessingData.GetVariance<double>(i);
     85          if (columnVariance < variance) {
     86            columns.Add(i);
     87          }
     88        } else if (PreprocessingData.VariableHasType<DateTime>(i)) {
     89          double columnVariance = (double)PreprocessingData.GetVariance<DateTime>(i).Ticks / TimeSpan.TicksPerSecond;
     90          if (columnVariance < variance) {
     91            columns.Add(i);
     92          }
     93        }
    9394      }
    94 
    9595      return columns;
    9696    }
     
    119119      PreprocessingData.InTransaction(() => {
    120120        foreach (int column in columns.OrderByDescending(x => x)) {
    121           PreprocessingData.DataColumns.RemoveAt(column);
     121          PreprocessingData.DeleteColumn(column);
    122122        }
    123123      });
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Content/PreprocessingChartContent.cs

    r15309 r15431  
    8282
    8383    public static DataRow CreateDataRow(IFilteredPreprocessingData preprocessingData, string variableName, DataRowVisualProperties.DataRowChartType chartType) {
    84       var values = preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableName));
     84      IList<double> values = preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableName));
    8585      DataRow row = new DataRow(variableName, "", values);
    8686      row.VisualProperties.ChartType = chartType;
  • branches/DataPreprocessing Cleanup/HeuristicLab.DataPreprocessing/3.4/Content/ScatterPlotContent.cs

    r15309 r15431  
    2121
    2222using System;
     23using System.Collections.Generic;
    2324using System.Linq;
    2425using HeuristicLab.Analysis;
     
    5051    #endregion
    5152
    52     public static ScatterPlot CreateScatterPlot(IFilteredPreprocessingData preprocessingData, string variableNameX, string variableNameY,
    53       string variableNameGroup = "-", LegendOrder legendOrder = LegendOrder.Alphabetically) {
     53    public static ScatterPlot CreateScatterPlot(IFilteredPreprocessingData preprocessingData, string variableNameX, string variableNameY, string variableNameGroup = "-", LegendOrder legendOrder = LegendOrder.Alphabetically) {
    5454      ScatterPlot scatterPlot = new ScatterPlot();
    5555
    56       var xValues = preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableNameX));
    57       var yValues = preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableNameY));
     56      IList<double> xValues = preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableNameX));
     57      IList<double> yValues = preprocessingData.GetValues<double>(preprocessingData.GetColumnIndex(variableNameY));
    5858
    5959      var points = xValues.Zip(yValues, (x, y) => new Point2D<double>(x, y)).ToList();
Note: See TracChangeset for help on using the changeset viewer.