Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/HeuristicLab.DataPreprocessing/3.4/Content/DataGridContent.cs @ 16565

Last change on this file since 16565 was 16565, checked in by gkronber, 5 years ago

#2520: merged changes from PersistenceOverhaul branch (r16451:16564) into trunk

File size: 14.2 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Drawing;
25using System.Linq;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Data;
29using HEAL.Attic;
30using HeuristicLab.Random;
31
32namespace HeuristicLab.DataPreprocessing {
33  [Item("Data Grid", "Represents a data grid.")]
34  [StorableType("DC6AE5CE-B0FA-4C8C-BDBB-D490C6DE4174")]
35  public class DataGridContent : PreprocessingContent, IStringConvertibleMatrix, IViewShortcut {
/// <summary>
/// Static image for this item type: the Table resource of the VS image library.
/// Declared with <c>new</c>, so it hides the inherited static member of the same name.
/// </summary>
public static new Image StaticItemImage {
  get {
    Image tableIcon = HeuristicLab.Common.Resources.VSImageLibrary.Table;
    return tableIcon;
  }
}
39
/// <summary>
/// Number of rows, read from the underlying preprocessing data.
/// The setter has an empty body, so assigned values are discarded without error.
/// </summary>
public int Rows {
  get {
    int rowCount = PreprocessingData.Rows;
    return rowCount;
  }
  set {
    // empty on purpose of the original code: the assigned value is ignored
  }
}
44
/// <summary>
/// Number of columns, read from the underlying preprocessing data.
/// The setter has an empty body, so assigned values are discarded without error.
/// </summary>
public int Columns {
  get {
    int columnCount = PreprocessingData.Columns;
    return columnCount;
  }
  set {
    // empty: the assigned value is ignored
  }
}
49
/// <summary>
/// Column headers, taken from the variable names of the underlying preprocessing data.
/// The setter has an empty body, so assigned values are discarded without error.
/// </summary>
public IEnumerable<string> ColumnNames {
  get {
    IEnumerable<string> variableNames = PreprocessingData.VariableNames;
    return variableNames;
  }
  set {
    // empty: the assigned value is ignored
  }
}
54
/// <summary>
/// Row headers: the numbers 1..Rows rendered as strings (lazily enumerated).
/// </summary>
/// <exception cref="NotSupportedException">Always thrown by the setter.</exception>
public IEnumerable<string> RowNames {
  get {
    // Rows is read once, when the getter is called; the strings are produced on enumeration.
    int rowCount = Rows;
    IEnumerable<int> oneBasedNumbers = Enumerable.Range(1, rowCount);
    return oneBasedNumbers.Select(number => number.ToString());
  }
  set {
    throw new NotSupportedException();
  }
}
59
/// <summary>
/// Always <c>true</c>.
/// </summary>
/// <exception cref="NotSupportedException">Always thrown by the setter.</exception>
public bool SortableView {
  get {
    return true;
  }
  set {
    throw new NotSupportedException();
  }
}
64
/// <summary>
/// Always <c>false</c>; there is no setter.
/// </summary>
public bool ReadOnly {
  get {
    return false;
  }
}
68
/// <summary>
/// The current selection, stored in (and read from) the underlying preprocessing data.
/// NOTE(review): the keys look like column indices and the values like row indices,
/// matching how the manipulation methods of this class read such dictionaries — confirm.
/// </summary>
public IDictionary<int, IList<int>> Selection {
  get {
    IDictionary<int, IList<int>> current = PreprocessingData.Selection;
    return current;
  }
  set {
    PreprocessingData.Selection = value;
  }
}
73
74    #region Constructor, Cloning & Persistence
/// <summary>
/// Creates a data grid content for the given preprocessing data.
/// The argument is passed straight to the base class constructor.
/// </summary>
/// <param name="preprocessingData">The preprocessing data this content operates on.</param>
public DataGridContent(IFilteredPreprocessingData preprocessingData) : base(preprocessingData) { }
78
/// <summary>
/// Cloning constructor; forwards the original and the cloner to the base class.
/// </summary>
/// <param name="original">The instance to copy.</param>
/// <param name="cloner">The cloner driving the copy operation.</param>
public DataGridContent(DataGridContent original, Cloner cloner) : base(original, cloner) { }
/// <summary>
/// Creates a copy of this instance through the cloning constructor.
/// </summary>
/// <param name="cloner">The cloner driving the copy operation.</param>
/// <returns>A new <see cref="DataGridContent"/> built from this instance.</returns>
public override IDeepCloneable Clone(Cloner cloner) {
  DataGridContent copy = new DataGridContent(this, cloner);
  return copy;
}
85
/// <summary>
/// Constructor marked with <c>[StorableConstructor]</c>; it only forwards the flag to the base class.
/// NOTE(review): presumably invoked by the persistence framework when restoring an instance — confirm.
/// </summary>
[StorableConstructor]
protected DataGridContent(StorableConstructorFlag _)
  : base(_) {
}
88    #endregion
89
/// <summary>
/// Deletes the rows with the given indices by delegating to the preprocessing data.
/// </summary>
/// <param name="rows">Indices of the rows to delete.</param>
public void DeleteRows(IEnumerable<int> rows) {
  IEnumerable<int> rowIndices = rows;
  PreprocessingData.DeleteRowsWithIndices(rowIndices);
}
93
/// <summary>
/// Deletes a single column by delegating to the preprocessing data.
/// </summary>
/// <param name="column">Index of the column to delete.</param>
public void DeleteColumn(int column) {
  int columnIndex = column;
  PreprocessingData.DeleteColumn(columnIndex);
}
97
/// <summary>
/// Validates a cell text for a specific column by delegating to the preprocessing data.
/// </summary>
/// <param name="value">The text to validate.</param>
/// <param name="errorMessage">Receives the message produced by the preprocessing data.</param>
/// <param name="columnIndex">Index of the column the value is meant for.</param>
/// <returns>The result reported by the preprocessing data.</returns>
public bool Validate(string value, out string errorMessage, int columnIndex) {
  bool isValid = PreprocessingData.Validate(value, out errorMessage, columnIndex);
  return isValid;
}
101
/// <summary>
/// Returns the textual representation of one cell.
/// </summary>
/// <param name="rowIndex">Row of the cell.</param>
/// <param name="columnIndex">Column of the cell.</param>
/// <returns>The string delivered by the preprocessing data for that cell.</returns>
public string GetValue(int rowIndex, int columnIndex) {
  // The preprocessing data is addressed column-first, hence the swapped argument order.
  string cellText = PreprocessingData.GetCellAsString(columnIndex, rowIndex);
  return cellText;
}
105
/// <summary>
/// Writes a textual value into one cell.
/// </summary>
/// <param name="value">The text to store.</param>
/// <param name="rowIndex">Row of the cell.</param>
/// <param name="columnIndex">Column of the cell.</param>
/// <returns>The result reported by the preprocessing data.</returns>
public bool SetValue(string value, int rowIndex, int columnIndex) {
  // The preprocessing data is addressed column-first, hence the swapped argument order.
  bool result = PreprocessingData.SetValue(value, columnIndex, rowIndex);
  return result;
}
109
/// <summary>
/// Change notification; subscriptions are forwarded to the <c>Changed</c> event
/// of the underlying preprocessing data, no handlers are stored on this object.
/// </summary>
public event DataPreprocessingChangedEventHandler Changed {
  add {
    PreprocessingData.Changed += value;
  }
  remove {
    PreprocessingData.Changed -= value;
  }
}
114
115    #region unused stuff/not implemented but necessary due to IStringConvertibleMatrix
116#pragma warning disable 0067
/// <summary>
/// Column-independent validation; accepts every value.
/// </summary>
/// <remarks>
/// Per the original note, this overload is not used: DataGridContentView overrides
/// dataGridView_CellValidating and calls the column-aware
/// Validate(string value, out string errorMessage, int columnIndex) instead.
/// </remarks>
/// <param name="value">The text to validate (not inspected).</param>
/// <param name="errorMessage">Always set to the empty string.</param>
/// <returns>Always <c>true</c>.</returns>
public bool Validate(string value, out string errorMessage) {
  errorMessage = string.Empty;
  return true;
}
123
// The following events are declared in the region for members required by
// IStringConvertibleMatrix. Nothing in this class raises them, which is why the
// surrounding pragma silences warning 0067.
public event EventHandler ColumnsChanged;
public event EventHandler RowsChanged;
public event EventHandler ColumnNamesChanged;
public event EventHandler RowNamesChanged;
public event EventHandler SortableViewChanged;
public event EventHandler<EventArgs<int, int>> ItemChanged;
public event EventHandler Reset;
131#pragma warning restore 0067
132    #endregion
133
134    #region Manipulations
/// <summary>
/// Replaces the given cells with one value per column. The value is computed once per
/// column by the aggregator matching the column's type; everything runs in one transaction.
/// </summary>
/// <param name="cells">Maps a column index to the row indices to overwrite in that column.</param>
/// <param name="doubleAggregator">Computes the replacement for a double column; null skips such columns.</param>
/// <param name="dateTimeAggregator">Computes the replacement for a DateTime column; null skips such columns.</param>
/// <param name="stringAggregator">Computes the replacement for a string column; null skips such columns.</param>
private void ReplaceIndicesByValue(IDictionary<int, IList<int>> cells, Func<int, double> doubleAggregator = null,
  Func<int, DateTime> dateTimeAggregator = null, Func<int, string> stringAggregator = null) {
  PreprocessingData.InTransaction(() => {
    foreach (KeyValuePair<int, IList<int>> entry in cells) {
      int columnIndex = entry.Key;
      IList<int> rowIndices = entry.Value;

      // The type checks are tried in the order double, DateTime, string; the first match wins.
      if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(columnIndex)) {
        double replacement = doubleAggregator(columnIndex);
        foreach (int rowIndex in rowIndices) {
          PreprocessingData.SetCell<double>(columnIndex, rowIndex, replacement);
        }
        continue;
      }

      if (dateTimeAggregator != null && PreprocessingData.VariableHasType<DateTime>(columnIndex)) {
        DateTime replacement = dateTimeAggregator(columnIndex);
        foreach (int rowIndex in rowIndices) {
          PreprocessingData.SetCell<DateTime>(columnIndex, rowIndex, replacement);
        }
        continue;
      }

      if (stringAggregator != null && PreprocessingData.VariableHasType<string>(columnIndex)) {
        string replacement = stringAggregator(columnIndex);
        foreach (int rowIndex in rowIndices) {
          PreprocessingData.SetCell<string>(columnIndex, rowIndex, replacement);
        }
      }
    }
  });
}
155
/// <summary>
/// Replaces the given cells with individual values. For every column the aggregator
/// matching the column's type delivers a sequence that is paired position-wise with the
/// column's row indices; everything runs in one transaction.
/// </summary>
/// <param name="cells">Maps a column index to the row indices to overwrite in that column.</param>
/// <param name="doubleAggregator">Delivers the replacements for a double column; null skips such columns.</param>
/// <param name="dateTimeAggregator">Delivers the replacements for a DateTime column; null skips such columns.</param>
/// <param name="stringAggregator">Delivers the replacements for a string column; null skips such columns.</param>
private void ReplaceIndicesByValues(IDictionary<int, IList<int>> cells, Func<int, IEnumerable<double>> doubleAggregator = null,
  Func<int, IEnumerable<DateTime>> dateTimeAggregator = null, Func<int, IEnumerable<string>> stringAggregator = null) {
  PreprocessingData.InTransaction(() => {
    foreach (KeyValuePair<int, IList<int>> entry in cells) {
      int columnIndex = entry.Key;

      // The type checks are tried in the order double, DateTime, string; the first match wins.
      if (doubleAggregator != null && PreprocessingData.VariableHasType<double>(columnIndex)) {
        AssignZippedValues<double>(columnIndex, entry.Value, doubleAggregator(columnIndex));
      } else if (dateTimeAggregator != null && PreprocessingData.VariableHasType<DateTime>(columnIndex)) {
        AssignZippedValues<DateTime>(columnIndex, entry.Value, dateTimeAggregator(columnIndex));
      } else if (stringAggregator != null && PreprocessingData.VariableHasType<string>(columnIndex)) {
        AssignZippedValues<string>(columnIndex, entry.Value, stringAggregator(columnIndex));
      }
    }
  });
}

// Pairs row indices with values position by position (Zip stops at the shorter sequence)
// and writes each value into its row of the given column.
private void AssignZippedValues<T>(int columnIndex, IList<int> rowIndices, IEnumerable<T> values) {
  IEnumerable<KeyValuePair<int, T>> assignments =
    rowIndices.Zip(values, (rowIndex, value) => new KeyValuePair<int, T>(rowIndex, value));
  foreach (KeyValuePair<int, T> assignment in assignments) {
    PreprocessingData.SetCell<T>(columnIndex, assignment.Key, assignment.Value);
  }
}
176
/// <summary>
/// Replaces the given cells with the mean of their column. Only double and DateTime
/// columns are handled; other columns are left untouched.
/// </summary>
/// <param name="cells">Maps a column index to the row indices to overwrite in that column.</param>
/// <param name="considerSelection">Passed through to the mean computation of the preprocessing data.</param>
public void ReplaceIndicesByMean(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
  Func<int, double> meanOfDoubles =
    columnIndex => PreprocessingData.GetMean<double>(columnIndex, considerSelection);
  Func<int, DateTime> meanOfDates =
    columnIndex => PreprocessingData.GetMean<DateTime>(columnIndex, considerSelection);

  ReplaceIndicesByValue(cells, meanOfDoubles, meanOfDates);
}
182
/// <summary>
/// Replaces the given cells with the median of their column. Only double and DateTime
/// columns are handled; other columns are left untouched.
/// </summary>
/// <param name="cells">Maps a column index to the row indices to overwrite in that column.</param>
/// <param name="considerSelection">Passed through to the median computation of the preprocessing data.</param>
public void ReplaceIndicesByMedianValue(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
  Func<int, double> medianOfDoubles =
    columnIndex => PreprocessingData.GetMedian<double>(columnIndex, considerSelection);
  Func<int, DateTime> medianOfDates =
    columnIndex => PreprocessingData.GetMedian<DateTime>(columnIndex, considerSelection);

  ReplaceIndicesByValue(cells, medianOfDoubles, medianOfDates);
}
188
/// <summary>
/// Replaces the given cells with the mode of their column. Double, DateTime and string
/// columns are handled.
/// </summary>
/// <param name="cells">Maps a column index to the row indices to overwrite in that column.</param>
/// <param name="considerSelection">Passed through to the mode computation of the preprocessing data.</param>
public void ReplaceIndicesByMode(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
  Func<int, double> modeOfDoubles =
    columnIndex => PreprocessingData.GetMode<double>(columnIndex, considerSelection);
  Func<int, DateTime> modeOfDates =
    columnIndex => PreprocessingData.GetMode<DateTime>(columnIndex, considerSelection);
  Func<int, string> modeOfStrings =
    columnIndex => PreprocessingData.GetMode<string>(columnIndex, considerSelection);

  ReplaceIndicesByValue(cells, modeOfDoubles, modeOfDates, modeOfStrings);
}
195
/// <summary>
/// Replaces the given cells with random values drawn between the minimum and maximum of
/// their column. Only double and DateTime columns are handled; every cell gets its own draw.
/// </summary>
/// <param name="cells">Maps a column index to the row indices to overwrite in that column.</param>
/// <param name="considerSelection">Passed through to the min/max computation of the preprocessing data.</param>
public void ReplaceIndicesByRandomValue(IDictionary<int, IList<int>> cells, bool considerSelection = false) {
  var rand = new FastRandom();

  // Min and max are read when the aggregator is called; the random draws themselves
  // happen lazily, one per row index, while the sequence is consumed.
  Func<int, IEnumerable<double>> randomDoubles = columnIndex => {
    double lower = PreprocessingData.GetMin<double>(columnIndex, considerSelection);
    double upper = PreprocessingData.GetMax<double>(columnIndex, considerSelection);
    double span = upper - lower;
    return cells[columnIndex].Select(unused => rand.NextDouble() * span + lower);
  };

  Func<int, IEnumerable<DateTime>> randomDates = columnIndex => {
    var earliest = PreprocessingData.GetMin<DateTime>(columnIndex, considerSelection);
    var latest = PreprocessingData.GetMax<DateTime>(columnIndex, considerSelection);
    double spanInSeconds = (latest - earliest).TotalSeconds;
    return cells[columnIndex].Select(unused => earliest + TimeSpan.FromSeconds(rand.NextDouble() * spanInSeconds));
  };

  ReplaceIndicesByValues(cells, randomDoubles, randomDates);
}
212
/// <summary>
/// Writes the same text into every given cell, inside one transaction.
/// The result of each individual write is not inspected.
/// </summary>
/// <param name="cells">Maps a column index to the row indices to overwrite in that column.</param>
/// <param name="value">The text to store in each cell.</param>
public void ReplaceIndicesByString(IDictionary<int, IList<int>> cells, string value) {
  PreprocessingData.InTransaction(() => {
    foreach (KeyValuePair<int, IList<int>> entry in cells) {
      int columnIndex = entry.Key;
      foreach (int rowIndex in entry.Value) {
        PreprocessingData.SetValue(value, columnIndex, rowIndex);
      }
    }
  });
}
222
223
/// <summary>
/// Replaces the given cells by linear interpolation between the nearest present values
/// before and after each run of cells, inside one transaction.
/// </summary>
/// <param name="cells">Maps a column index to the row indices to overwrite in that column.</param>
public void ReplaceIndicesByLinearInterpolationOfNeighbours(IDictionary<int, IList<int>> cells) {
  PreprocessingData.InTransaction(() => {
    foreach (KeyValuePair<int, IList<int>> column in cells) {
      // All (previous, next) index pairs are determined first, then each one is filled.
      List<Tuple<int, int>> gaps = GetStartAndEndingsForInterpolation(column);
      foreach (Tuple<int, int> gap in gaps) {
        int prevIndex = gap.Item1;
        int nextIndex = gap.Item2;
        Interpolate(column, prevIndex, nextIndex);
      }
    }
  });
}
234
/// <summary>
/// Groups the row indices of one column into runs of consecutive rows and determines,
/// for every run, the index of the nearest present value before and after it.
/// </summary>
/// <param name="column">Column index (key) and the row indices to interpolate (value).</param>
/// <returns>
/// One (previous, next) index pair per run that has a present value on both sides.
/// Runs without a present value before them (previous == -1) or after them
/// (next == number of rows) are omitted, because they cannot be interpolated.
/// </returns>
private List<Tuple<int, int>> GetStartAndEndingsForInterpolation(KeyValuePair<int, IList<int>> column) {
  var startEndings = new List<Tuple<int, int>>();
  var rowIndices = column.Value.OrderBy(x => x).ToList();
  int count = rowIndices.Count;

  // int.MinValue marks "no run open"; it cannot collide with a search result,
  // since IndexOfPrevPresentValue never returns less than -1.
  int start = int.MinValue;
  for (int i = 0; i < count; ++i) {
    if (start == int.MinValue) {
      start = IndexOfPrevPresentValue(column.Key, rowIndices[i]);
    }

    // A run ends at the last index or where the next index is not directly adjacent.
    bool runEndsHere = i + 1 == count || rowIndices[i + 1] - rowIndices[i] > 1;
    if (!runEndsHere) continue;

    int next = IndexOfNextPresentValue(column.Key, rowIndices[i]);
    // Row 0 is a valid predecessor; only -1 means "nothing found". The former test
    // "start > 0" wrongly skipped runs whose previous present value sits in row 0.
    if (start >= 0 && next < PreprocessingData.Rows) {
      startEndings.Add(new Tuple<int, int>(start, next));
    }
    start = int.MinValue;
  }
  return startEndings;
}
254
/// <summary>
/// Fills the rows from <paramref name="prevIndex"/> (inclusive) up to
/// <paramref name="nextIndex"/> (exclusive) of one column with values on the straight line
/// between the cells at those two indices. Handles double and DateTime columns; other
/// column types are left untouched.
/// </summary>
/// <param name="column">The column to work on; only its key (the column index) is used.</param>
/// <param name="prevIndex">Row index of the value the line starts from.</param>
/// <param name="nextIndex">Row index of the value the line ends at.</param>
private void Interpolate(KeyValuePair<int, IList<int>> column, int prevIndex, int nextIndex) {
  int columnIndex = column.Key;
  int stepCount = nextIndex - prevIndex;

  if (PreprocessingData.VariableHasType<double>(columnIndex)) {
    double first = PreprocessingData.GetCell<double>(columnIndex, prevIndex);
    double last = PreprocessingData.GetCell<double>(columnIndex, nextIndex);
    double stepSize = (last - first) / stepCount;

    // Offset 0 rewrites the starting cell with its own value.
    for (int offset = 0; offset < stepCount; ++offset) {
      double interpolated = first + (stepSize * offset);
      PreprocessingData.SetCell<double>(columnIndex, prevIndex + offset, interpolated);
    }
    return;
  }

  if (PreprocessingData.VariableHasType<DateTime>(columnIndex)) {
    DateTime first = PreprocessingData.GetCell<DateTime>(columnIndex, prevIndex);
    DateTime last = PreprocessingData.GetCell<DateTime>(columnIndex, nextIndex);
    // Dates are interpolated on a scale of seconds.
    double stepSeconds = (last - first).TotalSeconds / stepCount;

    for (int offset = 0; offset < stepCount; ++offset) {
      DateTime interpolated = first.AddSeconds(stepSeconds * offset);
      PreprocessingData.SetCell<DateTime>(columnIndex, prevIndex + offset, interpolated);
    }
  }
}
278
/// <summary>
/// Searches upwards from the row before <paramref name="start"/> for the nearest
/// non-empty cell of the column.
/// </summary>
/// <param name="columnIndex">The column to search in.</param>
/// <param name="start">The row whose predecessor is looked for; it is not examined itself.</param>
/// <returns>The row index of the nearest non-empty cell, or -1 if every earlier cell is empty.</returns>
private int IndexOfPrevPresentValue(int columnIndex, int start) {
  int candidate = start - 1;
  for (; candidate >= 0; candidate--) {
    if (!PreprocessingData.IsCellEmpty(columnIndex, candidate)) {
      break;
    }
  }
  return candidate;
}
287
/// <summary>
/// Searches downwards from the row after <paramref name="start"/> for the nearest
/// non-empty cell of the column.
/// </summary>
/// <param name="columnIndex">The column to search in.</param>
/// <param name="start">The row whose successor is looked for; it is not examined itself.</param>
/// <returns>
/// The row index of the nearest non-empty cell, or a value that is not below the
/// number of rows if every later cell is empty.
/// </returns>
private int IndexOfNextPresentValue(int columnIndex, int start) {
  int candidate = start + 1;
  for (; candidate < PreprocessingData.Rows; candidate++) {
    if (!PreprocessingData.IsCellEmpty(columnIndex, candidate)) {
      break;
    }
  }
  return candidate;
}
296
/// <summary>
/// Randomly reorders the rows of the data, inside one transaction.
/// </summary>
/// <param name="shuffleRangesSeparately">
/// If true, the rows of the test partition and of the training partition are shuffled
/// one partition after the other, each only within its own range; rows outside the
/// range being processed keep their position. If false, all rows are shuffled together.
/// </param>
public void Shuffle(bool shuffleRangesSeparately) {
  var random = new FastRandom();

  if (!shuffleRangesSeparately) {
    PreprocessingData.InTransaction(() => {
      int[] permutation = Enumerable.Range(0, PreprocessingData.Rows).ToArray();
      permutation.ShuffleInPlace(random);
      ReOrderToIndices(permutation);
    });
    return;
  }

  // The partitions are read before the transaction starts; test comes first, then training.
  var partitions = new[] { PreprocessingData.TestPartition, PreprocessingData.TrainingPartition };
  PreprocessingData.InTransaction(() => {
    foreach (IntRange partition in partitions) {
      // Start from the identity order and overwrite only the slots of this partition.
      int[] permutation = Enumerable.Range(0, PreprocessingData.Rows).ToArray();
      int[] shuffledSlice = Enumerable.Range(partition.Start, partition.Size).Shuffle(random).ToArray();

      int firstRow = partition.Start;
      for (int offset = 0; firstRow + offset < partition.End; offset++) {
        permutation[firstRow + offset] = shuffledSlice[offset];
      }

      ReOrderToIndices(permutation);
    }
  });
}
322
/// <summary>
/// Reorders the rows of every double, string and DateTime column so that row i receives
/// the value that was previously stored in row <c>indices[i]</c>; runs in one transaction.
/// Columns of any other type are skipped.
/// </summary>
/// <param name="indices">For each target row, the index of the source row.</param>
public void ReOrderToIndices(int[] indices) {
  PreprocessingData.InTransaction(() => {
    int columnCount = 0;
    // The column count is re-read on every pass, as the loop condition is evaluated each time.
    for (int columnIndex = 0; columnIndex < (columnCount = PreprocessingData.Columns); ++columnIndex) {
      // The type checks are tried in the order double, string, DateTime; the first match wins.
      if (PreprocessingData.VariableHasType<double>(columnIndex)) {
        ReOrderToIndices<double>(columnIndex, indices);
        continue;
      }
      if (PreprocessingData.VariableHasType<string>(columnIndex)) {
        ReOrderToIndices<string>(columnIndex, indices);
        continue;
      }
      if (PreprocessingData.VariableHasType<DateTime>(columnIndex)) {
        ReOrderToIndices<DateTime>(columnIndex, indices);
      }
    }
  });
}
335
/// <summary>
/// Reorders a single column: row i receives the value previously stored in row
/// <c>indices[i]</c>. The column is copied first, so reads are not affected by the writes.
/// </summary>
/// <typeparam name="T">The value type of the column.</typeparam>
/// <param name="columnIndex">The column to reorder.</param>
/// <param name="indices">For each target row, the index of the source row.</param>
/// <exception cref="InvalidOperationException">
/// The number of indices differs from the number of values in the column.
/// </exception>
private void ReOrderToIndices<T>(int columnIndex, int[] indices) {
  List<T> snapshot = PreprocessingData.GetValues<T>(columnIndex).ToList();
  bool lengthsMatch = indices.Length == snapshot.Count;
  if (!lengthsMatch) {
    throw new InvalidOperationException("The number of provided indices does not match the values.");
  }

  for (int targetRow = 0; targetRow < indices.Length; targetRow++) {
    int sourceRow = indices[targetRow];
    PreprocessingData.SetCell<T>(columnIndex, targetRow, snapshot[sourceRow]);
  }
}
345    #endregion
346  }
347}
Note: See TracBrowser for help on using the repository browser.