[13502] | 1 | #region License Information
|
---|
| 2 | /* HeuristicLab
|
---|
[17180] | 3 | * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
[13502] | 4 | *
|
---|
| 5 | * This file is part of HeuristicLab.
|
---|
| 6 | *
|
---|
| 7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
| 8 | * it under the terms of the GNU General Public License as published by
|
---|
| 9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 10 | * (at your option) any later version.
|
---|
| 11 | *
|
---|
| 12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 15 | * GNU General Public License for more details.
|
---|
| 16 | *
|
---|
| 17 | * You should have received a copy of the GNU General Public License
|
---|
| 18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | */
|
---|
| 20 | #endregion
|
---|
| 21 |
|
---|
| 22 | using System;
|
---|
[10783] | 23 | using System.Collections.Generic;
|
---|
[15518] | 24 | using System.Linq;
|
---|
[10783] | 25 | using HeuristicLab.Common;
|
---|
| 26 | using HeuristicLab.Core;
|
---|
| 27 | using HeuristicLab.Data;
|
---|
[16565] | 28 | using HEAL.Attic;
|
---|
[10783] | 29 | using HeuristicLab.Problems.DataAnalysis;
|
---|
| 30 |
|
---|
[13502] | 31 | namespace HeuristicLab.DataPreprocessing {
|
---|
/// <summary>
/// Wraps an <see cref="IPreprocessingData"/> instance and can overlay it with a row-filtered copy.
/// </summary>
/// <remarks>
/// While a filter is set (<see cref="IsFiltered"/>), cell, variable-name and statistics queries are
/// answered from the filtered copy via <see cref="ActiveData"/>. Operations that edit cells, rows or
/// columns, import data, undo, or open a transaction throw an <see cref="InvalidOperationException"/>
/// in that state. Selection, partitions, transformations, validation and export always go to the
/// original data.
/// </remarks>
[Item("FilteredPreprocessingData", "Represents filtered data used for preprocessing.")]
[StorableType("26BAE57C-A102-483D-8A09-AEC7132FD837")]
public sealed class FilteredPreprocessingData : NamedItem, IFilteredPreprocessingData {

  // The unfiltered data; every modifying operation is forwarded to this instance.
  [Storable]
  private readonly IPreprocessingData originalData;

  // The filtered copy of originalData, or null while no filter is set.
  [Storable]
  private IPreprocessingData filteredData;

  /// <summary>
  /// Gets the data that read operations work on: the filtered copy while a filter is set,
  /// otherwise the original data.
  /// </summary>
  public IPreprocessingData ActiveData {
    get {
      if (IsFiltered) return filteredData;
      return originalData;
    }
  }

  #region Constructor, Cloning & Persistence
  /// <summary>
  /// Creates an unfiltered wrapper around <paramref name="preprocessingData"/>.
  /// </summary>
  /// <param name="preprocessingData">The data to wrap; it is stored by reference, not copied.</param>
  public FilteredPreprocessingData(IPreprocessingData preprocessingData)
    : base() {
    originalData = preprocessingData;
    filteredData = null;
  }

  /// <summary>
  /// Cloning constructor used by <see cref="Clone(Cloner)"/>.
  /// </summary>
  private FilteredPreprocessingData(FilteredPreprocessingData original, Cloner cloner)
    : base(original, cloner) {
    // NOTE(review): both data objects are assigned by reference instead of being cloned through
    // the cloner, so the clone shares them with the original instance - confirm this is intended.
    originalData = original.originalData;
    filteredData = original.filteredData;
  }

  /// <summary>
  /// Creates a clone of this instance by invoking the cloning constructor.
  /// </summary>
  public override IDeepCloneable Clone(Cloner cloner) {
    return new FilteredPreprocessingData(this, cloner);
  }

  [StorableConstructor]
  private FilteredPreprocessingData(StorableConstructorFlag _) : base(_) { }
  #endregion

  #region Cells
  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public bool IsCellEmpty(int columnIndex, int rowIndex) {
    return ActiveData.IsCellEmpty(columnIndex, rowIndex);
  }

  /// <summary>Reads a cell from <see cref="ActiveData"/>.</summary>
  public T GetCell<T>(int columnIndex, int rowIndex) {
    return ActiveData.GetCell<T>(columnIndex, rowIndex);
  }

  /// <summary>Writes a cell of the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void SetCell<T>(int columnIndex, int rowIndex, T value) {
    // The message used to say "SetValues", which pointed callers at the wrong operation.
    ThrowIfFiltered("SetCell");
    originalData.SetCell<T>(columnIndex, rowIndex, value);
  }

  /// <summary>Reads a cell from <see cref="ActiveData"/> as a string.</summary>
  public string GetCellAsString(int columnIndex, int rowIndex) {
    return ActiveData.GetCellAsString(columnIndex, rowIndex);
  }

  /// <summary>Reads the values of a column from <see cref="ActiveData"/>.</summary>
  public IList<T> GetValues<T>(int columnIndex, bool considerSelection) {
    return ActiveData.GetValues<T>(columnIndex, considerSelection);
  }

  /// <summary>Replaces the values of a column of the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void SetValues<T>(int columnIndex, IList<T> values) {
    ThrowIfFiltered("SetValues");
    originalData.SetValues<T>(columnIndex, values);
  }

  /// <summary>Sets a cell of the original data from its string representation.</summary>
  /// <returns>The result reported by the original data.</returns>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public bool SetValue(string value, int columnIndex, int rowIndex) {
    ThrowIfFiltered("SetValue");
    return originalData.SetValue(value, columnIndex, rowIndex);
  }

  /// <summary>Gets the column count of <see cref="ActiveData"/>.</summary>
  public int Columns {
    get { return ActiveData.Columns; }
  }

  /// <summary>Gets the row count of <see cref="ActiveData"/>.</summary>
  public int Rows {
    get { return ActiveData.Rows; }
  }
  #endregion

  #region Rows
  /// <summary>Inserts a row into the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void InsertRow(int rowIndex) {
    ThrowIfFiltered("InsertRow");
    originalData.InsertRow(rowIndex);
  }

  /// <summary>Deletes a row from the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void DeleteRow(int rowIndex) {
    ThrowIfFiltered("DeleteRow");
    originalData.DeleteRow(rowIndex);
  }

  /// <summary>Deletes the given rows from the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void DeleteRowsWithIndices(IEnumerable<int> rows) {
    ThrowIfFiltered("DeleteRowsWithIndices");
    originalData.DeleteRowsWithIndices(rows);
  }

  /// <summary>Inserts a column into the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void InsertColumn<T>(string variableName, int columnIndex) {
    ThrowIfFiltered("InsertColumn");
    originalData.InsertColumn<T>(variableName, columnIndex);
  }

  /// <summary>Deletes a column from the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void DeleteColumn(int columnIndex) {
    ThrowIfFiltered("DeleteColumn");
    originalData.DeleteColumn(columnIndex);
  }

  /// <summary>Renames a column of the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void RenameColumn(int columnIndex, string name) {
    ThrowIfFiltered("RenameColumn");
    originalData.RenameColumn(columnIndex, name);
  }

  /// <summary>Renames the columns of the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void RenameColumns(IList<string> names) {
    ThrowIfFiltered("RenameColumns");
    originalData.RenameColumns(names);
  }

  /// <summary>Forwards to the original data, regardless of the filter state.</summary>
  public bool AreAllStringColumns(IEnumerable<int> columnIndices) {
    return originalData.AreAllStringColumns(columnIndices);
  }
  #endregion

  #region Variables
  /// <summary>Gets the variable names of <see cref="ActiveData"/>.</summary>
  public IEnumerable<string> VariableNames {
    get { return ActiveData.VariableNames; }
  }

  /// <summary>Forwards to the original data, regardless of the filter state.</summary>
  public IEnumerable<string> GetDoubleVariableNames() {
    return originalData.GetDoubleVariableNames();
  }

  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public string GetVariableName(int columnIndex) {
    return ActiveData.GetVariableName(columnIndex);
  }

  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public int GetColumnIndex(string variableName) {
    return ActiveData.GetColumnIndex(variableName);
  }

  /// <summary>Forwards to the original data, regardless of the filter state.</summary>
  public bool VariableHasType<T>(int columnIndex) {
    return originalData.VariableHasType<T>(columnIndex);
  }

  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public Type GetVariableType(int columnIndex) {
    return ActiveData.GetVariableType(columnIndex);
  }

  /// <summary>Gets the input variables of <see cref="ActiveData"/>.</summary>
  public IList<string> InputVariables {
    get { return ActiveData.InputVariables; }
  }

  /// <summary>Gets the target variable of <see cref="ActiveData"/>.</summary>
  public string TargetVariable {
    get { return ActiveData.TargetVariable; }
  } // optional
  #endregion

  #region Partitions
  /// <summary>Gets the training partition of the original data.</summary>
  public IntRange TrainingPartition {
    get { return originalData.TrainingPartition; }
  }

  /// <summary>Gets the test partition of the original data.</summary>
  public IntRange TestPartition {
    get { return originalData.TestPartition; }
  }
  #endregion

  #region Transformations
  /// <summary>Gets the transformations of the original data.</summary>
  public IList<ITransformation> Transformations {
    get { return originalData.Transformations; }
  }
  #endregion

  #region Validation
  /// <summary>Forwards to the original data, regardless of the filter state.</summary>
  public bool Validate(string value, out string errorMessage, int columnIndex) {
    return originalData.Validate(value, out errorMessage, columnIndex);
  }
  #endregion

  #region Import & Export
  /// <summary>Imports <paramref name="problemData"/> into the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void Import(IDataAnalysisProblemData problemData) {
    ThrowIfFiltered("Import");
    originalData.Import(problemData);
  }

  /// <summary>Exports the original (unfiltered) data, regardless of the filter state.</summary>
  public Dataset ExportToDataset() {
    return originalData.ExportToDataset();
  }
  #endregion

  #region Selection
  /// <summary>Gets or sets the selection stored in the original data.</summary>
  public IDictionary<int, IList<int>> Selection {
    get { return originalData.Selection; }
    set { originalData.Selection = value; }
  }

  /// <summary>Clears the selection stored in the original data.</summary>
  public void ClearSelection() {
    originalData.ClearSelection();
  }

  /// <summary>Subscribes to / unsubscribes from the original data's selection event.</summary>
  public event EventHandler SelectionChanged {
    add { originalData.SelectionChanged += value; }
    remove { originalData.SelectionChanged -= value; }
  }
  #endregion

  #region Transactions
  /// <summary>Subscribes to / unsubscribes from the original data's change event.</summary>
  public event DataPreprocessingChangedEventHandler Changed {
    add { originalData.Changed += value; }
    remove { originalData.Changed -= value; }
  }

  /// <summary>
  /// Gets whether an undo is available: always false while a filter is set, otherwise the value
  /// reported by the original data.
  /// </summary>
  public bool IsUndoAvailable {
    get { return !IsFiltered && originalData.IsUndoAvailable; }
  }

  /// <summary>Forwards the undo request to the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void Undo() {
    ThrowIfFiltered("Undo");
    originalData.Undo();
  }

  /// <summary>Runs <paramref name="action"/> inside a transaction of the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void InTransaction(Action action, DataPreprocessingChangedEventType type = DataPreprocessingChangedEventType.Any) {
    ThrowIfFiltered("Transaction");
    originalData.InTransaction(action, type);
  }

  /// <summary>Begins a transaction on the original data.</summary>
  /// <exception cref="InvalidOperationException">A filter is currently set.</exception>
  public void BeginTransaction(DataPreprocessingChangedEventType type) {
    ThrowIfFiltered("Transaction");
    originalData.BeginTransaction(type);
  }

  /// <summary>Ends a transaction on the original data; the filter state is not checked here.</summary>
  public void EndTransaction() {
    originalData.EndTransaction();
  }
  #endregion

  #region Statistics
  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public T GetMin<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    return ActiveData.GetMin<T>(columnIndex, considerSelection, emptyValue);
  }

  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public T GetMax<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    return ActiveData.GetMax<T>(columnIndex, considerSelection, emptyValue);
  }

  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public T GetMean<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    return ActiveData.GetMean<T>(columnIndex, considerSelection, emptyValue);
  }

  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public T GetMedian<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
    return ActiveData.GetMedian<T>(columnIndex, considerSelection, emptyValue);
  }

  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public T GetMode<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IEquatable<T> {
    return ActiveData.GetMode<T>(columnIndex, considerSelection, emptyValue);
  }

  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public T GetStandardDeviation<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    return ActiveData.GetStandardDeviation<T>(columnIndex, considerSelection, emptyValue);
  }

  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public T GetVariance<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    return ActiveData.GetVariance<T>(columnIndex, considerSelection, emptyValue);
  }

  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public T GetQuantile<T>(double alpha, int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
    return ActiveData.GetQuantile<T>(alpha, columnIndex, considerSelection, emptyValue);
  }

  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public int GetDistinctValues<T>(int columnIndex, bool considerSelection = false) {
    return ActiveData.GetDistinctValues<T>(columnIndex, considerSelection);
  }

  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public int GetMissingValueCount() {
    return ActiveData.GetMissingValueCount();
  }

  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public int GetMissingValueCount(int columnIndex) {
    return ActiveData.GetMissingValueCount(columnIndex);
  }

  /// <summary>Forwards to <see cref="ActiveData"/>.</summary>
  public int GetRowMissingValueCount(int rowIndex) {
    return ActiveData.GetRowMissingValueCount(rowIndex);
  }
  #endregion

  #region Filters
  /// <summary>
  /// Builds a filtered copy of the original data that keeps only the rows flagged in
  /// <paramref name="remainingRows"/>, makes it the active data and raises
  /// <see cref="FilterChanged"/>.
  /// </summary>
  /// <param name="remainingRows">
  /// One flag per row index; rows whose flag is true are kept. Only columns of type double,
  /// DateTime or string are reduced; columns of any other type are left as cloned.
  /// </param>
  /// <exception cref="ArgumentNullException"><paramref name="remainingRows"/> is null.</exception>
  public void SetFilter(bool[] remainingRows) {
    if (remainingRows == null)
      throw new ArgumentNullException("remainingRows");

    // Materialized once so the flag array is not re-scanned for every column.
    var keptRows = Enumerable.Range(0, remainingRows.Length).Where(x => remainingRows[x]).ToList();

    // Build the copy in a local and publish it only after filtering succeeded; otherwise a
    // failure part-way through would leave this instance reporting IsFiltered with broken data.
    var filtered = (IPreprocessingData)originalData.Clone();
    filtered.InTransaction(() => {
      foreach (var variableName in filtered.VariableNames) {
        var column = filtered.GetColumnIndex(variableName);
        if (filtered.VariableHasType<double>(column)) {
          KeepRows<double>(filtered, column, keptRows);
        } else if (filtered.VariableHasType<DateTime>(column)) {
          KeepRows<DateTime>(filtered, column, keptRows);
        } else if (filtered.VariableHasType<string>(column)) {
          KeepRows<string>(filtered, column, keptRows);
        }
      }
    });

    filteredData = filtered;
    OnFilterChanged();
  }

  /// <summary>
  /// Writes the column values of the filtered copy back into the original data within one
  /// transaction and then removes the filter via <see cref="ResetFilter"/>.
  /// </summary>
  /// <exception cref="InvalidOperationException">No filter is currently set.</exception>
  /// <exception cref="ArgumentException">A column is neither double, string nor DateTime.</exception>
  public void PersistFilter() {
    // Previously this surfaced as a NullReferenceException from inside the transaction.
    if (!IsFiltered)
      throw new InvalidOperationException("PersistFilter not possible while data is not filtered");

    originalData.InTransaction(() => {
      for (int column = 0; column < filteredData.Columns; ++column) {
        if (filteredData.VariableHasType<double>(column)) {
          CopyFilteredColumn<double>(column);
        } else if (filteredData.VariableHasType<string>(column)) {
          CopyFilteredColumn<string>(column);
        } else if (filteredData.VariableHasType<DateTime>(column)) {
          CopyFilteredColumn<DateTime>(column);
        } else {
          throw new ArgumentException("Data types of columns do not match");
        }
      }
    });
    ResetFilter();
  }

  /// <summary>
  /// Discards the filtered copy and raises <see cref="FilterChanged"/> (also when no filter was set).
  /// </summary>
  public void ResetFilter() {
    filteredData = null;
    OnFilterChanged();
  }

  /// <summary>Gets whether a filtered copy currently exists.</summary>
  public bool IsFiltered {
    get { return filteredData != null; }
  }

  /// <summary>Raised by <see cref="SetFilter"/> and <see cref="ResetFilter"/>.</summary>
  public event EventHandler FilterChanged;

  // Raises FilterChanged; the delegate is copied first so the null check and the call see the same value.
  private void OnFilterChanged() {
    var handler = FilterChanged;
    if (handler != null) {
      handler(this, EventArgs.Empty);
    }
  }

  // Shared guard of all operations that must not run while a filter is set.
  private void ThrowIfFiltered(string operation) {
    if (IsFiltered)
      throw new InvalidOperationException(operation + " not possible while data is filtered");
  }

  // Replaces one column of data by the values found at the given row indices of that column.
  private static void KeepRows<T>(IPreprocessingData data, int columnIndex, IList<int> rowIndices) {
    var values = data.GetValues<T>(columnIndex);
    var kept = rowIndices.Select(x => values[x]).ToList();
    data.SetValues<T>(columnIndex, kept);
  }

  // Overwrites one column of the original data with the same column of the filtered copy.
  private void CopyFilteredColumn<T>(int columnIndex) {
    originalData.SetValues<T>(columnIndex, filteredData.GetValues<T>(columnIndex));
  }
  #endregion
}
|
---|
| 379 | }
|
---|