[13502] | 1 | #region License Information
|
---|
| 2 | /* HeuristicLab
|
---|
[14185] | 3 | * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
|
---|
[13502] | 4 | *
|
---|
| 5 | * This file is part of HeuristicLab.
|
---|
| 6 | *
|
---|
| 7 | * HeuristicLab is free software: you can redistribute it and/or modify
|
---|
| 8 | * it under the terms of the GNU General Public License as published by
|
---|
| 9 | * the Free Software Foundation, either version 3 of the License, or
|
---|
| 10 | * (at your option) any later version.
|
---|
| 11 | *
|
---|
| 12 | * HeuristicLab is distributed in the hope that it will be useful,
|
---|
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 15 | * GNU General Public License for more details.
|
---|
| 16 | *
|
---|
| 17 | * You should have received a copy of the GNU General Public License
|
---|
| 18 | * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
|
---|
| 19 | */
|
---|
| 20 | #endregion
|
---|
| 21 |
|
---|
| 22 | using System;
|
---|
[10783] | 23 | using System.Collections.Generic;
|
---|
[15518] | 24 | using System.Linq;
|
---|
[10783] | 25 | using HeuristicLab.Common;
|
---|
| 26 | using HeuristicLab.Core;
|
---|
| 27 | using HeuristicLab.Data;
|
---|
[15518] | 28 | using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
|
---|
[10783] | 29 | using HeuristicLab.Problems.DataAnalysis;
|
---|
| 30 |
|
---|
[13502] | 31 | namespace HeuristicLab.DataPreprocessing {
|
---|
/// <summary>
/// Wraps an <see cref="IPreprocessingData"/> instance and can additionally hold a row-filtered copy of it.
/// </summary>
/// <remarks>
/// While a filter is set (<see cref="IsFiltered"/> is true) members that go through
/// <see cref="ActiveData"/> read from the filtered copy, and modifications of cells, rows and columns,
/// as well as import, undo and starting a transaction, are rejected with an
/// <see cref="InvalidOperationException"/>. Several members (e.g. partitions, transformations, selection,
/// validation, export) always operate on the unfiltered data.
/// </remarks>
[Item("FilteredPreprocessingData", "Represents filtered data used for preprocessing.")]
[StorableClass]
public sealed class FilteredPreprocessingData : NamedItem, IFilteredPreprocessingData {

  // The unfiltered data; target of all modifying operations.
  [Storable]
  private readonly IPreprocessingData originalData;
  // Filtered copy of originalData; null while no filter is active.
  [Storable]
  private IPreprocessingData filteredData;

  /// <summary>
  /// Gets the filtered copy while a filter is active, otherwise the original data.
  /// </summary>
  public IPreprocessingData ActiveData {
    get { return IsFiltered ? filteredData : originalData; }
  }

  #region Constructor, Cloning & Persistence
  /// <summary>
  /// Creates an unfiltered wrapper around <paramref name="preprocessingData"/>.
  /// </summary>
  public FilteredPreprocessingData(IPreprocessingData preprocessingData)
    : base() {
    originalData = preprocessingData;
    filteredData = null;
  }

  private FilteredPreprocessingData(FilteredPreprocessingData original, Cloner cloner)
    : base(original, cloner) {
    // NOTE(review): both data objects are shared by reference with the source instance instead of
    // being cloned through the cloner - confirm whether sharing is intended before changing this.
    originalData = original.originalData;
    filteredData = original.filteredData;
  }
  public override IDeepCloneable Clone(Cloner cloner) {
    return new FilteredPreprocessingData(this, cloner);
  }

  [StorableConstructor]
  private FilteredPreprocessingData(bool deserializing)
    : base(deserializing) { }
  #endregion

  #region Cells
  public bool IsCellEmpty(int columnIndex, int rowIndex) {
    return ActiveData.IsCellEmpty(columnIndex, rowIndex);
  }

  public T GetCell<T>(int columnIndex, int rowIndex) {
    return ActiveData.GetCell<T>(columnIndex, rowIndex);
  }

  /// <exception cref="InvalidOperationException">Thrown while a filter is active.</exception>
  public void SetCell<T>(int columnIndex, int rowIndex, T value) {
    // The message used to name "SetValues" here, which pointed at the wrong operation.
    ThrowIfFiltered("SetCell");
    originalData.SetCell<T>(columnIndex, rowIndex, value);
  }

  public string GetCellAsString(int columnIndex, int rowIndex) {
    return ActiveData.GetCellAsString(columnIndex, rowIndex);
  }

  public IList<T> GetValues<T>(int columnIndex, bool considerSelection) {
    return ActiveData.GetValues<T>(columnIndex, considerSelection);
  }

  /// <exception cref="InvalidOperationException">Thrown while a filter is active.</exception>
  public void SetValues<T>(int columnIndex, IList<T> values) {
    ThrowIfFiltered("SetValues");
    originalData.SetValues<T>(columnIndex, values);
  }

  /// <exception cref="InvalidOperationException">Thrown while a filter is active.</exception>
  public bool SetValue(string value, int columnIndex, int rowIndex) {
    ThrowIfFiltered("SetValue");
    return originalData.SetValue(value, columnIndex, rowIndex);
  }

  public int Columns {
    get { return ActiveData.Columns; }
  }

  public int Rows {
    get { return ActiveData.Rows; }
  }
  #endregion

  #region Rows
  /// <exception cref="InvalidOperationException">Thrown while a filter is active.</exception>
  public void InsertRow(int rowIndex) {
    ThrowIfFiltered("InsertRow");
    originalData.InsertRow(rowIndex);
  }

  /// <exception cref="InvalidOperationException">Thrown while a filter is active.</exception>
  public void DeleteRow(int rowIndex) {
    ThrowIfFiltered("DeleteRow");
    originalData.DeleteRow(rowIndex);
  }

  /// <exception cref="InvalidOperationException">Thrown while a filter is active.</exception>
  public void DeleteRowsWithIndices(IEnumerable<int> rows) {
    ThrowIfFiltered("DeleteRowsWithIndices");
    originalData.DeleteRowsWithIndices(rows);
  }

  /// <exception cref="InvalidOperationException">Thrown while a filter is active.</exception>
  public void InsertColumn<T>(string variableName, int columnIndex) {
    ThrowIfFiltered("InsertColumn");
    originalData.InsertColumn<T>(variableName, columnIndex);
  }

  /// <exception cref="InvalidOperationException">Thrown while a filter is active.</exception>
  public void DeleteColumn(int columnIndex) {
    ThrowIfFiltered("DeleteColumn");
    originalData.DeleteColumn(columnIndex);
  }

  /// <exception cref="InvalidOperationException">Thrown while a filter is active.</exception>
  public void RenameColumn(int columnIndex, string name) {
    ThrowIfFiltered("RenameColumn");
    originalData.RenameColumn(columnIndex, name);
  }

  /// <exception cref="InvalidOperationException">Thrown while a filter is active.</exception>
  public void RenameColumns(IList<string> names) {
    ThrowIfFiltered("RenameColumns");
    originalData.RenameColumns(names);
  }

  // Always answered by the original data, even while a filter is active.
  public bool AreAllStringColumns(IEnumerable<int> columnIndices) {
    return originalData.AreAllStringColumns(columnIndices);
  }
  #endregion

  #region Variables
  public IEnumerable<string> VariableNames {
    get { return ActiveData.VariableNames; }
  }
  // Always answered by the original data, even while a filter is active.
  public IEnumerable<string> GetDoubleVariableNames() {
    return originalData.GetDoubleVariableNames();
  }
  public string GetVariableName(int columnIndex) {
    return ActiveData.GetVariableName(columnIndex);
  }

  public int GetColumnIndex(string variableName) {
    return ActiveData.GetColumnIndex(variableName);
  }

  // Always answered by the original data, even while a filter is active.
  public bool VariableHasType<T>(int columnIndex) {
    return originalData.VariableHasType<T>(columnIndex);
  }

  public Type GetVariableType(int columnIndex) {
    return ActiveData.GetVariableType(columnIndex);
  }

  public IList<string> InputVariables {
    get { return ActiveData.InputVariables; }
  }

  public string TargetVariable {
    get { return ActiveData.TargetVariable; }
  } // optional
  #endregion

  #region Partitions
  // Partitions are always taken from the original data.
  public IntRange TrainingPartition {
    get { return originalData.TrainingPartition; }
  }

  public IntRange TestPartition {
    get { return originalData.TestPartition; }
  }
  #endregion

  #region Transformations
  public IList<ITransformation> Transformations {
    get { return originalData.Transformations; }
  }
  #endregion

  #region Validation
  public bool Validate(string value, out string errorMessage, int columnIndex) {
    return originalData.Validate(value, out errorMessage, columnIndex);
  }
  #endregion

  #region Import & Export
  /// <exception cref="InvalidOperationException">Thrown while a filter is active.</exception>
  public void Import(IDataAnalysisProblemData problemData) {
    ThrowIfFiltered("Import");
    originalData.Import(problemData);
  }

  // Exports the original data regardless of the filter state.
  public Dataset ExportToDataset() {
    return originalData.ExportToDataset();
  }
  #endregion

  #region Selection
  // Selection state and its change notification are forwarded to the original data.
  public IDictionary<int, IList<int>> Selection {
    get { return originalData.Selection; }
    set { originalData.Selection = value; }
  }

  public void ClearSelection() {
    originalData.ClearSelection();
  }

  public event EventHandler SelectionChanged {
    add { originalData.SelectionChanged += value; }
    remove { originalData.SelectionChanged -= value; }
  }
  #endregion

  #region Transactions
  // Subscriptions are forwarded to the original data.
  public event DataPreprocessingChangedEventHandler Changed {
    add { originalData.Changed += value; }
    remove { originalData.Changed -= value; }
  }

  /// <summary>
  /// Gets whether undo is available; always false while a filter is active.
  /// </summary>
  public bool IsUndoAvailable {
    get { return !IsFiltered && originalData.IsUndoAvailable; }
  }

  /// <exception cref="InvalidOperationException">Thrown while a filter is active.</exception>
  public void Undo() {
    ThrowIfFiltered("Undo");
    originalData.Undo();
  }

  /// <exception cref="InvalidOperationException">Thrown while a filter is active.</exception>
  public void InTransaction(Action action, DataPreprocessingChangedEventType type = DataPreprocessingChangedEventType.Any) {
    ThrowIfFiltered("Transaction");
    originalData.InTransaction(action, type);
  }

  /// <exception cref="InvalidOperationException">Thrown while a filter is active.</exception>
  public void BeginTransaction(DataPreprocessingChangedEventType type) {
    ThrowIfFiltered("Transaction");
    originalData.BeginTransaction(type);
  }

  // Not guarded by the filter check, unlike BeginTransaction.
  public void EndTransaction() {
    originalData.EndTransaction();
  }
  #endregion

  #region Statistics
  public T GetMin<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    return ActiveData.GetMin<T>(columnIndex, considerSelection, emptyValue);
  }
  public T GetMax<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    return ActiveData.GetMax<T>(columnIndex, considerSelection, emptyValue);
  }
  public T GetMean<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    return ActiveData.GetMean<T>(columnIndex, considerSelection, emptyValue);
  }
  public T GetMedian<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
    // Previously delegated to GetMean, so the median was never actually computed.
    return ActiveData.GetMedian<T>(columnIndex, considerSelection, emptyValue);
  }
  public T GetMode<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IEquatable<T> {
    return ActiveData.GetMode<T>(columnIndex, considerSelection, emptyValue);
  }
  public T GetStandardDeviation<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    return ActiveData.GetStandardDeviation<T>(columnIndex, considerSelection, emptyValue);
  }
  public T GetVariance<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    return ActiveData.GetVariance<T>(columnIndex, considerSelection, emptyValue);
  }
  public T GetQuantile<T>(double alpha, int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
    return ActiveData.GetQuantile<T>(alpha, columnIndex, considerSelection, emptyValue);
  }
  public int GetDistinctValues<T>(int columnIndex, bool considerSelection = false) {
    return ActiveData.GetDistinctValues<T>(columnIndex, considerSelection);
  }

  public int GetMissingValueCount() {
    return ActiveData.GetMissingValueCount();
  }
  public int GetMissingValueCount(int columnIndex) {
    return ActiveData.GetMissingValueCount(columnIndex);
  }
  public int GetRowMissingValueCount(int rowIndex) {
    return ActiveData.GetRowMissingValueCount(rowIndex);
  }
  #endregion

  #region Filters
  /// <summary>
  /// Builds a filtered copy of the original data that keeps only the rows flagged in
  /// <paramref name="remainingRows"/>, activates it and raises <see cref="FilterChanged"/>.
  /// </summary>
  /// <param name="remainingRows">One flag per row index; rows whose flag is true are kept.</param>
  /// <exception cref="ArgumentNullException">Thrown if <paramref name="remainingRows"/> is null.</exception>
  public void SetFilter(bool[] remainingRows) {
    if (remainingRows == null)
      throw new ArgumentNullException("remainingRows");

    // Materialized once because the same index list is applied to every column.
    var remainingIndices = Enumerable.Range(0, remainingRows.Length).Where(x => remainingRows[x]).ToList();

    // Work on a local copy and publish it only after filtering succeeded, so a failure
    // cannot leave a half-filtered data set behind as the active data.
    var filtered = (IPreprocessingData)originalData.Clone();
    filtered.InTransaction(() => {
      foreach (var variableName in filtered.VariableNames) {
        var columnIndex = filtered.GetColumnIndex(variableName);
        if (filtered.VariableHasType<double>(columnIndex)) {
          FilterColumn<double>(filtered, columnIndex, remainingIndices);
        } else if (filtered.VariableHasType<DateTime>(columnIndex)) {
          FilterColumn<DateTime>(filtered, columnIndex, remainingIndices);
        } else if (filtered.VariableHasType<string>(columnIndex)) {
          FilterColumn<string>(filtered, columnIndex, remainingIndices);
        }
        // Columns of any other type are left untouched.
      }
    });

    filteredData = filtered;
    OnFilterChanged();
  }

  // Replaces the values of one column by the values at the given row indices.
  private static void FilterColumn<T>(IPreprocessingData data, int columnIndex, IList<int> remainingIndices) {
    var values = data.GetValues<T>(columnIndex);
    var filteredValues = remainingIndices.Select(x => values[x]).ToList();
    data.SetValues(columnIndex, filteredValues);
  }

  /// <summary>
  /// Writes the values of the filtered copy back into the original data and then resets the filter.
  /// </summary>
  /// <exception cref="InvalidOperationException">Thrown if no filter is active.</exception>
  /// <exception cref="ArgumentException">Thrown if a column is neither double, string nor DateTime.</exception>
  public void PersistFilter() {
    // Without this guard a missing filter surfaced as a NullReferenceException from inside
    // an already started transaction on the original data.
    if (!IsFiltered)
      throw new InvalidOperationException("PersistFilter not possible while data is not filtered");

    originalData.InTransaction(() => {
      for (int i = 0; i < filteredData.Columns; ++i) {
        if (filteredData.VariableHasType<double>(i)) {
          originalData.SetValues<double>(i, filteredData.GetValues<double>(i));
        } else if (filteredData.VariableHasType<string>(i)) {
          originalData.SetValues<string>(i, filteredData.GetValues<string>(i));
        } else if (filteredData.VariableHasType<DateTime>(i)) {
          originalData.SetValues<DateTime>(i, filteredData.GetValues<DateTime>(i));
        } else {
          throw new ArgumentException("Data types of columns do not match");
        }
      }
    });
    ResetFilter();
  }

  /// <summary>
  /// Discards the filtered copy and raises <see cref="FilterChanged"/>.
  /// </summary>
  public void ResetFilter() {
    filteredData = null;
    OnFilterChanged();
  }

  /// <summary>
  /// Gets whether a filtered copy currently exists.
  /// </summary>
  public bool IsFiltered {
    get { return filteredData != null; }
  }

  public event EventHandler FilterChanged;

  private void OnFilterChanged() {
    // Read the event field once so the null check and the invocation see the same delegate.
    var handler = FilterChanged;
    if (handler != null) {
      handler(this, EventArgs.Empty);
    }
  }

  // Shared guard for operations that must not run while a filter is active.
  private void ThrowIfFiltered(string operation) {
    if (IsFiltered)
      throw new InvalidOperationException(operation + " not possible while data is filtered");
  }
  #endregion
}
|
---|
| 380 | }
|
---|