Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
11/20/18 13:52:40 (5 years ago)
Author:
pfleck
Message:

#2845 reverted the last merge (r16307) because some revisions were missing

Location:
branches/2845_EnhancedProgress
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/2845_EnhancedProgress

  • branches/2845_EnhancedProgress/HeuristicLab.DataPreprocessing

  • branches/2845_EnhancedProgress/HeuristicLab.DataPreprocessing/3.4

  • branches/2845_EnhancedProgress/HeuristicLab.DataPreprocessing/3.4/Data/PreprocessingData.cs

    r16307 r16308  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) 2002-2016 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2323using System.Collections;
    2424using System.Collections.Generic;
    25 using System.Globalization;
    2625using System.Linq;
    2726using HeuristicLab.Common;
    2827using HeuristicLab.Core;
    2928using HeuristicLab.Data;
    30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3129using HeuristicLab.Problems.DataAnalysis;
    3230
     
    3432
    3533  [Item("PreprocessingData", "Represents data used for preprocessing.")]
    36   [StorableClass]
    37   public class PreprocessingData : NamedItem, IPreprocessingData {
    38 
    39     [Storable]
     34  public abstract class PreprocessingData : NamedItem, IPreprocessingData {
     35    public IntRange TrainingPartition { get; set; }
     36    public IntRange TestPartition { get; set; }
     37
     38    public IList<ITransformation> Transformations { get; protected set; }
     39
    4040    protected IList<IList> variableValues;
    41     [Storable]
    4241    protected IList<string> variableNames;
    4342
    44     #region Constructor, Cloning & Persistence
    45     public PreprocessingData(IDataAnalysisProblemData problemData)
    46       : base() {
    47       Name = "Preprocessing Data";
    48 
    49       Transformations = new List<ITransformation>();
    50       selection = new Dictionary<int, IList<int>>();
    51 
    52       Import(problemData);
    53 
    54       RegisterEventHandler();
     43    public IEnumerable<string> VariableNames {
     44      get { return variableNames; }
     45    }
     46
     47    public IEnumerable<string> GetDoubleVariableNames() {
     48      var doubleVariableNames = new List<string>();
     49      for (int i = 0; i < Columns; ++i) {
     50        if (VariableHasType<double>(i)) {
     51          doubleVariableNames.Add(variableNames[i]);
     52        }
     53      }
     54      return doubleVariableNames;
     55    }
     56
     57    public IList<string> InputVariables { get; private set; }
     58    public string TargetVariable { get; private set; } // optional
     59
     60    public int Columns {
     61      get { return variableNames.Count; }
     62    }
     63
     64    public int Rows {
     65      get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
     66    }
     67
     68    protected IDictionary<int, IList<int>> selection;
     69    public IDictionary<int, IList<int>> Selection {
     70      get { return selection; }
     71      set {
     72        selection = value;
     73        OnSelectionChanged();
     74      }
    5575    }
    5676
     
    6888      RegisterEventHandler();
    6989    }
    70     public override IDeepCloneable Clone(Cloner cloner) {
    71       return new PreprocessingData(this, cloner);
    72     }
    73 
    74     [StorableConstructor]
    75     protected PreprocessingData(bool deserializing)
    76       : base(deserializing) { }
    77     [StorableHook(HookType.AfterDeserialization)]
    78     private void AfterDeserialization() {
     90
     91    protected PreprocessingData(IDataAnalysisProblemData problemData)
     92      : base() {
     93      Name = "Preprocessing Data";
     94
     95      Transformations = new List<ITransformation>();
     96      selection = new Dictionary<int, IList<int>>();
     97
     98      Import(problemData);
     99
    79100      RegisterEventHandler();
    80101    }
    81102
    82     private void RegisterEventHandler() {
    83       Changed += (s, e) => {
    84         switch (e.Type) {
    85           case DataPreprocessingChangedEventType.DeleteRow:
    86           case DataPreprocessingChangedEventType.Any:
    87           case DataPreprocessingChangedEventType.Transformation:
    88             int maxRowIndex = Math.Max(0, Rows);
    89             TrainingPartition.Start = Math.Min(TrainingPartition.Start, maxRowIndex);
    90             TrainingPartition.End = Math.Min(TrainingPartition.End, maxRowIndex);
    91             TestPartition.Start = Math.Min(TestPartition.Start, maxRowIndex);
    92             TestPartition.End = Math.Min(TestPartition.End, maxRowIndex);
    93             break;
    94         }
    95       };
    96     }
    97     #endregion
    98 
    99     #region Cells
    100     public bool IsCellEmpty(int columnIndex, int rowIndex) {
    101       var value = variableValues[columnIndex][rowIndex];
    102       return IsMissingValue(value);
    103     }
    104 
    105     public T GetCell<T>(int columnIndex, int rowIndex) {
    106       return (T)variableValues[columnIndex][rowIndex];
    107     }
    108 
    109     public void SetCell<T>(int columnIndex, int rowIndex, T value) {
    110       SaveSnapshot(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
    111 
    112       for (int i = Rows; i <= rowIndex; i++)
    113         InsertRow(i);
    114       for (int i = Columns; i <= columnIndex; i++)
    115         InsertColumn<T>(i.ToString(), i);
    116 
    117       variableValues[columnIndex][rowIndex] = value;
    118       if (!IsInTransaction)
    119         OnChanged(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
    120     }
    121 
    122     public string GetCellAsString(int columnIndex, int rowIndex) {
    123       return variableValues[columnIndex][rowIndex].ToString();
    124     }
    125 
    126     public IList<T> GetValues<T>(int columnIndex, bool considerSelection) {
    127       if (considerSelection) {
    128         var list = new List<T>();
    129         foreach (var rowIdx in selection[columnIndex]) {
    130           list.Add((T)variableValues[columnIndex][rowIdx]);
    131         }
    132         return list;
    133       } else {
    134         return (IList<T>)variableValues[columnIndex];
    135       }
    136     }
    137 
    138     public void SetValues<T>(int columnIndex, IList<T> values) {
    139       SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
    140       if (VariableHasType<T>(columnIndex)) {
    141         variableValues[columnIndex] = (IList)values;
    142       } else {
    143         throw new ArgumentException("The datatype of column " + columnIndex + " must be of type " + variableValues[columnIndex].GetType().Name + " but was " + typeof(T).Name);
    144       }
    145       if (!IsInTransaction)
    146         OnChanged(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
    147     }
    148 
    149     public bool SetValue(string value, int columnIndex, int rowIndex) {
    150       bool valid = false;
    151       if (VariableHasType<double>(columnIndex)) {
    152         double val;
    153         if (string.IsNullOrWhiteSpace(value)) {
    154           val = double.NaN;
    155           valid = true;
    156         } else {
    157           valid = double.TryParse(value, out val);
    158         }
    159         if (valid)
    160           SetCell(columnIndex, rowIndex, val);
    161       } else if (VariableHasType<string>(columnIndex)) {
    162         valid = value != null;
    163         if (valid)
    164           SetCell(columnIndex, rowIndex, value);
    165       } else if (VariableHasType<DateTime>(columnIndex)) {
    166         DateTime date;
    167         valid = DateTime.TryParse(value, out date);
    168         if (valid)
    169           SetCell(columnIndex, rowIndex, date);
    170       } else {
    171         throw new ArgumentException("column " + columnIndex + " contains a non supported type.");
    172       }
    173 
    174       if (!IsInTransaction)
    175         OnChanged(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
    176 
    177       return valid;
    178     }
    179 
    180     public int Columns {
    181       get { return variableNames.Count; }
    182     }
    183 
    184     public int Rows {
    185       get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
    186     }
    187 
    188     public static bool IsMissingValue(object value) {
    189       if (value is double) return double.IsNaN((double)value);
    190       if (value is string) return string.IsNullOrEmpty((string)value);
    191       if (value is DateTime) return ((DateTime)value).Equals(DateTime.MinValue);
    192       throw new ArgumentException();
    193     }
    194     #endregion
    195 
    196     #region Rows
    197     public void InsertRow(int rowIndex) {
    198       SaveSnapshot(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
    199       foreach (IList column in variableValues) {
    200         Type type = column.GetType().GetGenericArguments()[0];
    201         column.Insert(rowIndex, type.IsValueType ? Activator.CreateInstance(type) : null);
    202       }
    203       if (TrainingPartition.Start <= rowIndex && rowIndex <= TrainingPartition.End) {
    204         TrainingPartition.End++;
    205         if (TrainingPartition.End <= TestPartition.Start) {
    206           TestPartition.Start++;
    207           TestPartition.End++;
    208         }
    209       } else if (TestPartition.Start <= rowIndex && rowIndex <= TestPartition.End) {
    210         TestPartition.End++;
    211         if (TestPartition.End <= TrainingPartition.Start) {
    212           TestPartition.Start++;
    213           TestPartition.End++;
    214         }
    215       }
    216       if (!IsInTransaction)
    217         OnChanged(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
    218     }
    219     public void DeleteRow(int rowIndex) {
    220       SaveSnapshot(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
    221       foreach (IList column in variableValues) {
    222         column.RemoveAt(rowIndex);
    223       }
    224       if (TrainingPartition.Start <= rowIndex && rowIndex <= TrainingPartition.End) {
    225         TrainingPartition.End--;
    226         if (TrainingPartition.End <= TestPartition.Start) {
    227           TestPartition.Start--;
    228           TestPartition.End--;
    229         }
    230       } else if (TestPartition.Start <= rowIndex && rowIndex <= TestPartition.End) {
    231         TestPartition.End--;
    232         if (TestPartition.End <= TrainingPartition.Start) {
    233           TestPartition.Start--;
    234           TestPartition.End--;
    235         }
    236       }
    237       if (!IsInTransaction)
    238         OnChanged(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
    239     }
    240     public void DeleteRowsWithIndices(IEnumerable<int> rows) {
    241       SaveSnapshot(DataPreprocessingChangedEventType.AddRow, -1, -1);
    242       foreach (int rowIndex in rows.OrderByDescending(x => x)) {
    243         foreach (IList column in variableValues) {
    244           column.RemoveAt(rowIndex);
    245         }
    246         if (TrainingPartition.Start <= rowIndex && rowIndex <= TrainingPartition.End) {
    247           TrainingPartition.End--;
    248           if (TrainingPartition.End <= TestPartition.Start) {
    249             TestPartition.Start--;
    250             TestPartition.End--;
    251           }
    252         } else if (TestPartition.Start <= rowIndex && rowIndex <= TestPartition.End) {
    253           TestPartition.End--;
    254           if (TestPartition.End <= TrainingPartition.Start) {
    255             TestPartition.Start--;
    256             TestPartition.End--;
    257           }
    258         }
    259       }
    260       if (!IsInTransaction)
    261         OnChanged(DataPreprocessingChangedEventType.DeleteRow, -1, -1);
    262     }
    263 
    264     public void InsertColumn<T>(string variableName, int columnIndex) {
    265       SaveSnapshot(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
    266       variableValues.Insert(columnIndex, new List<T>(Enumerable.Repeat(default(T), Rows)));
    267       variableNames.Insert(columnIndex, variableName);
    268       if (!IsInTransaction)
    269         OnChanged(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
    270     }
    271 
    272     public void DeleteColumn(int columnIndex) {
    273       SaveSnapshot(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
    274       variableValues.RemoveAt(columnIndex);
    275       variableNames.RemoveAt(columnIndex);
    276       if (!IsInTransaction)
    277         OnChanged(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
    278     }
    279 
    280     public void RenameColumn(int columnIndex, string name) {
    281       SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
    282       if (columnIndex < 0 || columnIndex > variableNames.Count)
    283         throw new ArgumentOutOfRangeException("columnIndex");
    284       variableNames[columnIndex] = name;
    285 
    286       if (!IsInTransaction)
    287         OnChanged(DataPreprocessingChangedEventType.ChangeColumn, -1, -1);
    288     }
    289 
    290     public void RenameColumns(IList<string> names) {
    291       if (names == null) throw new ArgumentNullException("names");
    292       if (names.Count != variableNames.Count) throw new ArgumentException("number of names must match the number of columns.", "names");
    293 
    294       SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, -1, -1);
    295       for (int i = 0; i < names.Count; i++)
    296         variableNames[i] = names[i];
    297 
    298       if (!IsInTransaction)
    299         OnChanged(DataPreprocessingChangedEventType.ChangeColumn, -1, -1);
    300     }
    301 
    302     public bool AreAllStringColumns(IEnumerable<int> columnIndices) {
    303       return columnIndices.All(x => VariableHasType<string>(x));
    304     }
    305     #endregion
    306 
    307     #region Variables
    308     public IEnumerable<string> VariableNames {
    309       get { return variableNames; }
    310     }
    311 
    312     public IEnumerable<string> GetDoubleVariableNames() {
    313       var doubleVariableNames = new List<string>();
    314       for (int i = 0; i < Columns; ++i) {
    315         if (VariableHasType<double>(i)) {
    316           doubleVariableNames.Add(variableNames[i]);
    317         }
    318       }
    319       return doubleVariableNames;
    320     }
    321 
    322     public string GetVariableName(int columnIndex) {
    323       return variableNames[columnIndex];
    324     }
    325 
    326     public int GetColumnIndex(string variableName) {
    327       return variableNames.IndexOf(variableName);
    328     }
    329 
    330     public bool VariableHasType<T>(int columnIndex) {
    331       return columnIndex >= variableValues.Count || variableValues[columnIndex] is List<T>;
    332     }
    333 
    334     public Type GetVariableType(int columnIndex) {
    335       var listType = variableValues[columnIndex].GetType();
    336       return listType.GenericTypeArguments.Single();
    337     }
    338 
    339     public IList<string> InputVariables { get; private set; }
    340     public string TargetVariable { get; private set; } // optional
    341     #endregion
    342 
    343     #region Partitions
    344     [Storable]
    345     public IntRange TrainingPartition { get; set; }
    346     [Storable]
    347     public IntRange TestPartition { get; set; }
    348     #endregion
    349 
    350     #region Transformations
    351     [Storable]
    352     public IList<ITransformation> Transformations { get; protected set; }
    353     #endregion
    354 
    355     #region Validation
    356     public bool Validate(string value, out string errorMessage, int columnIndex) {
    357       if (columnIndex < 0 || columnIndex > VariableNames.Count()) {
    358         throw new ArgumentOutOfRangeException("column index is out of range");
    359       }
    360 
    361       bool valid = false;
    362       errorMessage = string.Empty;
    363       if (VariableHasType<double>(columnIndex)) {
    364         if (string.IsNullOrWhiteSpace(value)) {
    365           valid = true;
    366         } else {
    367           double val;
    368           valid = double.TryParse(value, out val);
    369           if (!valid) {
    370             errorMessage = "Invalid Value (Valid Value Format: \"" + FormatPatterns.GetDoubleFormatPattern() + "\")";
    371           }
    372         }
    373       } else if (VariableHasType<string>(columnIndex)) {
    374         valid = value != null;
    375         if (!valid) {
    376           errorMessage = "Invalid Value (string must not be null)";
    377         }
    378       } else if (VariableHasType<DateTime>(columnIndex)) {
    379         DateTime date;
    380         valid = DateTime.TryParse(value, out date);
    381         if (!valid) {
    382           errorMessage = "Invalid Value (Valid Value Format: \"" + CultureInfo.CurrentCulture.DateTimeFormat + "\"";
    383         }
    384       } else {
    385         throw new ArgumentException("column " + columnIndex + " contains a non supported type.");
    386       }
    387 
    388       return valid;
    389     }
    390     #endregion
    391 
    392     #region Import & Export
    393103    public void Import(IDataAnalysisProblemData problemData) {
    394104      Dataset dataset = (Dataset)problemData.Dataset;
     
    397107      TargetVariable = (problemData is IRegressionProblemData) ? ((IRegressionProblemData)problemData).TargetVariable
    398108        : (problemData is IClassificationProblemData) ? ((IClassificationProblemData)problemData).TargetVariable
    399           : null;
     109        : null;
    400110
    401111      int columnIndex = 0;
     
    418128    }
    419129
    420     public Dataset ExportToDataset() {
    421       IList<IList> values = new List<IList>();
    422 
    423       for (int i = 0; i < Columns; ++i) {
    424         values.Add(variableValues[i]);
    425       }
    426 
    427       var dataset = new Dataset(variableNames, values);
    428       return dataset;
    429     }
    430     #endregion
    431 
    432     #region Selection
    433     [Storable]
    434     protected IDictionary<int, IList<int>> selection;
    435     public IDictionary<int, IList<int>> Selection {
    436       get { return selection; }
    437       set {
    438         selection = value;
    439         OnSelectionChanged();
    440       }
    441     }
    442     public void ClearSelection() {
    443       Selection = new Dictionary<int, IList<int>>();
    444     }
    445 
    446     public event EventHandler SelectionChanged;
    447     protected void OnSelectionChanged() {
    448       var listeners = SelectionChanged;
    449       if (listeners != null) listeners(this, EventArgs.Empty);
    450     }
    451     #endregion
    452 
    453     #region Transactions
    454     // Snapshot/History are not storable/cloneable on purpose
    455     private class Snapshot {
    456       public IList<IList> VariableValues { get; set; }
    457       public IList<string> VariableNames { get; set; }
    458 
    459       public IntRange TrainingPartition { get; set; }
    460       public IntRange TestPartition { get; set; }
    461       public IList<ITransformation> Transformations { get; set; }
    462       public DataPreprocessingChangedEventType ChangedType { get; set; }
    463 
    464       public int ChangedColumn { get; set; }
    465       public int ChangedRow { get; set; }
    466     }
     130    private void RegisterEventHandler() {
     131      Changed += (s, e) => {
     132        switch (e.Type) {
     133          case DataPreprocessingChangedEventType.DeleteRow:
     134            CheckPartitionRanges();
     135            break;
     136          case DataPreprocessingChangedEventType.Any:
     137            CheckPartitionRanges();
     138            break;
     139          case DataPreprocessingChangedEventType.Transformation:
     140            CheckPartitionRanges();
     141            break;
     142        }
     143      };
     144    }
     145
     146    private void CheckPartitionRanges() {
     147      int maxRowIndex = Math.Max(0, Rows);
     148      TrainingPartition.Start = Math.Min(TrainingPartition.Start, maxRowIndex);
     149      TrainingPartition.End = Math.Min(TrainingPartition.End, maxRowIndex);
     150      TestPartition.Start = Math.Min(TestPartition.Start, maxRowIndex);
     151      TestPartition.End = Math.Min(TestPartition.End, maxRowIndex);
     152    }
     153
     154    protected IList<IList> CopyVariableValues(IList<IList> original) {
     155      var copy = new List<IList>(original);
     156      for (int i = 0; i < original.Count; ++i) {
     157        copy[i] = (IList)Activator.CreateInstance(original[i].GetType(), original[i]);
     158      }
     159      return copy;
     160    }
     161
     162
     163    #region IPreprocessingData Members
     164    public abstract T GetCell<T>(int columnIndex, int rowIndex);
     165
     166    public abstract void SetCell<T>(int columnIndex, int rowIndex, T value);
     167
     168    public abstract string GetCellAsString(int columnIndex, int rowIndex);
     169
     170    public abstract string GetVariableName(int columnIndex);
     171
     172    public abstract int GetColumnIndex(string variableName);
     173
     174    public abstract bool VariableHasType<T>(int columnIndex);
     175
     176    [Obsolete("use the index based variant, is faster")]
     177    public abstract IList<T> GetValues<T>(string variableName, bool considerSelection);
     178
     179    public abstract IList<T> GetValues<T>(int columnIndex, bool considerSelection);
     180
     181    public abstract void SetValues<T>(int columnIndex, IList<T> values);
     182
     183    public abstract bool SetValue(string value, int columnIndex, int rowIndex);
     184
     185    public abstract bool Validate(string value, out string errorMessage, int columnIndex);
     186
     187    public abstract bool AreAllStringColumns(IEnumerable<int> columnIndices);
     188
     189    public abstract void DeleteRowsWithIndices(IEnumerable<int> rows);
     190
     191    public abstract void InsertRow(int rowIndex);
     192
     193    public abstract void DeleteRow(int rowIndex);
     194
     195    public abstract void InsertColumn<T>(string variableName, int columnIndex);
     196
     197    public abstract void DeleteColumn(int columnIndex);
     198
     199    public abstract void RenameColumn(int columnIndex, string name);
     200    public abstract void RenameColumns(IList<string> list);
     201
     202    public abstract Dataset ExportToDataset();
     203
     204    public abstract void ClearSelection();
     205
     206    public abstract event EventHandler SelectionChanged;
     207    protected abstract void OnSelectionChanged();
    467208
    468209    public event DataPreprocessingChangedEventHandler Changed;
     
    471212      if (listeners != null) listeners(this, new DataPreprocessingChangedEventArgs(type, column, row));
    472213    }
    473 
    474     private const int MAX_UNDO_DEPTH = 5;
    475 
    476     private readonly IList<Snapshot> undoHistory = new List<Snapshot>();
    477     private readonly Stack<DataPreprocessingChangedEventType> eventStack = new Stack<DataPreprocessingChangedEventType>();
    478 
    479     public bool IsInTransaction { get { return eventStack.Count > 0; } }
    480 
    481     private void SaveSnapshot(DataPreprocessingChangedEventType changedType, int column, int row) {
    482       if (IsInTransaction) return;
    483 
    484       var currentSnapshot = new Snapshot {
    485         VariableValues = CopyVariableValues(variableValues),
    486         VariableNames = new List<string>(variableNames),
    487         TrainingPartition = new IntRange(TrainingPartition.Start, TrainingPartition.End),
    488         TestPartition = new IntRange(TestPartition.Start, TestPartition.End),
    489         Transformations = new List<ITransformation>(Transformations),
    490         ChangedType = changedType,
    491         ChangedColumn = column,
    492         ChangedRow = row
    493       };
    494 
    495       if (undoHistory.Count >= MAX_UNDO_DEPTH)
    496         undoHistory.RemoveAt(0);
    497 
    498       undoHistory.Add(currentSnapshot);
    499     }
    500 
    501     public bool IsUndoAvailable {
    502       get { return undoHistory.Count > 0; }
    503     }
    504 
    505     public void Undo() {
    506       if (IsUndoAvailable) {
    507         Snapshot previousSnapshot = undoHistory[undoHistory.Count - 1];
    508         variableValues = previousSnapshot.VariableValues;
    509         variableNames = previousSnapshot.VariableNames;
    510         TrainingPartition = previousSnapshot.TrainingPartition;
    511         TestPartition = previousSnapshot.TestPartition;
    512         Transformations = previousSnapshot.Transformations;
    513         undoHistory.Remove(previousSnapshot);
    514         OnChanged(previousSnapshot.ChangedType,
    515           previousSnapshot.ChangedColumn,
    516           previousSnapshot.ChangedRow);
    517       }
    518     }
    519 
    520     public void InTransaction(Action action, DataPreprocessingChangedEventType type = DataPreprocessingChangedEventType.Any) {
    521       BeginTransaction(type);
    522       action();
    523       EndTransaction();
    524     }
    525 
    526     public void BeginTransaction(DataPreprocessingChangedEventType type) {
    527       SaveSnapshot(type, -1, -1);
    528       eventStack.Push(type);
    529     }
    530 
    531     public void EndTransaction() {
    532       if (eventStack.Count == 0)
    533         throw new InvalidOperationException("There is no open transaction that can be ended.");
    534 
    535       var @event = eventStack.Pop();
    536       OnChanged(@event, -1, -1);
    537     }
    538     #endregion
    539 
    540     #region Statistics
    541     public T GetMin<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    542       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
    543       return values.Any() ? values.Min() : emptyValue;
    544     }
    545 
    546     public T GetMax<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    547       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
    548       return values.Any() ? values.Max() : emptyValue;
    549     }
    550 
    551     public T GetMean<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    552       if (typeof(T) == typeof(double)) {
    553         var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
    554         return values.Any() ? Convert<T>(values.Average()) : emptyValue;
    555       }
    556       if (typeof(T) == typeof(string)) {
    557         return Convert<T>(string.Empty);
    558       }
    559       if (typeof(T) == typeof(DateTime)) {
    560         var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection);
    561         return values.Any() ? Convert<T>(AggregateAsDouble(values, Enumerable.Average)) : emptyValue;
    562       }
    563 
    564       throw new InvalidOperationException(typeof(T) + " not supported");
    565     }
    566 
    567     public T GetMedian<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
    568       if (typeof(T) == typeof(double)) {// IEnumerable<double> is faster 
    569         var doubleValues = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
    570         return doubleValues.Any() ? Convert<T>(doubleValues.Median()) : emptyValue;
    571       }
    572       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
    573       return values.Any() ? values.Quantile(0.5) : emptyValue;
    574     }
    575 
    576     public T GetMode<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IEquatable<T> {
    577       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
    578       return values.Any() ? values.GroupBy(x => x).OrderByDescending(g => g.Count()).Select(g => g.Key).First() : emptyValue;
    579     }
    580 
    581     public T GetStandardDeviation<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    582       if (typeof(T) == typeof(double)) {
    583         var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
    584         return values.Any() ? Convert<T>(values.StandardDeviation()) : emptyValue;
    585       }
    586       // For DateTime, std.dev / variance would have to be TimeSpan
    587       //if (typeof(T) == typeof(DateTime)) {
    588       //  var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection);
    589       //  return values.Any() ? Convert<T>(AggregateAsDouble(values, EnumerableStatisticExtensions.StandardDeviation)) : emptyValue;
    590       //}
    591       return default(T);
    592     }
    593 
    594     public T GetVariance<T>(int columnIndex, bool considerSelection = false, T emptyValue = default(T)) {
    595       if (typeof(T) == typeof(double)) {
    596         var values = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
    597         return values.Any() ? Convert<T>(values.Variance()) : emptyValue;
    598       }
    599       // DateTime variance often overflows long, thus the corresponding DateTime is invalid
    600       //if (typeof(T) == typeof(DateTime)) {
    601       //  var values = GetValuesWithoutMissingValues<DateTime>(columnIndex, considerSelection);
    602       //  return values.Any() ? Convert<T>(AggregateAsDouble(values, EnumerableStatisticExtensions.Variance)) : emptyValue;
    603       //}
    604       return default(T);
    605     }
    606 
    607     public T GetQuantile<T>(double alpha, int columnIndex, bool considerSelection = false, T emptyValue = default(T)) where T : IComparable<T> {
    608       if (typeof(T) == typeof(double)) {// IEnumerable<double> is faster 
    609         var doubleValues = GetValuesWithoutMissingValues<double>(columnIndex, considerSelection);
    610         return doubleValues.Any() ? Convert<T>(doubleValues.Quantile(alpha)) : emptyValue;
    611       }
    612       var values = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
    613       return values.Any() ? values.Quantile(alpha) : emptyValue;
    614     }
    615 
    /// <summary>
    /// Counts how many different non-missing values a column contains.
    /// </summary>
    /// <param name="columnIndex">Index of the column to inspect.</param>
    /// <param name="considerSelection">Forwarded unchanged to the value lookup.</param>
    /// <returns>Number of distinct values according to the default equality comparer of <typeparamref name="T"/>.</returns>
    public int GetDistinctValues<T>(int columnIndex, bool considerSelection = false) {
      IEnumerable<T> presentValues = GetValuesWithoutMissingValues<T>(columnIndex, considerSelection);
      return presentValues.Distinct().Count();
    }
    620 
    /// <summary>
    /// Returns the values of a column with every entry that IsMissingValue reports as missing filtered out.
    /// The filter is applied lazily while the result is enumerated.
    /// </summary>
    private IEnumerable<T> GetValuesWithoutMissingValues<T>(int columnIndex, bool considerSelection) {
      var columnValues = GetValues<T>(columnIndex, considerSelection);
      return columnValues.Where(value => !IsMissingValue(value));
    }
    624 
    /// <summary>
    /// Applies a double-based aggregate to a sequence of timestamps.
    /// Each timestamp is converted to (fractional) seconds, aggregated by <paramref name="func"/>,
    /// and the result is converted back to ticks to build the returned DateTime.
    /// </summary>
    /// <param name="values">Timestamps to aggregate.</param>
    /// <param name="func">Aggregate operating on the timestamps expressed in seconds.</param>
    private static DateTime AggregateAsDouble(IEnumerable<DateTime> values, Func<IEnumerable<double>, double> func) {
      var seconds = values.Select(timestamp => (double)timestamp.Ticks / TimeSpan.TicksPerSecond);
      double aggregatedSeconds = func(seconds);
      long aggregatedTicks = (long)(aggregatedSeconds * TimeSpan.TicksPerSecond);
      return new DateTime(aggregatedTicks);
    }
    /// <summary>
    /// Casts an untyped (possibly boxed) value to <typeparamref name="T"/>.
    /// Throws InvalidCastException when the runtime type does not match.
    /// </summary>
    private static T Convert<T>(object obj) {
      return (T)obj;
    }
    629 
    /// <summary>
    /// Counts the missing values of the whole data set by adding up the per-column counts.
    /// </summary>
    public int GetMissingValueCount() {
      int total = 0;
      int column = 0;
      while (column < Columns) {
        total += GetMissingValueCount(column);
        column++;
      }
      return total;
    }
    /// <summary>
    /// Counts the empty cells of a single column.
    /// </summary>
    /// <param name="columnIndex">Index of the column whose rows are inspected.</param>
    public int GetMissingValueCount(int columnIndex) {
      int missing = 0;
      for (int row = 0; row < Rows; row++)
        missing += IsCellEmpty(columnIndex, row) ? 1 : 0;
      return missing;
    }
    /// <summary>
    /// Counts the empty cells of a single row across all columns.
    /// </summary>
    /// <param name="rowIndex">Index of the row whose cells are inspected.</param>
    public int GetRowMissingValueCount(int rowIndex) {
      int emptyCells = 0;
      int column = 0;
      while (column < Columns) {
        if (IsCellEmpty(column, rowIndex))
          emptyCells++;
        column++;
      }
      return emptyCells;
    }
    653     #endregion
    654 
    655     #region Helpers
    /// <summary>
    /// Creates a copy of the column lists: the outer list is new and every column is
    /// re-created through the constructor of its own runtime type that accepts the
    /// original column as its single argument.
    /// </summary>
    /// <param name="original">Column lists to copy.</param>
    private static IList<IList> CopyVariableValues(IList<IList> original) {
      return original
        .Select(column => (IList)Activator.CreateInstance(column.GetType(), column))
        .ToList();
    }
    663214    #endregion
    664215  }
    665 
    666   // Adapted from HeuristicLab.Common.EnumerableStatisticExtensions
    /// <summary>
    /// Order-statistic helpers for sequences of arbitrary comparable elements.
    /// </summary>
    internal static class EnumerableExtensions {
      /// <summary>
      /// Returns the element of rank ceil(n * alpha) (1-based) among the n given values.
      /// </summary>
      /// <param name="values">Values to rank; the sequence is copied and never modified.</param>
      /// <param name="alpha">Quantile within [0, 1]; 0 yields the minimum, 1 the maximum.</param>
      /// <exception cref="InvalidOperationException">The sequence contains no elements.</exception>
      /// <exception cref="ArgumentOutOfRangeException"><paramref name="alpha"/> is NaN or outside [0, 1].</exception>
      public static T Quantile<T>(this IEnumerable<T> values, double alpha) where T : IComparable<T> {
        T[] valuesArr = values.ToArray();
        int n = valuesArr.Length;
        if (n == 0) throw new InvalidOperationException("Enumeration contains no elements.");

        // Reject values that cannot be mapped to a valid rank instead of failing later with an index error.
        if (double.IsNaN(alpha) || alpha < 0.0 || alpha > 1.0)
          throw new ArgumentOutOfRangeException("alpha", alpha, "The quantile must lie within [0, 1].");

        // ceil(n * alpha) - 1 evaluates to -1 for alpha == 0; clamp so that the 0-quantile is the minimum.
        int k = Math.Max(0, (int)Math.Ceiling(n * alpha) - 1);

        return Select(k, valuesArr);
      }

      /// <summary>
      /// Selects the k-th smallest element (0-based) of <paramref name="arr"/> by repeated
      /// median-of-three partitioning. The array is rearranged in place.
      /// </summary>
      private static T Select<T>(int k, T[] arr) where T : IComparable<T> {
        int i, ir, j, l, mid, n = arr.Length;
        T a;
        l = 0;
        ir = n - 1;
        for (;;) {
          if (ir <= l + 1) {
            // Active partition contains 1 or 2 elements.
            if (ir == l + 1 && arr[ir].CompareTo(arr[l]) < 0) {
              // Case of 2 elements.
              Swap(arr, l, ir);
            }
            return arr[k];
          } else {
            // Choose the median of the left, center, and right elements as partitioning element a.
            mid = (l + ir) >> 1;
            Swap(arr, mid, l + 1);

            // Also rearrange so that arr[l] <= arr[l+1] <= arr[ir]; both ends then act as
            // sentinels that stop the scans of the innermost loop.
            if (arr[l].CompareTo(arr[ir]) > 0) {
              Swap(arr, l, ir);
            }
            if (arr[l + 1].CompareTo(arr[ir]) > 0) {
              Swap(arr, l + 1, ir);
            }
            if (arr[l].CompareTo(arr[l + 1]) > 0) {
              Swap(arr, l, l + 1);
            }

            i = l + 1; // Initialize pointers for partitioning.
            j = ir;
            a = arr[l + 1]; // Partitioning element.
            for (;;) { // Beginning of innermost loop.
              do i++; while (arr[i].CompareTo(a) < 0); // Scan up to find element >= a.
              do j--; while (arr[j].CompareTo(a) > 0); // Scan down to find element <= a.
              if (j < i) break; // Pointers crossed. Partitioning complete.
              Swap(arr, i, j);
            } // End of innermost loop.
            arr[l + 1] = arr[j]; // Insert partitioning element.
            arr[j] = a;
            if (j >= k) ir = j - 1; // Keep active the partition that contains the
            if (j <= k) l = i; // kth element.
          }
        }
      }

      /// <summary>
      /// Exchanges the elements at positions <paramref name="i"/> and <paramref name="j"/>.
      /// </summary>
      private static void Swap<T>(T[] arr, int i, int j) {
        T temp = arr[i];
        arr[i] = arr[j];
        arr[j] = temp;
      }
    }
    729216}
Note: See TracChangeset for help on using the changeset viewer.