Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
03/12/14 17:03:45 (11 years ago)
Author:
tsteinre
Message:
  • divided/refactored PreprocessingData into TransactionalPreprocessingData and PreprocessingData
File:
1 copied

Legend:

Unmodified
Added
Removed
  • branches/DataPreprocessing/HeuristicLab.DataPreprocessing/3.3/Implementations/TransactionalPreprocessingData.cs

    r10585 r10586  
    2323using System.Collections;
    2424using System.Collections.Generic;
    25 using System.Linq;
    2625using HeuristicLab.Common;
    2726using HeuristicLab.Core;
    28 using HeuristicLab.Data;
    2927using HeuristicLab.Problems.DataAnalysis;
    3028
     
    4644
    4745  [Item("PreprocessingData", "Represents data used for preprocessing.")]
    48   public class PreprocessingData : NamedItem, IPreprocessingData {
     46  public class TransactionalPreprocessingData : PreprocessingData, ITransactionalPreprocessingData {
    4947
    5048    private const int MAX_UNDO_DEPTH = 5;
    5149
    52     private IDictionary<int, IList> variableValues;
    53 
    54     private IList<string> variableNames;
    55 
    56     private double trainingToTestRatio;
    57 
    5850    private IList<PDSnapshot> undoHistory;
    5951
    60     //TODO: refactor extract Transaction logic in a own class
    6152    private int transactionDepth = 0;
    6253
    63     private PreprocessingData(PreprocessingData original, Cloner cloner)
     54    private TransactionalPreprocessingData(TransactionalPreprocessingData original, Cloner cloner)
    6455      : base(original, cloner) {
    65       variableValues = CopyVariableValues(original.variableValues);
    66       variableNames = new List<string>(original.variableNames);
    67       trainingToTestRatio = original.trainingToTestRatio;
    6856      undoHistory = new List<PDSnapshot>();
    6957    }
    7058
    71     public PreprocessingData(IDataAnalysisProblemData problemData)
    72       : base() {
    73       Name = "-";
    74 
    75       variableNames = new List<string>(problemData.Dataset.VariableNames);
    76       // create dictionary from variable name to index
    77 
    78       int columnIndex = 0;
    79       variableValues = new Dictionary<int, IList>();
    80       foreach (var variableName in problemData.Dataset.VariableNames) {
    81         if (problemData.Dataset.IsType<double>(variableName)) {
    82           variableValues[columnIndex] = problemData.Dataset.GetDoubleValues(variableName).ToList();
    83         } else if (problemData.Dataset.IsType<string>(variableName)) {
    84           variableValues[columnIndex] = CreateColumn<string>(problemData.Dataset, columnIndex, x => x);
    85         } else if (problemData.Dataset.IsType<DateTime>(variableName)) {
    86           variableValues[columnIndex] = CreateColumn<DateTime>(problemData.Dataset, columnIndex, x => DateTime.Parse(x));
    87         } else {
    88           throw new ArgumentException("The datatype of column " + variableName + " must be of type List<double>, List<string> or List<DateTime>");
    89         }
    90         ++columnIndex;
    91       }
    92 
    93       trainingToTestRatio = (double)problemData.TrainingPartition.Size / Math.Max(problemData.Dataset.Rows, double.Epsilon);
     59    public TransactionalPreprocessingData(IDataAnalysisProblemData problemData)
     60      : base(problemData) {
    9461      undoHistory = new List<PDSnapshot>();
    95     }
    96 
    97     private static IList CreateColumn<T>(Dataset ds, int column, Func<string, T> selector) {
    98       var list = new List<T>(ds.Rows);
    99       for (int row = 0; row < ds.Rows; ++row) {
    100         list.Add(selector(ds.GetValue(row, column)));
    101       }
    102       return list;
    103     }
    104 
    105     private IDictionary<int, IList> CopyVariableValues(IDictionary<int, IList> original) {
    106       var copy = new Dictionary<int, IList>(variableValues);
    107       for (int i = 0; i < original.Count; i++) {
    108         variableValues[i] = (IList)Activator.CreateInstance(original[i].GetType(), original[i]);
    109       }
    110       return copy;
    11162    }
    11263
     
    13182
    13283    public override IDeepCloneable Clone(Cloner cloner) {
    133       return new PreprocessingData(this, cloner);
     84      return new TransactionalPreprocessingData(this, cloner);
    13485    }
    13586
    13687    #endregion
    13788
    138     #region IPreprocessingData Members
     89    #region Overridden IPreprocessingData Members
    13990
    140     public T GetCell<T>(int columnIndex, int rowIndex) {
    141       return (T)variableValues[columnIndex][rowIndex];
    142     }
    143 
    144 
    145     public void SetCell<T>(int columnIndex, int rowIndex, T value) {
     91    public override void SetCell<T>(int columnIndex, int rowIndex, T value) {
    14692      SaveSnapshot(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
    147       variableValues[columnIndex][rowIndex] = value;
     93      base.SetCell<T>(columnIndex, rowIndex, value);
    14894      if (transactionDepth <= 0)
    14995        OnChanged(DataPreprocessingChangedEventType.ChangeItem, columnIndex, rowIndex);
    15096    }
    15197
    152 
    153     public string GetCellAsString(int columnIndex, int rowIndex) {
    154       return variableValues[columnIndex][rowIndex].ToString();
    155     }
    156 
    157 
    158     [Obsolete("use the index based variant, is faster")]
    159     public IList<T> GetValues<T>(string variableName) {
    160       return GetValues<T>(GetColumnIndex(variableName));
    161     }
    162 
    163     public IList<T> GetValues<T>(int columnIndex) {
    164       return (IList<T>)variableValues[columnIndex];
    165     }
    166 
    167     public void SetValues<T>(int columnIndex, IList<T> values) {
    168       if (IsType<T>(columnIndex)) {
    169         SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
    170         variableValues[columnIndex] = (IList)values;
    171       } else {
    172         throw new ArgumentException("The datatype of column " + columnIndex + " must be of type " + variableValues[columnIndex].GetType().Name + " but was " + typeof(T).Name);
    173       }
     98    public override void SetValues<T>(int columnIndex, IList<T> values) {
     99      SaveSnapshot(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
     100      base.SetValues<T>(columnIndex, values);
    174101      if (transactionDepth <= 0)
    175102        OnChanged(DataPreprocessingChangedEventType.ChangeColumn, columnIndex, -1);
    176103    }
    177104
    178     public void InsertRow(int rowIndex) {
     105    public override void InsertRow(int rowIndex) {
    179106      SaveSnapshot(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
    180       foreach (IList column in variableValues.Values) {
    181         Type type = column.GetType().GetGenericArguments()[0];
    182         column.Insert(rowIndex, type.IsValueType ? Activator.CreateInstance(type) : null);
    183       }
     107      base.InsertRow(rowIndex);
    184108      if (transactionDepth <= 0)
    185109        OnChanged(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
    186110    }
    187111
    188     public void DeleteRow(int rowIndex) {
     112    public override void DeleteRow(int rowIndex) {
    189113      SaveSnapshot(DataPreprocessingChangedEventType.AddRow, -1, rowIndex);
    190       foreach (IList column in variableValues.Values) {
    191         column.RemoveAt(rowIndex);
    192       }
     114      base.DeleteRow(rowIndex);
    193115      if (transactionDepth <= 0)
    194116        OnChanged(DataPreprocessingChangedEventType.DeleteRow, -1, rowIndex);
    195117    }
    196118
    197     public void InsertColumn<T>(string variableName, int columnIndex) {
     119    public override void InsertColumn<T>(string variableName, int columnIndex) {
    198120      SaveSnapshot(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
    199       variableValues.Add(columnIndex, new List<T>(Rows));
    200       variableNames.Insert(columnIndex, variableName);
     121      base.InsertColumn<T>(variableName, columnIndex);
    201122      if (transactionDepth <= 0)
    202123        OnChanged(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
    203124    }
    204125
    205     public void DeleteColumn(int columnIndex) {
     126    public override void DeleteColumn(int columnIndex) {
    206127      SaveSnapshot(DataPreprocessingChangedEventType.AddColumn, columnIndex, -1);
    207       variableValues.Remove(columnIndex);
    208       variableNames.RemoveAt(columnIndex);
     128      base.DeleteColumn(columnIndex);
    209129      if (transactionDepth <= 0)
    210130        OnChanged(DataPreprocessingChangedEventType.DeleteColumn, columnIndex, -1);
    211131    }
    212132
    213     public IntRange TrainingPartition {
    214       get { return new IntRange(0, (int)(Rows * trainingToTestRatio)); }
    215     }
     133    #endregion
    216134
    217     public IntRange TestPartition {
    218       get { return new IntRange((int)(Rows * trainingToTestRatio), Rows); }
    219     }
    220 
    221     public string GetVariableName(int columnIndex) {
    222       return variableNames[columnIndex];
    223     }
    224 
    225     public IEnumerable<string> VariableNames {
    226       get { return variableNames; }
    227     }
    228 
    229     public int GetColumnIndex(string variableName) {
    230       return variableNames.IndexOf(variableName);
    231     }
    232 
    233     public bool IsType<T>(int columnIndex) {
    234       return variableValues[columnIndex] is List<T>;
    235     }
    236 
    237     public int Columns {
    238       get { return variableNames.Count; }
    239     }
    240 
    241     public int Rows {
    242       get { return variableValues.Count > 0 ? variableValues[0].Count : 0; }
    243     }
    244 
    245     public Dataset ExportToDataset() {
    246       IList<IList> values = new List<IList>();
    247 
    248       for (int i = 0; i < Columns; ++i) {
    249         values.Add(variableValues[i]);
    250       }
    251 
    252       var dataset = new Dataset(variableNames, values);
    253       return dataset;
    254     }
     135    #region TransactionalPreprocessingData members
    255136
    256137    public event DataPreprocessingChangedEventHandler Changed;
Note: See TracChangeset for help on using the changeset viewer.