Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.DataImporter/HeuristicLab.DataImporter.Command/TimeSeries/FuzzyLinkColumnGroupsCommand.cs @ 10256

Last change on this file since 10256 was 6133, checked in by gkronber, 14 years ago

#1471: imported generic parts of DataImporter from private code base

File size: 6.8 KB
Line 
1using System;
2using System.Collections.Generic;
3using System.Linq;
4using System.Text;
5using System.Windows.Forms;
6using HeuristicLab.DataImporter.Data;
7using HeuristicLab.DataImporter.Data.CommandBase;
8using HeuristicLab.DataImporter.Data.Model;
9using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
10
namespace HeuristicLab.DataImporter.Command {
  /// <summary>
  /// Command that replaces the second of two selected column groups with a row-aligned
  /// version of itself: for every row of the first (left) group it picks the row of the
  /// second (right) group that has the same primary key and the largest secondary
  /// ("fuzzy") key that is not greater than the left row's secondary key.
  /// Left rows without such a partner are paired with an empty row.
  /// </summary>
  [StorableClass]
  [ViewableCommandInfo("Fuzzy-link ColumnGroups", 2, ColumnGroupState.Sorted, "Time Series", Position = 4)]
  public class FuzzyLinkColumnGroupsCommand : DataSetCommandWithAffectedColumnGroupsBase {
    // Result of the last FuzzyLinkColumnGroups call; put into the data set by Execute.
    private ColumnGroup newColumnGroup;
    // Original right column group, kept so that UndoExecute can restore it.
    private ColumnGroup oldColumnGroup;
    // Position of the right column group inside the data set.
    private int removePos;

    private FuzzyLinkColumnGroupsCommand()
      : base(null, null) {
    }

    public FuzzyLinkColumnGroupsCommand(DataSet dataSet, List<string> affectedColumnGroups)
      : base(dataSet, affectedColumnGroups) {
    }

    /// <summary>
    /// Validates the two affected column groups, fuzzy-links them and replaces the second
    /// group in the data set with the linked result.
    /// </summary>
    /// <exception cref="CommandExecutionException">
    /// Thrown if not exactly two column groups are affected, if either group is not sorted
    /// by exactly two columns, if the sort orders are not ascending on both sides, or if a
    /// sort (key) column contains null values.
    /// </exception>
    //IMPORTANT this method has not been tested and therefore it is not guaranteed that it works as expected
    public override void Execute() {
      if (AffectedColumnGroupNames.Count != 2)
        throw new CommandExecutionException("This command only works if excatly two column groups are activated.", this);
      ColumnGroup leftColumnGroup = this.DataSet.GetColumnGroup(AffectedColumnGroupNames[0]);
      ColumnGroup rightColumnGroup = this.DataSet.GetColumnGroup(AffectedColumnGroupNames[1]);
      int rightIndex = this.DataSet.IndexOfColumnGroup(rightColumnGroup);

      // Remember the undo information before any validation, so that UndoExecute always
      // has a column group to restore.
      this.oldColumnGroup = rightColumnGroup;
      this.removePos = rightIndex;

      if (leftColumnGroup.SortedColumnsCount != 2 || rightColumnGroup.SortedColumnsCount != 2)
        throw new CommandExecutionException("Both ColumnGroups must be sorted by exactly two columns.", this);

      // Sort orders of the primary (0) and secondary (1) sort column on both sides.
      var leftKeyOrder = leftColumnGroup.SortOrdersForColumns.ElementAt(leftColumnGroup.SortedColumnIndexes.ElementAt(0));
      var leftFuzzyKeyOrder = leftColumnGroup.SortOrdersForColumns.ElementAt(leftColumnGroup.SortedColumnIndexes.ElementAt(1));
      var rightKeyOrder = rightColumnGroup.SortOrdersForColumns.ElementAt(rightColumnGroup.SortedColumnIndexes.ElementAt(0));
      var rightFuzzyKeyOrder = rightColumnGroup.SortOrdersForColumns.ElementAt(rightColumnGroup.SortedColumnIndexes.ElementAt(1));

      // The linking algorithm below only handles ascending order.
      if (leftKeyOrder != rightKeyOrder ||
          leftFuzzyKeyOrder != rightFuzzyKeyOrder ||
          leftKeyOrder != SortOrder.Ascending ||
          leftFuzzyKeyOrder != SortOrder.Ascending)
        throw new CommandExecutionException("Both ColumnGroups must be sorted in the same direction and ascending.", this);

      EnsureKeyColumnsHaveNoNullValues(leftColumnGroup);
      EnsureKeyColumnsHaveNoNullValues(rightColumnGroup);

      FuzzyLinkColumnGroups(leftColumnGroup, rightColumnGroup);

      DataSet.ReplaceColumnGroup(removePos, newColumnGroup);
      DataSet.FireChanged();
    }

    /// <summary>
    /// Puts the original right column group back at its position in the data set.
    /// </summary>
    public override void UndoExecute() {
      DataSet.ReplaceColumnGroup(removePos, oldColumnGroup);
      DataSet.FireChanged();
    }

    // Throws if any of the columns the group is sorted by contains null values.
    private void EnsureKeyColumnsHaveNoNullValues(ColumnGroup columnGroup) {
      foreach (int sortColIndex in columnGroup.SortedColumnIndexes) {
        if (columnGroup.Columns.ElementAt(sortColIndex).ContainsNullValues)
          throw new CommandExecutionException("KeyColumns must not contain null values.", this);
      }
    }

    /// <summary>
    /// Builds a new column group with the name and column layout of <paramref name="right"/>
    /// that contains exactly one row per row of <paramref name="left"/> and stores it in
    /// this command (it is put into the data set by <see cref="Execute"/>).
    /// </summary>
    /// <remarks>
    /// The first sorted column of each group is used as the exact key, the second sorted
    /// column as the fuzzy key. The algorithm relies on both groups being sorted ascending
    /// by these two columns; this method does not check that itself.
    /// </remarks>
    /// <param name="left">Column group whose rows drive the result (one output row each).</param>
    /// <param name="right">Column group the output rows are taken from.</param>
    public void FuzzyLinkColumnGroups(ColumnGroup left, ColumnGroup right) {
      ColumnBase keyColumn1 = left.Columns.ElementAt(left.SortedColumnIndexes.ElementAt(0));
      ColumnBase keyColumn2 = right.Columns.ElementAt(right.SortedColumnIndexes.ElementAt(0));
      ColumnBase fuzzyKeyColumn1 = left.Columns.ElementAt(left.SortedColumnIndexes.ElementAt(1));
      ColumnBase fuzzyKeyColumn2 = right.Columns.ElementAt(right.SortedColumnIndexes.ElementAt(1));

      this.newColumnGroup = new ColumnGroup(right.Name);
      foreach (ColumnBase col in right.Columns)
        newColumnGroup.AddColumn(col.CreateCopyOfColumnWithoutValues());

      // First pass: for every left row remember the index of the FIRST right row with the
      // same key, or -1 if the right side has no row with that key.
      int[] firstMatchingRow = new int[left.RowCount];
      int j = 0;
      for (int i = 0; i < left.RowCount; i++) {
        // skip on the right side while the right side key is smaller than the left side key
        while (j < right.RowCount && keyColumn1.GetValue(i).CompareTo(keyColumn2.GetValue(j)) > 0) {
          j++;
        }
        if (j < right.RowCount && keyColumn1.GetValue(i).CompareTo(keyColumn2.GetValue(j)) == 0) {
          firstMatchingRow[i] = j;
        } else {
          firstMatchingRow[i] = -1;
        }
      }

      // Second pass: find the best fuzzy link (= same or next smaller fuzzy key).
      // j is the right-side scan position; -1 means "no position for the current key".
      // Starting at -1 (instead of reading firstMatchingRow[0]) keeps an empty left group
      // from indexing an empty array.
      j = -1;
      for (int i = 0; i < left.RowCount; i++) {
        // Re-seed the scan position when there is none or when the key changed.
        // The j < 0 test must come first: the right key column must not be read at
        // index -1 after a left row without a matching key.
        if (j < 0 || keyColumn1.GetValue(i).CompareTo(keyColumn2.GetValue(j)) != 0) {
          j = firstMatchingRow[i];
        }

        if (j < 0) {
          // no matching key => add an empty row
          newColumnGroup.AddRow(right.GetEmptyRow());
          continue;
        }

        // go forward while the key still matches and the fuzzy key on the right is
        // smaller than or equal to the fuzzy key on the left
        int bestMatch = -1;
        while (j < right.RowCount &&
          keyColumn1.GetValue(i).CompareTo(keyColumn2.GetValue(j)) == 0 &&
          fuzzyKeyColumn1.GetValue(i).CompareTo(fuzzyKeyColumn2.GetValue(j)) >= 0) {
          bestMatch = j;
          j++;
        }

        IComparable[] row2;
        if (bestMatch >= 0) {
          row2 = right.GetRow(bestMatch);
          // Continue the scan for the next left row from the row just used; it may be
          // the best match again.
          j = bestMatch;
        } else {
          // All right rows with this key have a larger fuzzy key; j was not advanced.
          row2 = right.GetEmptyRow();
        }
        newColumnGroup.AddRow(row2);
      }
    }

    public override string Description {
      get { return "Fuzzy-link column groups"; }
    }
  }
}
Note: See TracBrowser for help on using the repository browser.