1 | using System;
|
---|
2 | using System.Collections.Generic;
|
---|
3 | using System.Linq;
|
---|
4 | using System.Text;
|
---|
5 | using System.Windows.Forms;
|
---|
6 | using HeuristicLab.DataImporter.Data;
|
---|
7 | using HeuristicLab.DataImporter.Data.CommandBase;
|
---|
8 | using HeuristicLab.DataImporter.Data.Model;
|
---|
9 | using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
|
---|
10 |
|
---|
namespace HeuristicLab.DataImporter.Command {
  /// <summary>
  /// Command that replaces the second ("right") of two selected column groups by a new
  /// column group containing one row per row of the first ("left") column group.
  /// Each row is the right-hand row whose first sort key equals the left row's first sort
  /// key and whose second sort key (the "fuzzy" key) is the largest value that is still
  /// smaller than or equal to the left row's fuzzy key; if there is no such row, an empty
  /// row is inserted instead.
  /// </summary>
  [StorableClass]
  [ViewableCommandInfo("Fuzzy-link ColumnGroups", 2, ColumnGroupState.Sorted, "Time Series", Position = 4)]
  public class FuzzyLinkColumnGroupsCommand : DataSetCommandWithAffectedColumnGroupsBase {
    // Column group built by FuzzyLinkColumnGroups; put in place of the right column group by Execute.
    private ColumnGroup newColumnGroup;
    // The original right column group; restored by UndoExecute.
    private ColumnGroup oldColumnGroup;
    // Position of the right column group inside the data set.
    private int removePos;

    // Parameterless constructor; passes null for both the data set and the affected column groups.
    // NOTE(review): presumably only required by the persistence framework - confirm.
    private FuzzyLinkColumnGroupsCommand()
      : base(null, null) {
    }

    /// <summary>
    /// Creates the command for the given data set and the names of the affected column groups.
    /// </summary>
    /// <param name="dataSet">The data set containing the column groups.</param>
    /// <param name="affectedColumnGroups">Names of the affected column groups (exactly two are required by <see cref="Execute"/>).</param>
    public FuzzyLinkColumnGroupsCommand(DataSet dataSet, List<string> affectedColumnGroups)
      : base(dataSet, affectedColumnGroups) {
    }

    //IMPORTANT this method has not been tested and therefore it is not guaranteed that it works as expected
    /// <summary>
    /// Validates the two affected column groups, fuzzy-links the second one to the first one
    /// and replaces the second column group in the data set by the linked result.
    /// </summary>
    /// <exception cref="CommandExecutionException">
    /// Thrown if not exactly two column groups are affected, if either group is not sorted by
    /// exactly two columns, if any of the sort orders is not ascending, or if a sorted (key)
    /// column contains null values.
    /// </exception>
    public override void Execute() {
      if (AffectedColumnGroupNames.Count != 2)
        throw new CommandExecutionException("This command only works if exactly two column groups are activated.", this);
      ColumnGroup leftColumnGroup = this.DataSet.GetColumnGroup(AffectedColumnGroupNames[0]);
      ColumnGroup rightColumnGroup = this.DataSet.GetColumnGroup(AffectedColumnGroupNames[1]);

      // remember what is replaced and where, so that UndoExecute can restore it
      this.oldColumnGroup = rightColumnGroup;
      this.removePos = this.DataSet.IndexOfColumnGroup(rightColumnGroup);

      if (leftColumnGroup.SortedColumnsCount != 2 || rightColumnGroup.SortedColumnsCount != 2)
        throw new CommandExecutionException("Both ColumnGroups must be sorted by exactly two columns.", this);

      // "same direction and ascending" is equivalent to all four sort orders being ascending
      if (!IsSortedAscendingByBothKeys(leftColumnGroup) || !IsSortedAscendingByBothKeys(rightColumnGroup))
        throw new CommandExecutionException("Both ColumnGroups must be sorted in the same direction and ascending.", this);

      if (ContainsNullInKeyColumns(leftColumnGroup))
        throw new CommandExecutionException("KeyColumns must not contain null values.", this);
      if (ContainsNullInKeyColumns(rightColumnGroup))
        throw new CommandExecutionException("KeyColumns must not contain null values.", this);

      FuzzyLinkColumnGroups(leftColumnGroup, rightColumnGroup);

      DataSet.ReplaceColumnGroup(removePos, newColumnGroup);
      DataSet.FireChanged();
    }

    /// <summary>
    /// Puts the original right column group back at its position in the data set.
    /// </summary>
    public override void UndoExecute() {
      DataSet.ReplaceColumnGroup(removePos, oldColumnGroup);
      DataSet.FireChanged();
    }

    // Returns true if the first two sorted columns of the column group are both sorted ascending.
    private static bool IsSortedAscendingByBothKeys(ColumnGroup columnGroup) {
      return columnGroup.SortOrdersForColumns.ElementAt(columnGroup.SortedColumnIndexes.ElementAt(0)) == SortOrder.Ascending &&
             columnGroup.SortOrdersForColumns.ElementAt(columnGroup.SortedColumnIndexes.ElementAt(1)) == SortOrder.Ascending;
    }

    // Returns true if any sorted (key) column of the column group contains null values.
    private static bool ContainsNullInKeyColumns(ColumnGroup columnGroup) {
      return columnGroup.SortedColumnIndexes.Any(index => columnGroup.Columns.ElementAt(index).ContainsNullValues);
    }

    /// <summary>
    /// Builds the linked column group (stored internally and used by <see cref="Execute"/>).
    /// The result has the name and column layout of <paramref name="right"/> and exactly one
    /// row per row of <paramref name="left"/>: the row of <paramref name="right"/> with an equal
    /// key and the largest fuzzy key that is smaller than or equal to the left fuzzy key, or an
    /// empty row if no such row exists.
    /// </summary>
    /// <remarks>
    /// The key is the first sorted column and the fuzzy key is the second sorted column of each
    /// group. The algorithm relies on both groups being sorted ascending by these columns and on
    /// the key columns not containing null values; <see cref="Execute"/> validates this before
    /// calling this method.
    /// </remarks>
    /// <param name="left">The column group that determines the rows of the result.</param>
    /// <param name="right">The column group whose rows are linked to the rows of <paramref name="left"/>.</param>
    public void FuzzyLinkColumnGroups(ColumnGroup left, ColumnGroup right) {
      ColumnBase keyColumn1 = left.Columns.ElementAt(left.SortedColumnIndexes.ElementAt(0));
      ColumnBase keyColumn2 = right.Columns.ElementAt(right.SortedColumnIndexes.ElementAt(0));
      ColumnBase fuzzyKeyColumn1 = left.Columns.ElementAt(left.SortedColumnIndexes.ElementAt(1));
      ColumnBase fuzzyKeyColumn2 = right.Columns.ElementAt(right.SortedColumnIndexes.ElementAt(1));

      this.newColumnGroup = new ColumnGroup(right.Name);
      foreach (ColumnBase col in right.Columns)
        newColumnGroup.AddColumn(col.CreateCopyOfColumnWithoutValues());

      // FOR NOW WE ASSERT THAT SORT ORDER IS ASCENDING (descending sort orders are not supported)

      int leftRowCount = left.RowCount;
      int rightRowCount = right.RowCount;

      // first pass: for every row on the left side find the index of the first row on the
      // right side with the same key (-1 if the key does not occur on the right side)
      int[] firstMatchingRow = new int[leftRowCount];
      int j = 0;
      for (int i = 0; i < leftRowCount; i++) {
        IComparable key = keyColumn1.GetValue(i);
        // skip on the right side while the right side key is smaller than the left side key
        while (j < rightRowCount && key.CompareTo(keyColumn2.GetValue(j)) > 0) {
          j++;
        }
        // store the first j with the same key
        if (j < rightRowCount && key.CompareTo(keyColumn2.GetValue(j)) == 0) {
          firstMatchingRow[i] = j;
        } else {
          firstMatchingRow[i] = -1;
        }
      }

      // second pass: find the best fuzzy link (= same or next smaller fuzzy key value)
      // j < 0 means "no current position on the right side"; starting with -1 also makes an
      // empty left side work, and j is never used as a row index while it is negative
      j = -1;
      for (int i = 0; i < leftRowCount; i++) {
        IComparable key = keyColumn1.GetValue(i);
        IComparable fuzzyKey = fuzzyKeyColumn1.GetValue(i);

        // (re-)position on the right side if there is no current position or the key has changed;
        // for an unchanged key we continue at the previous best match
        if (j < 0 || key.CompareTo(keyColumn2.GetValue(j)) != 0) j = firstMatchingRow[i];

        IComparable[] row2;
        if (j < 0) {
          // no matching key => add an empty row
          row2 = right.GetEmptyRow();
        } else {
          int bestMatch = j;
          // go forward while the fuzzy key on the right is smaller than or equal to the fuzzy key on the left
          while (j < rightRowCount &&
                 key.CompareTo(keyColumn2.GetValue(j)) == 0 &&
                 fuzzyKey.CompareTo(fuzzyKeyColumn2.GetValue(j)) >= 0) {
            bestMatch = j;
            j++;
          }
          // bestMatch is either the last row that satisfied the loop condition or, if the loop
          // body never ran, the starting row (which then does not qualify as a fuzzy link)
          if (key.CompareTo(keyColumn2.GetValue(bestMatch)) == 0 &&
              fuzzyKey.CompareTo(fuzzyKeyColumn2.GetValue(bestMatch)) >= 0) {
            row2 = right.GetRow(bestMatch);
            // continue from the best match for the next left row with the same key
            j = bestMatch;
          } else {
            row2 = right.GetEmptyRow();
          }
        }
        newColumnGroup.AddRow(row2);
      }
    }

    /// <summary>
    /// Gets the description text of this command.
    /// </summary>
    public override string Description {
      get { return "Fuzzy-link column groups"; }
    }
  }
}
|
---|