[6133] | 1 | using System;
|
---|
| 2 | using System.Collections.Generic;
|
---|
| 3 | using System.Linq;
|
---|
| 4 | using System.Text;
|
---|
| 5 | using System.Windows.Forms;
|
---|
| 6 | using HeuristicLab.DataImporter.Data;
|
---|
| 7 | using HeuristicLab.DataImporter.Data.CommandBase;
|
---|
| 8 | using HeuristicLab.DataImporter.Data.Model;
|
---|
| 9 | using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
|
---|
| 10 |
|
---|
namespace HeuristicLab.DataImporter.Command {
  /// <summary>
  /// Command that replaces the second selected ("right") column group with a version of itself that is
  /// row-aligned to the first selected ("left") column group.
  /// </summary>
  /// <remarks>
  /// Both column groups must be sorted ascending by exactly two columns. The first sorted column is used as
  /// an exact key, the second one as a "fuzzy" key. For every left row the result contains the last right row
  /// with an equal key whose fuzzy key is smaller than or equal to the left fuzzy key, or an empty row if no
  /// such right row exists.
  /// </remarks>
  [StorableClass]
  [ViewableCommandInfo("Fuzzy-link ColumnGroups", 2, ColumnGroupState.Sorted, "Time Series", Position = 4)]
  public class FuzzyLinkColumnGroupsCommand : DataSetCommandWithAffectedColumnGroupsBase {
    // Column group built by FuzzyLinkColumnGroups; Execute puts it into the data set.
    private ColumnGroup newColumnGroup;
    // The right column group as it was when Execute started; UndoExecute puts it back.
    private ColumnGroup oldColumnGroup;
    // Index of the right column group inside the data set (the position that gets replaced).
    private int removePos;

    // NOTE(review): parameterless constructor is presumably needed by the persistence layer
    // because of [StorableClass] - confirm before removing.
    private FuzzyLinkColumnGroupsCommand()
      : base(null, null) {
    }

    /// <summary>
    /// Creates the command for the given data set and the names of the affected column groups.
    /// </summary>
    /// <param name="dataSet">Data set that contains the column groups.</param>
    /// <param name="affectedColumnGroups">Names of the affected column groups; Execute requires exactly two.</param>
    public FuzzyLinkColumnGroupsCommand(DataSet dataSet, List<string> affectedColumnGroups)
      : base(dataSet, affectedColumnGroups) {
    }

    //IMPORTANT this method has not been tested and therefore it is not guaranteed that it works as expected
    /// <summary>
    /// Validates the two affected column groups, builds the fuzzy-linked column group and replaces the
    /// right column group in the data set with it.
    /// </summary>
    /// <exception cref="CommandExecutionException">
    /// Thrown if not exactly two column groups are affected, if a column group is not sorted by exactly two
    /// columns, if the sort orders differ or are not ascending, or if a key column contains null values.
    /// </exception>
    public override void Execute() {
      if (AffectedColumnGroupNames.Count != 2) {
        throw new CommandExecutionException("This command only works if excatly two column groups are activated.", this);
      }
      ColumnGroup leftColumnGroup = this.DataSet.GetColumnGroup(AffectedColumnGroupNames[0]);
      ColumnGroup rightColumnGroup = this.DataSet.GetColumnGroup(AffectedColumnGroupNames[1]);

      // Remember what gets replaced and where, so that UndoExecute can restore it.
      this.oldColumnGroup = rightColumnGroup;
      this.removePos = this.DataSet.IndexOfColumnGroup(rightColumnGroup);

      if (leftColumnGroup.SortedColumnsCount != 2 || rightColumnGroup.SortedColumnsCount != 2) {
        throw new CommandExecutionException("Both ColumnGroups must be sorted by exactly two columns.", this);
      }

      if (KeySortOrder(leftColumnGroup, 0) != KeySortOrder(rightColumnGroup, 0) ||
          KeySortOrder(leftColumnGroup, 1) != KeySortOrder(rightColumnGroup, 1) ||
          KeySortOrder(leftColumnGroup, 0) != SortOrder.Ascending ||
          KeySortOrder(leftColumnGroup, 1) != SortOrder.Ascending) {
        throw new CommandExecutionException("Both ColumnGroups must be sorted in the same direction and ascending.", this);
      }

      if (KeyColumnsContainNullValues(leftColumnGroup) || KeyColumnsContainNullValues(rightColumnGroup)) {
        throw new CommandExecutionException("KeyColumns must not contain null values.", this);
      }

      FuzzyLinkColumnGroups(leftColumnGroup, rightColumnGroup);

      DataSet.ReplaceColumnGroup(removePos, newColumnGroup);
      DataSet.FireChanged();
    }

    /// <summary>
    /// Puts the original right column group back at the position that Execute replaced.
    /// </summary>
    public override void UndoExecute() {
      DataSet.ReplaceColumnGroup(removePos, oldColumnGroup);
      DataSet.FireChanged();
    }

    /// <summary>
    /// Builds the fuzzy-linked column group and stores it in <c>newColumnGroup</c>: it has the name and
    /// (value-less copies of) the columns of <paramref name="right"/> and one row per row of
    /// <paramref name="left"/>.
    /// </summary>
    /// <remarks>
    /// The algorithm relies on both column groups being sorted ascending by key and fuzzy key and on the key
    /// columns being free of null values; Execute checks this, direct callers have to guarantee it themselves.
    /// Empty column groups are supported: an empty left group yields no rows, an empty right group yields
    /// only empty rows.
    /// </remarks>
    /// <param name="left">Column group that determines the rows of the result.</param>
    /// <param name="right">Column group whose rows are linked to the rows of <paramref name="left"/>.</param>
    public void FuzzyLinkColumnGroups(ColumnGroup left, ColumnGroup right) {
      ColumnBase keyColumn1 = left.Columns.ElementAt(left.SortedColumnIndexes.ElementAt(0));
      ColumnBase keyColumn2 = right.Columns.ElementAt(right.SortedColumnIndexes.ElementAt(0));
      ColumnBase fuzzyKeyColumn1 = left.Columns.ElementAt(left.SortedColumnIndexes.ElementAt(1));
      ColumnBase fuzzyKeyColumn2 = right.Columns.ElementAt(right.SortedColumnIndexes.ElementAt(1));

      this.newColumnGroup = new ColumnGroup(right.Name);
      foreach (ColumnBase col in right.Columns) {
        newColumnGroup.AddColumn(col.CreateCopyOfColumnWithoutValues());
      }

      // Only ascending sort order is handled here, so no compare direction has to be taken into account.

      int leftRowCount = left.RowCount;
      int rightRowCount = right.RowCount;

      // First pass: for every left row find the index of the first right row with the same key (-1 if none).
      int[] firstMatchingRow = new int[leftRowCount];
      int j = 0;
      for (int i = 0; i < leftRowCount; i++) {
        // skip on the right side while the right side key is smaller than the left side key
        while (j < rightRowCount && CompareValues(keyColumn1, i, keyColumn2, j) > 0) {
          j++;
        }
        bool sameKey = j < rightRowCount && CompareValues(keyColumn1, i, keyColumn2, j) == 0;
        firstMatchingRow[i] = sameKey ? j : -1;
      }

      // Second pass: find the best fuzzy link (= same or next smaller fuzzy key) for every left row.
      // j starts out as "no current right row"; it is never used as a row index while it is negative.
      j = -1;
      for (int i = 0; i < leftRowCount; i++) {
        // Restart at the first right row of the key whenever there is no current right row or the key changed.
        // Within one key the search continues at the previous best match, because the fuzzy keys ascend.
        if (j < 0 || CompareValues(keyColumn1, i, keyColumn2, j) != 0) {
          j = firstMatchingRow[i];
        }
        if (j < 0) {
          // no matching key => add an empty row
          newColumnGroup.AddRow(right.GetEmptyRow());
          continue;
        }

        int bestMatch = j;
        bool found = false;
        // go forward while the fuzzy key on the right is smaller than or equal to the fuzzy key on the left
        while (j < rightRowCount &&
               CompareValues(keyColumn1, i, keyColumn2, j) == 0 &&
               CompareValues(fuzzyKeyColumn1, i, fuzzyKeyColumn2, j) >= 0) {
          bestMatch = j;
          found = true;
          j++;
        }
        // Step back to the best match so that the next left row with the same key continues from there.
        j = bestMatch;

        // if we found a match take that row and otherwise use the empty row
        IComparable[] linkedRow;
        if (found) {
          linkedRow = right.GetRow(bestMatch);
        } else {
          linkedRow = right.GetEmptyRow();
        }
        newColumnGroup.AddRow(linkedRow);
      }
    }

    public override string Description {
      get { return "Fuzzy-link column groups"; }
    }

    // Returns the sort order of the keyPosition-th sorted column (0 = key, 1 = fuzzy key) of the column group.
    private static SortOrder KeySortOrder(ColumnGroup columnGroup, int keyPosition) {
      return columnGroup.SortOrdersForColumns.ElementAt(columnGroup.SortedColumnIndexes.ElementAt(keyPosition));
    }

    // Returns true if any of the sorted (key) columns of the column group contains null values.
    private static bool KeyColumnsContainNullValues(ColumnGroup columnGroup) {
      foreach (int sortColIndex in columnGroup.SortedColumnIndexes) {
        if (columnGroup.Columns.ElementAt(sortColIndex).ContainsNullValues) {
          return true;
        }
      }
      return false;
    }

    // Compares the value in row leftRow of leftColumn with the value in row rightRow of rightColumn.
    private static int CompareValues(ColumnBase leftColumn, int leftRow, ColumnBase rightColumn, int rightRow) {
      return leftColumn.GetValue(leftRow).CompareTo(rightColumn.GetValue(rightRow));
    }
  }
}
|
---|