using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using HeuristicLab.DataImporter.Data;
using HeuristicLab.DataImporter.Data.CommandBase;
using HeuristicLab.DataImporter.Data.Model;
using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;

namespace HeuristicLab.DataImporter.Command {
  /// <summary>
  /// Command that "fuzzy-links" the second selected column group (right) to the first
  /// selected column group (left).
  /// Both groups must be sorted ascending by exactly two columns: the first sorted column
  /// is the exact key, the second sorted column is the fuzzy key. For every row of the left
  /// group, the right row with the same key and the largest fuzzy key that is smaller than or
  /// equal to the left fuzzy key is selected; if there is none, an empty row is used.
  /// The right column group in the data set is replaced by the resulting group, which
  /// therefore receives exactly one row per left row.
  /// </summary>
  [StorableClass]
  [ViewableCommandInfo("Fuzzy-link ColumnGroups", 2, ColumnGroupState.Sorted, "Time Series", Position = 4)]
  public class FuzzyLinkColumnGroupsCommand : DataSetCommandWithAffectedColumnGroupsBase {
    // result of the last Execute(); takes the place of the right column group
    private ColumnGroup newColumnGroup;
    // the original right column group, kept so that UndoExecute() can restore it
    private ColumnGroup oldColumnGroup;
    // position of the right column group inside the data set
    private int removePos;

    // parameterless constructor; passes null for both base-constructor arguments
    private FuzzyLinkColumnGroupsCommand()
      : base(null, null) {
    }

    /// <summary>
    /// Creates the command for the given data set.
    /// </summary>
    /// <param name="dataSet">The data set that contains the column groups.</param>
    /// <param name="affectedColumnGroups">
    /// Names of the selected column groups; Execute() requires exactly two
    /// (first = left, second = right).
    /// </param>
    public FuzzyLinkColumnGroupsCommand(DataSet dataSet, List<string> affectedColumnGroups)
      : base(dataSet, affectedColumnGroups) {
    }

    //IMPORTANT this method has not been tested and therefore it is not guaranteed that it works as expected
    /// <summary>
    /// Validates the two selected column groups, builds the fuzzy-linked group and
    /// replaces the right column group in the data set with it.
    /// </summary>
    /// <exception cref="CommandExecutionException">
    /// Thrown if not exactly two column groups are selected, if either group is not sorted
    /// ascending by exactly two columns, or if a key column contains null values.
    /// </exception>
    public override void Execute() {
      if (AffectedColumnGroupNames.Count != 2)
        throw new CommandExecutionException("This command only works if excatly two column groups are activated.", this);

      ColumnGroup leftColumnGroup = this.DataSet.GetColumnGroup(AffectedColumnGroupNames[0]);
      ColumnGroup rightColumnGroup = this.DataSet.GetColumnGroup(AffectedColumnGroupNames[1]);

      // remember what is replaced (and where) so that the command can be undone
      this.oldColumnGroup = rightColumnGroup;
      this.removePos = this.DataSet.IndexOfColumnGroup(rightColumnGroup);

      if (leftColumnGroup.SortedColumnsCount != 2 || rightColumnGroup.SortedColumnsCount != 2)
        throw new CommandExecutionException("Both ColumnGroups must be sorted by exactly two columns.", this);

      // "same direction and ascending" for both keys means that all four sort orders are ascending
      if (!IsKeySortedAscending(leftColumnGroup, 0) || !IsKeySortedAscending(leftColumnGroup, 1) ||
          !IsKeySortedAscending(rightColumnGroup, 0) || !IsKeySortedAscending(rightColumnGroup, 1))
        throw new CommandExecutionException("Both ColumnGroups must be sorted in the same direction and ascending.", this);

      // the linking algorithm calls CompareTo on the key values, so they must not be null
      if (KeyColumnsContainNullValues(leftColumnGroup) || KeyColumnsContainNullValues(rightColumnGroup))
        throw new CommandExecutionException("KeyColumns must not contain null values.", this);

      FuzzyLinkColumnGroups(leftColumnGroup, rightColumnGroup);
      DataSet.ReplaceColumnGroup(removePos, newColumnGroup);
      DataSet.FireChanged();
    }

    /// <summary>
    /// Puts the original right column group back at its position in the data set.
    /// </summary>
    public override void UndoExecute() {
      DataSet.ReplaceColumnGroup(removePos, oldColumnGroup);
      DataSet.FireChanged();
    }

    /// <summary>
    /// Builds the fuzzy-linked column group and stores it as the result of this command.
    /// The result has the name and (empty copies of) the columns of <paramref name="right"/>
    /// and one row for each row of <paramref name="left"/>.
    /// </summary>
    /// <remarks>
    /// The algorithm relies on both groups being sorted ascending by key and fuzzy key and
    /// on the key columns not containing null values; Execute() checks these preconditions,
    /// this method does not.
    /// </remarks>
    /// <param name="left">Column group that determines the rows of the result.</param>
    /// <param name="right">Column group whose rows are linked to the rows of the left group.</param>
    public void FuzzyLinkColumnGroups(ColumnGroup left, ColumnGroup right) {
      ColumnBase keyColumn1 = left.Columns.ElementAt(left.SortedColumnIndexes.ElementAt(0));
      ColumnBase keyColumn2 = right.Columns.ElementAt(right.SortedColumnIndexes.ElementAt(0));
      ColumnBase fuzzyKeyColumn1 = left.Columns.ElementAt(left.SortedColumnIndexes.ElementAt(1));
      ColumnBase fuzzyKeyColumn2 = right.Columns.ElementAt(right.SortedColumnIndexes.ElementAt(1));

      this.newColumnGroup = new ColumnGroup(right.Name);
      foreach (ColumnBase col in right.Columns)
        newColumnGroup.AddColumn(col.CreateCopyOfColumnWithoutValues());

      // only ascending sort order is supported for now, hence no compare direction is needed
      int leftRowCount = left.RowCount;
      int rightRowCount = right.RowCount;

      // first pass: for every left row find the index of the FIRST right row with the same key
      // (-1 if the key does not occur on the right side)
      int[] firstMatchingRow = new int[leftRowCount];
      int j = 0;
      for (int i = 0; i < leftRowCount; i++) {
        IComparable leftKey = keyColumn1.GetValue(i);
        // skip on the right side while the right side key is smaller than the left side key
        while (j < rightRowCount && leftKey.CompareTo(keyColumn2.GetValue(j)) > 0) {
          j++;
        }
        if (j < rightRowCount && leftKey.CompareTo(keyColumn2.GetValue(j)) == 0) {
          firstMatchingRow[i] = j;
        } else {
          firstMatchingRow[i] = -1;
        }
      }

      // second pass: find the best fuzzy link (= same or next smaller fuzzy key) for every left row
      // j < 0 means "no current position on the right side"; starting with -1 also makes an
      // empty left group a no-op instead of an out-of-range access
      j = -1;
      for (int i = 0; i < leftRowCount; i++) {
        IComparable leftKey = keyColumn1.GetValue(i);
        IComparable leftFuzzyKey = fuzzyKeyColumn1.GetValue(i);

        // restart at the first row of the key group if there is no valid position or the key changed;
        // the j < 0 test must come first because the right key must not be read at index -1
        if (j < 0 || leftKey.CompareTo(keyColumn2.GetValue(j)) != 0)
          j = firstMatchingRow[i];

        if (j < 0) {
          // no matching key => add an empty row
          newColumnGroup.AddRow(right.GetEmptyRow());
          continue;
        }

        // go forward while the key matches and the fuzzy key on the right is
        // smaller than or equal to the fuzzy key on the left
        int bestMatch = -1;
        while (j < rightRowCount
               && leftKey.CompareTo(keyColumn2.GetValue(j)) == 0
               && leftFuzzyKey.CompareTo(fuzzyKeyColumn2.GetValue(j)) >= 0) {
          bestMatch = j;
          j++;
        }

        IComparable[] row2;
        if (bestMatch >= 0) {
          row2 = right.GetRow(bestMatch);
          // continue from the best match, the next left row might link to the same right row
          j = bestMatch;
        } else {
          // all right rows with this key have a larger fuzzy key; j still points to the
          // first row of the key group
          row2 = right.GetEmptyRow();
        }
        newColumnGroup.AddRow(row2);
      }
    }

    // Returns true if the sort key at the given position (0 = key, 1 = fuzzy key) is sorted ascending.
    private static bool IsKeySortedAscending(ColumnGroup columnGroup, int keyPosition) {
      int columnIndex = columnGroup.SortedColumnIndexes.ElementAt(keyPosition);
      return columnGroup.SortOrdersForColumns.ElementAt(columnIndex) == SortOrder.Ascending;
    }

    // Returns true if any of the sorted (key) columns of the column group contains null values.
    private static bool KeyColumnsContainNullValues(ColumnGroup columnGroup) {
      return columnGroup.SortedColumnIndexes.Any(
        sortColIndex => columnGroup.Columns.ElementAt(sortColIndex).ContainsNullValues);
    }

    /// <summary>
    /// Short description of this command.
    /// </summary>
    public override string Description {
      get { return "Fuzzy-link column groups"; }
    }
  }
}