using System; using System.Collections.Generic; using System.Linq; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Data; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; using HeuristicLab.Problems.DataAnalysis.Symbolic; namespace HeuristicLab.Problems.DataAnalysis.Symbolic { [Item("SymbolicExpressionTreeNodeSimilarityComparer", "A comparison operator that checks node equality based on different similarity measures.")] [StorableClass] public class SymbolicExpressionTreeNodeSimilarityComparer : Item, ISymbolicExpressionTreeNodeSimilarityComparer { [StorableConstructor] private SymbolicExpressionTreeNodeSimilarityComparer(bool deserializing) : base(deserializing) { } private SymbolicExpressionTreeNodeSimilarityComparer(SymbolicExpressionTreeNodeSimilarityComparer original, Cloner cloner) : base(original, cloner) { matchConstantValues = original.matchConstantValues; matchVariableNames = original.matchVariableNames; matchVariableWeights = original.matchVariableWeights; } public override IDeepCloneable Clone(Cloner cloner) { return new SymbolicExpressionTreeNodeSimilarityComparer(this, cloner); } // more flexible matching criteria [Storable] private BoolValue matchConstantValues; public bool MatchConstantValues { get { return matchConstantValues.Value; } set { matchConstantValues.Value = value; } } [Storable] private BoolValue matchVariableNames; public bool MatchVariableNames { get { return matchVariableNames.Value; } set { matchVariableNames.Value = value; } } [Storable] private BoolValue matchVariableWeights; public bool MatchVariableWeights { get { return matchVariableWeights.Value; } set { matchVariableWeights.Value = value; } } [StorableHook(HookType.AfterDeserialization)] private void AfterDeserialization() { } public SymbolicExpressionTreeNodeSimilarityComparer() { matchConstantValues = new BoolValue(true); matchVariableNames = new BoolValue(true); matchVariableWeights = new BoolValue(true); } public int GetHashCode(ISymbolicExpressionTreeNode n) { return n.ToString().ToLower().GetHashCode(); } public bool Equals(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) { if (!(a is SymbolicExpressionTreeTerminalNode)) // if a and b are non terminal nodes, check equality of symbol names return !(b is SymbolicExpressionTreeTerminalNode) && a.Symbol.Name.Equals(b.Symbol.Name); var va = a as VariableTreeNode; if (va != null) { var vb = b as VariableTreeNode; if (vb == null) return false; return (!MatchVariableNames || va.VariableName.Equals(vb.VariableName)) && (!MatchVariableWeights || va.Weight.Equals(vb.Weight)); } var ca = a as ConstantTreeNode; if (ca != null) { var cb = b as ConstantTreeNode; if (cb == null) return false; return (!MatchConstantValues || ca.Value.Equals(cb.Value)); } return false; } } public class SymbolicExpressionTreeFragmentSimilarityComparer : IEqualityComparer { public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; } public bool Equals(IFragment x, IFragment y) { if (SimilarityComparer == null) throw new ArgumentNullException("SimilarityComparer needs to be initialized first."); return x.Length == y.Length && SymbolicExpressionTreeMatching.Match(x.Root, y.Root, SimilarityComparer) == x.Length; } public int GetHashCode(IFragment fragment) { return fragment.Root.Symbol.Name.ToLower().GetHashCode(); } } // this comparer considers that a < b if the type of a is "greater" than the type of b, for example: // - A function node is "greater" than a terminal node // - A variable terminal is "greater" than a constant terminal public class SymbolicExpressionTreeNodeComparer : ISymbolicExpressionTreeNodeComparer { public int Compare(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) { if (!(a is SymbolicExpressionTreeTerminalNode)) { if (b is SymbolicExpressionTreeTerminalNode) return -1; return string.Compare(a.Symbol.Name, b.Symbol.Name, StringComparison.Ordinal); } if (!(b is SymbolicExpressionTreeTerminalNode)) return 1; // at this point we know a and b are terminal nodes var va = a as VariableTreeNode; if (va != null) { if (b is ConstantTreeNode) return -1; var vb = (VariableTreeNode)b; return (va.VariableName.Equals(vb.VariableName) ? va.Weight.CompareTo(vb.Weight) : string.Compare(va.VariableName, vb.VariableName, StringComparison.Ordinal)); } // at this point we know for sure that a is a constant tree node if (b is VariableTreeNode) return 1; var ca = (ConstantTreeNode)a; var cb = (ConstantTreeNode)b; return ca.Value.CompareTo(cb.Value); } } // tree equality public class SymbolicExpressionTreeEqualityComparer : IEqualityComparer { public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; } public bool Equals(ISymbolicExpressionTree a, ISymbolicExpressionTree b) { if (SimilarityComparer == null) throw new ArgumentNullException("SimilarityComparer needs to be initialized first."); return SymbolicExpressionTreeMatching.Match(a.Root, b.Root, SimilarityComparer) == Math.Min(a.Length, b.Length); } public int GetHashCode(ISymbolicExpressionTree tree) { return (tree.Length << 8) % 12345; } } public class SymbolicExpressionTreeCanonicalSorter { private readonly HashSet nonSymmetricSymbols = new HashSet { typeof(Subtraction), typeof(Division) }; public void SortSubtrees(ISymbolicExpressionTree tree) { SortSubtrees(tree.Root); } public void SortSubtrees(ISymbolicExpressionTreeNode node) { if (node.SubtreeCount == 0) return; var subtrees = node.Subtrees as List ?? node.Subtrees.ToList(); if (IsSymmetric(node.Symbol)) { var comparer = new SymbolicExpressionTreeNodeComparer(); subtrees.Sort(comparer); } foreach (var s in subtrees) SortSubtrees(s); } private bool IsSymmetric(ISymbol s) { return !nonSymmetricSymbols.Contains(s.GetType()); } } } public static class SymbolicExpressionTreeMatching { public static bool ContainsFragment(this ISymbolicExpressionTreeNode root, IFragment fragment, SymbolicExpressionTreeNodeSimilarityComparer comparer) { return FindMatches(root, fragment.Root, comparer).Any(); } public static IEnumerable FindMatches(ISymbolicExpressionTree tree, ISymbolicExpressionTreeNode fragment, SymbolicExpressionTreeNodeSimilarityComparer comparer) { return FindMatches(tree.Root, fragment, comparer); } public static IEnumerable FindMatches(ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeNode fragment, SymbolicExpressionTreeNodeSimilarityComparer comp) { var fragmentLength = fragment.GetLength(); // below, we use ">=" for Match(n, fragment, comp) >= fragmentLength because in case of relaxed conditions, // we can have multiple matches of the same node return root.IterateNodesBreadth().Where(n => n.GetLength() >= fragmentLength && Match(n, fragment, comp) == fragmentLength); } /// /// Finds the longest common subsequence in quadratic time and linear space /// Variant of: /// D. S. Hirschberg. A linear space algorithm for or computing maximal common subsequences. 1975. /// http://dl.acm.org/citation.cfm?id=360861 /// /// Number of pairs that were matched public static int Match(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) { if (!comp.Equals(a, b)) return 0; int m = a.SubtreeCount; int n = b.SubtreeCount; if (m == 0 || n == 0) return 1; var matrix = new int[m + 1, n + 1]; for (int i = 1; i <= m; ++i) { var ai = a.GetSubtree(i - 1); for (int j = 1; j <= n; ++j) { var bj = b.GetSubtree(j - 1); int match = Match(ai, bj, comp); matrix[i, j] = Math.Max(Math.Max(matrix[i, j - 1], matrix[i - 1, j]), matrix[i - 1, j - 1] + match); } } return matrix[m, n] + 1; } } public class MaximalCommonSubsequenceCalculator { private ISymbolicExpressionTreeNode[] x; private ISymbolicExpressionTreeNode[] y; private List maxCommonSubseq; private double[,] matrix; public SymbolicExpressionTreeNodeSimilarityComparer SimilarityComparer { get; set; } public List Calculate(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2) { if (SimilarityComparer == null) throw new Exception("Comparer cannot be null."); x = n1.IterateNodesPrefix().ToArray(); y = n2.IterateNodesPrefix().ToArray(); if (maxCommonSubseq == null) maxCommonSubseq = new List(); else maxCommonSubseq.Clear(); int n = x.Length; int m = y.Length; matrix = new double[n + 1, m + 1]; for (int i = 0; i <= n; ++i) { for (int j = 0; j <= m; ++j) { if (i == 0 || j == 0) { matrix[i, j] = 0; } else if (SimilarityComparer.Equals(x[i - 1], y[j - 1])) { matrix[i, j] = matrix[i - 1, j - 1] + 1; } else { matrix[i, j] = Math.Max(matrix[i - 1, j], matrix[i, j - 1]); } } } recon(n, m); return new List(maxCommonSubseq); } private void recon(int i, int j) { if (i == 0 || j == 0) return; if (SimilarityComparer.Equals(x[i - 1], y[j - 1])) { recon(i - 1, j - 1); maxCommonSubseq.Add(x[i - 1]); } else if (matrix[i - 1, j] > matrix[i, j - 1]) { recon(i - 1, j); } else recon(i, j - 1); } }