using System; using System.Collections.Generic; using System.IO; using System.Linq; using HeuristicLab.Common; using HeuristicLab.Core; using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; namespace HeuristicLab.Problems.DataAnalysis.Symbolic { public enum SimilarityLevel { Exact, // everything is matched bit by bit (functions and terminals) High, // symbols are matched. leaf node types are matched Relaxed // only symbols are matched, leafs count as wildcards } [Item("SymbolicExpressionTreeNodeSimilarityComparer", "A comparison operator that checks node equality based on different similarity measures.")] [StorableClass] public class SymbolicExpressionTreeNodeSimilarityComparer : Item, ISymbolicExpressionTreeNodeComparer { [StorableConstructor] private SymbolicExpressionTreeNodeSimilarityComparer(bool deserializing) : base(deserializing) { } private SymbolicExpressionTreeNodeSimilarityComparer(SymbolicExpressionTreeNodeSimilarityComparer original, Cloner cloner) : base(original, cloner) { } public override IDeepCloneable Clone(Cloner cloner) { return new SymbolicExpressionTreeNodeSimilarityComparer(this, cloner); } private SimilarityLevel similarityLevel; public SimilarityLevel SimilarityLevel { get { return similarityLevel; } set { similarityLevel = value; } } public SymbolicExpressionTreeNodeSimilarityComparer() { this.similarityLevel = SimilarityLevel.Exact; } public SymbolicExpressionTreeNodeSimilarityComparer(SimilarityLevel similarityLevel) { this.similarityLevel = similarityLevel; } public int GetHashCode(ISymbolicExpressionTreeNode n) { return n.ToString().ToLower().GetHashCode(); } public bool Equals(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) { var ca = a as ConstantTreeNode; var cb = b as ConstantTreeNode; var va = a as VariableTreeNode; var vb = b as VariableTreeNode; var aIsConstant = ca != null; var aIsVariable = va != null; var bIsConstant = cb != null; var bIsVariable = vb != null; var aIsFunction = (!(aIsConstant | aIsVariable)); var bIsFunction = (!(bIsConstant | bIsVariable)); if (aIsFunction) return bIsFunction && a.Symbol.Name.Equals(b.Symbol.Name); if (bIsFunction) // one is function and the other is not, return false return false; switch (similarityLevel) { case (SimilarityLevel.Exact): if (aIsConstant & bIsConstant) return ca.Value.Equals(cb.Value); if (aIsVariable & bIsVariable) return va.Weight.Equals(vb.Weight); return false; case (SimilarityLevel.High): return ((aIsConstant & bIsConstant) || (aIsVariable & bIsVariable)); case (SimilarityLevel.Relaxed): return true; default: return false; } } public static bool ExactlySimilar(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) { return Similar(a, b, SimilarityLevel.Exact); } public static bool Similar(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SimilarityLevel level) { var comp = new SymbolicExpressionTreeNodeSimilarityComparer(level); return comp.Equals(a, b); } } public class SymbolicExpressionTreeNodeOrderingComparer : IComparer { public int Compare(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b) { var ca = a as ConstantTreeNode; var cb = b as ConstantTreeNode; var va = a as VariableTreeNode; var vb = b as VariableTreeNode; var aIsConstant = ca != null; var aIsVariable = va != null; var bIsConstant = cb != null; var bIsVariable = vb != null; var aIsFunction = (!(aIsConstant | aIsVariable)); var bIsFunction = (!(bIsConstant | bIsVariable)); if (aIsFunction) return bIsFunction ? a.Symbol.Name.CompareTo(b.Symbol.Name) : -1; if (bIsFunction) // a is Constant or Variables return 1; if (aIsVariable) return bIsVariable ? va.Weight.CompareTo(vb.Weight) : -1; return bIsVariable ? 1 : ca.Value.CompareTo(cb.Value); } } // tree equality public class SymbolicExpressionTreeEqualityComparer : IEqualityComparer { public bool Equals(ISymbolicExpressionTree a, ISymbolicExpressionTree b) { return SymbolicExpressionTreeMatching.FindMatch(a, b, similarityLevel) == 0; } public int GetHashCode(ISymbolicExpressionTree tree) { return (tree.Length << 8) % 12345; } private SimilarityLevel similarityLevel; public void SetComparisonMode(SimilarityLevel simLevel) { similarityLevel = simLevel; } } public static class SymbolicExpressionTreeMatching { public static bool IsSimilarTo(this ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, SimilarityLevel mode) { return FindMatch(t1, t2, mode) == 0; } public static bool IsSimilarTo(this ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, SimilarityLevel mode) { return FindMatch(t1, t2, mode) == 0; } public static bool ContainsFragment(this ISymbolicExpressionTree tree, ISymbolicExpressionTreeNode fragment, SimilarityLevel mode) { var matches = FindMatches(tree, fragment, mode); return matches.Count > 0; } public static void SortSubtrees(this ISymbolicExpressionTree tree) { SortSubtrees(tree.Root); } public static void SortSubtrees(this ISymbolicExpressionTreeNode node) { if (node.SubtreeCount == 0) return; var subtrees = node.Subtrees as List; if (subtrees == null) return; var comparer = new SymbolicExpressionTreeNodeOrderingComparer(); subtrees.Sort(comparer); foreach (var subtree in subtrees) subtree.SortSubtrees(); } // return child that is the same as node public static ISymbolicExpressionTreeNode FindChild(this ISymbolicExpressionTreeNode parent, ISymbolicExpressionTreeNode node) { var subtrees = parent.Subtrees as List; var comparer = new SymbolicExpressionTreeNodeSimilarityComparer(SimilarityLevel.Exact); return subtrees.Find(x => comparer.Equals(x, node)); } public static int FindMatch(ISymbolicExpressionTree a, ISymbolicExpressionTree b, SimilarityLevel similarityLevel) { var nodesA = a.IterateNodesPostfix() as List; var nodesB = b.IterateNodesPostfix() as List; return FindMatch(nodesA, nodesB, similarityLevel); } public static int FindMatch(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SimilarityLevel similarityLevel) { var nodesA = a.IterateNodesPostfix() as List; var nodesB = b.IterateNodesPostfix() as List; return FindMatch(nodesA, nodesB, similarityLevel); } // returns the index in the sequence 'seq', where pattern 'pat' is matched, or -1 for no match // -1 means no match, 0 means perfect match (the sequences overlap, hence the match index is 0) public static int FindMatch(List seq, List pat, SimilarityLevel similarityLevel) { int patlen = pat.Count; int seqlen = seq.Count; if (patlen == 0 || seqlen == 0) return -1; var comparer = new SymbolicExpressionTreeNodeSimilarityComparer(similarityLevel); if (patlen == seqlen) return seq.SequenceEqual(pat, comparer) ? 0 : -1; for (int i = patlen; i <= seqlen; ++i) { if (comparer.Equals(seq[i - 1], pat.Last())) // first check if last element is a match if (seq.Skip(i - patlen).Take(patlen).SequenceEqual(pat, comparer)) // then check the whole sequence return i - patlen; } return -1; } public static List FindMatches(ISymbolicExpressionTree tree, ISymbolicExpressionTreeNode fragment, SimilarityLevel similarityLevel) { var comp = new SymbolicExpressionTreeNodeSimilarityComparer(similarityLevel); return FindMatches(tree.Root, fragment, comp); } public static List FindMatches(ISymbolicExpressionTreeNode root, ISymbolicExpressionTreeNode fragment, SymbolicExpressionTreeNodeSimilarityComparer comp) { var matches = new List(); root.ForEachNodePostfix(node => { if (Match(node, fragment, comp) == fragment.GetLength()) matches.Add(node); }); return matches; } /// /// Finds the longest common subsequence in quadratic time and linear space /// Variant of: /// D . S. Hirschberg. A linear space algorithm for or computing maximal common subsequences. 1975. /// http://dl.acm.org/citation.cfm?id=360861 /// /// Number of pairs that were matched public static int Match(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) { if (!comp.Equals(a, b)) return 0; int m = a.SubtreeCount; int n = b.SubtreeCount; if (m == 0 || n == 0) return 1; var matrix = new int[m + 1, n + 1]; for (int i = 1; i <= m; ++i) for (int j = 1; j <= n; ++j) { var ai = a.GetSubtree(i - 1); var bj = b.GetSubtree(j - 1); int match = Match(ai, bj, comp); matrix[i, j] = Math.Max(Math.Max(matrix[i, j - 1], matrix[i - 1, j]), matrix[i - 1, j - 1] + match); } return matrix[m, n] + 1; } public static int LCS(ISymbolicExpressionTreeNode a, ISymbolicExpressionTreeNode b, SymbolicExpressionTreeNodeSimilarityComparer comp) { var nodesA = a.IterateNodesPrefix().ToList(); var nodesB = b.IterateNodesPrefix().ToList(); int m = nodesA.Count; int n = nodesB.Count; var matrix = new int[m + 1, n + 1]; for (int i = 1; i <= m; ++i) { for (int j = 1; j <= n; ++j) { int w = comp.Equals(a, b) ? 1 : 0; matrix[i, j] = Math.Max(Math.Max(matrix[i, j - 1], matrix[i - 1, j]), matrix[i - 1, j - 1] + w); } } return matrix[m, n]; } public static void RenderTree(TextWriter writer, ISymbolicExpressionTree tree) { RenderNode(writer, tree.Root, string.Empty); } public static void RenderNode(TextWriter writer, ISymbolicExpressionTreeNode node, string prefix) { string label; if (node is VariableTreeNode) { var variable = node as VariableTreeNode; label = string.Concat(string.Format("{0:0.000}", variable.Weight), '·', variable.VariableName); } else if (node is ConstantTreeNode) { var constant = node as ConstantTreeNode; label = string.Format("{0:0.000}", constant.Value); } else { label = node.Symbol.ToString(); } writer.Write(label); if (node.SubtreeCount > 0) { char connector, extender = ' '; var padding = prefix + new string(' ', label.Length); for (int i = 0; i != node.SubtreeCount; ++i) { if (i == 0) { if (node.SubtreeCount > 1) { connector = RenderChars.JunctionDown; extender = RenderChars.VerticalLine; } else { connector = RenderChars.HorizontalLine; extender = ' '; } } else { writer.Write(padding); if (i == node.SubtreeCount - 1) { connector = RenderChars.CornerRight; extender = ' '; } else { connector = RenderChars.JunctionRight; extender = RenderChars.VerticalLine; } } writer.Write(string.Concat(connector, RenderChars.HorizontalLine)); var newPrefix = string.Concat(padding, extender, ' '); RenderNode(writer, node.GetSubtree(i), newPrefix); } } else writer.WriteLine(); } } // helper class providing characters for displaying a tree in the console public static class RenderChars { public const char JunctionDown = '┬'; public const char HorizontalLine = '─'; public const char VerticalLine = '│'; public const char JunctionRight = '├'; public const char CornerRight = '└'; } }