Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
07/22/14 01:40:12 (10 years ago)
Author:
bburlacu
Message:

#2215: Added implementation of the BottomUpTreeDistanceCalculator in branches/HeuristicLab.BottomUpTreeDistance.

Location:
branches/HeuristicLab.BottomUpTreeDistance
Files:
1 added
3 copied

Legend:

Unmodified
Added
Removed
  • branches/HeuristicLab.BottomUpTreeDistance/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeDistance/BottomUpTreeDistanceCalculator.cs

    r11210 r11211  
    2222using System;
    2323using System.Collections.Generic;
     24using System.Drawing;
    2425using System.Globalization;
    2526using System.Linq;
     
    3031using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
    3132using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Views;
    32 using HeuristicLab.EvolutionTracking;
    3333using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
    3434
     
    4040  [StorableClass]
    4141  [Item("Bottom-up distance calculator", "An operator that calculates the bottom-up distance between two symbolic expression trees.")]
    42   public class BottomUpDistanceCalculator : Item, IDistanceCalculator {
    43     public BottomUpDistanceCalculator() { }
    44 
    45     protected BottomUpDistanceCalculator(BottomUpDistanceCalculator original, Cloner cloner)
     42  public class BottomUpTreeDistanceCalculator : Item, ISymbolicExpressionTreeDistanceCalculator {
     43    public BottomUpTreeDistanceCalculator() { }
     44
     45    protected BottomUpTreeDistanceCalculator(BottomUpTreeDistanceCalculator original, Cloner cloner)
    4646      : base(original, cloner) {
    4747    }
    4848
    4949    public override IDeepCloneable Clone(Cloner cloner) {
    50       return new BottomUpDistanceCalculator(this, cloner);
    51     }
    52 
    53     private static IArc AddArc(IVertex source, IVertex target) {
    54       var arc = new Arc(source, target);
    55       source.AddForwardArc(arc);
    56       target.AddReverseArc(arc);
    57       return arc;
     50      return new BottomUpTreeDistanceCalculator(this, cloner);
     51    }
     52
     53    // return a normalized distance measure in the interval (0,1)
     54    public double CalculateDistance(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
     55      return BottomUpDistance(t1, t2) / (t1.Length + t2.Length);
     56    }
     57
     58    public static double BottomUpDistance(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
     59      var compactedGraph = Compact(t1, t2);
     60
     61      var forwardMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t1 => nodes of t2
     62      var reverseMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t2 => nodes of t1
     63
     64      foreach (var v in t1.IterateNodesBreadth()) {
     65        if (forwardMap.ContainsKey(v)) continue;
     66        var kv = compactedGraph[v];
     67        var w = t2.IterateNodesBreadth().FirstOrDefault(x => !reverseMap.ContainsKey(x) && compactedGraph[x] == kv); // not sure of iteration order here (pre/post order or breadth), this seems to work
     68        if (w == null) continue;
     69
     70        // at this point we know that v and w are isomorphic, however, the mapping cannot be done directly (as in the paper) because the trees are unordered (subtree order might differ)
     71        // the solution is to sort subtrees by label using IterateBreadthOrdered (this will work because the subtrees are isomorphic!) and simultaneously iterate over the two subtrees
     72        var eV = IterateBreadthOrdered(v).GetEnumerator();
     73        var eW = IterateBreadthOrdered(w).GetEnumerator();
     74
     75        while (eV.MoveNext() && eW.MoveNext()) {
     76          var s = eV.Current;
     77          var t = eW.Current;
     78          forwardMap[s] = t;
     79          reverseMap[t] = s;
     80        }
     81      }
     82
     83      int c = forwardMap.Count(x => !Label(x.Key).Equals(Label(x.Value)));
     84      double distance = c + t1.Length + t2.Length - 2 * forwardMap.Count;
     85      return distance;
    5886    }
    5987
     
    6593    /// <returns>The compacted DAG representing the two trees</returns>
    6694    private static Dictionary<ISymbolicExpressionTreeNode, IVertex> Compact(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
    67       var K = new Dictionary<ISymbolicExpressionTreeNode, IVertex>();
    68       var F = new DisjointUnion(t1, t2);
    69       var L = new Dictionary<string, IVertex>();
    70       var Children = new Dictionary<ISymbolicExpressionTreeNode, int>();
    71       var G = new List<IVertex>();
    72 
    73       var nodes = F.Nodes.ToList();
     95      var nodesToVertices = new Dictionary<ISymbolicExpressionTreeNode, IVertex>(); // K
     96      var disjointUnion = new DisjointUnion(t1, t2); // F
     97      var labelsToVertices = new Dictionary<string, IVertex>(); // L
     98      var childrenCount = new Dictionary<ISymbolicExpressionTreeNode, int>(); // Children
     99      var vertices = new List<IVertex>(); // G
     100
     101      var nodes = disjointUnion.Nodes.ToList();
    74102
    75103      // for all leaf labels l in F
     
    77105        var label = Label(l);
    78106        var z = new Vertex { Content = l, Label = label };
    79         L[z.Label] = z;
    80         G.Add(z);
    81       }
    82 
    83       var Q = new Queue<ISymbolicExpressionTreeNode>();
    84 
    85       // for all nodes
     107        labelsToVertices[z.Label] = z;
     108        vertices.Add(z);
     109      }
     110
     111      var treeNodes = new Queue<ISymbolicExpressionTreeNode>();
     112
    86113      foreach (var v in nodes) {
    87         Children[v] = v.SubtreeCount;
     114        childrenCount[v] = v.SubtreeCount;
    88115        if (v.SubtreeCount == 0) {
    89           Q.Enqueue(v);
    90         }
    91       }
    92 
    93       while (Q.Any()) {
    94         var v = Q.Dequeue();
     116          treeNodes.Enqueue(v);
     117        }
     118      }
     119
     120      while (treeNodes.Any()) {
     121        var v = treeNodes.Dequeue();
    95122        var label = Label(v);
    96123        if (v.SubtreeCount == 0) {
    97           K[v] = L[label]; // 18
     124          nodesToVertices[v] = labelsToVertices[label]; // 18
    98125        } else {
    99126          bool found = false;
    100127          // for all nodes w in G in reverse order
    101           for (int i = G.Count - 1; i >= 0; --i) {
    102             var w = G[i];
    103             if (Height(v) != Height(w) || v.SubtreeCount != w.OutDegree || label != w.Label)
    104               break;
     128          for (int i = vertices.Count - 1; i >= 0; --i) {
     129            var w = vertices[i];
     130            // removed requirement for heights to be equal since a compacted dag should not care about heights
     131            // this ensures correct behavior when isomorphic subtrees are placed at different depths.
     132            // furthermore, if the nodes have the same labels and the same subtrees then that should be sufficient,
     133            // since in the bottom-up approach we are guaranteed that no differences would occur on the lower levels
     134            if (v.SubtreeCount != w.OutDegree || label != w.Label)
     135              continue;
    105136
    106137            // sort V and W because we are dealing with unordered trees
    107             var V = v.Subtrees.Select(s => K[s]).OrderBy(x => x.Label);
    108             var W = w.OutArcs.Select(x => x.Target).OrderBy(x => x.Label);
    109             if (V.SequenceEqual(W)) {
    110               K[v] = w;
     138            var vSubtrees = v.Subtrees.Select(s => nodesToVertices[s]).OrderBy(x => x.Label);
     139            var wSubtrees = w.OutArcs.Select(x => x.Target).OrderBy(x => x.Label);
     140            if (vSubtrees.SequenceEqual(wSubtrees)) {
     141              nodesToVertices[v] = w;
    111142              found = true;
    112143              break;
     
    116147          if (!found) {
    117148            var w = new Vertex { Content = v, Label = label };
    118             G.Add(w);
    119             K[v] = w;
     149            vertices.Add(w);
     150            nodesToVertices[v] = w;
    120151
    121152            foreach (var u in v.Subtrees) {
    122               AddArc(w, K[u]);
     153              AddArc(w, nodesToVertices[u]);
    123154            } // 40: end for
    124155          } // 41: end if
     
    127158        if (v.Parent != null) {
    128159          var p = v.Parent;
    129           Children[p]--;
    130           if (Children[p] == 0)
    131             Q.Enqueue(p);
     160          childrenCount[p]--;
     161          if (childrenCount[p] == 0)
     162            treeNodes.Enqueue(p);
    132163        }
    133164      };
    134165
    135       return K;
    136     }
    137 
    138     private static Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> Mapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, Dictionary<ISymbolicExpressionTreeNode, IVertex> K) {
    139       var M = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t1 => nodes of t2
    140       var M_ = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t2 => nodes of t1
    141 
    142       var nodes2 = t2.IterateNodesPrefix().Where(x => !M_.ContainsKey(x)).ToList();
    143       foreach (var v in t1.IterateNodesBreadth().Where(x => !M.ContainsKey(x))) {
    144         var kv = K[v];
    145         var w = nodes2.LastOrDefault(x => K[x] == kv);
    146         if (w == null) continue;
    147 
    148         // simultaneous preorder traversal of the two subtrees
    149         var eV = v.IterateNodesPrefix().GetEnumerator();
    150         var eW = w.IterateNodesPrefix().GetEnumerator();
    151 
    152         while (eV.MoveNext() && eW.MoveNext()) {
    153           var s = eV.Current;
    154           var t = eW.Current;
    155           M[s] = t;
    156           M_[t] = s;
    157         }
    158       }
    159       return M;
    160     }
    161 
    162     // return a normalized distance measure in the interval (0,1)
    163     public double CalculateDistance(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
    164       return BottomUpDistance(t1, t2) / (t1.Length + t2.Length);
    165     }
    166 
    167     public static double BottomUpDistance(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, double p = 1, double q = 1, double r = 1) {
    168       var K = Compact(t1, t2);
    169       var M = Mapping(t1, t2, K);
    170 
    171       var keys = M.Keys.ToList();
    172       for (int j = 0; j < keys.Count; ++j) {
    173         var key = keys[j];
    174         var value = M[key];
    175         if (key.Parent != null && value.Parent != null && !M.ContainsKey(key.Parent) && Label(key.Parent).Equals(Label(value.Parent))) {
    176           M.Add(key.Parent, value.Parent);
    177           keys.Add(key.Parent);
    178         }
    179       }
    180 
    181       int d = t1.Length - M.Count;
    182       int s = M.Count(x => !Label(x.Key).Equals(Label(x.Value)));
    183       int i = t2.Length - M.Count;
    184 
    185       double distance = s * p + i * q + d * r;
    186       return distance;
    187     }
     166      return nodesToVertices;
     167    }
     168
     169    private static IEnumerable<ISymbolicExpressionTreeNode> IterateBreadthOrdered(ISymbolicExpressionTreeNode node) {
     170      var list = new List<ISymbolicExpressionTreeNode> { node };
     171      int i = 0;
     172      while (i < list.Count) {
     173        var n = list[i];
     174        if (n.SubtreeCount > 0)
     175          list.AddRange(n.Subtrees.OrderBy(Label));
     176        i++;
     177      }
     178      return list;
     179    }
     180
     181    private static IArc AddArc(IVertex source, IVertex target) {
     182      var arc = new Arc(source, target);
     183      source.AddForwardArc(arc);
     184      target.AddReverseArc(arc);
     185      return arc;
     186    }
     187
    188188
    189189    private static string Label(ISymbolicExpressionTreeNode n) {
     
    208208
    209209      public IEnumerable<ISymbolicExpressionTreeNode> Nodes {
    210         get {
    211           var r1 = Item1.Root;
    212           var r2 = Item2.Root;
    213 
    214           var nodes = r1.IterateNodesPostfix().Select(x => new { Node = x, Height = r1.GetBranchLevel(x) })
    215             .Concat(r2.IterateNodesPostfix().Select(x => new { Node = x, Height = r2.GetBranchLevel(x) }));
    216 
    217           return nodes.OrderByDescending(x => x.Height).Select(x => x.Node);
    218         }
     210        get { return Item1.Root.IterateNodesPostfix().Concat(Item2.Root.IterateNodesPostfix()); }
    219211      }
    220212    }
     
    227219    // draw the mapping between t1 and t2 as a tikz picture
    228220    private static string FormatMapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> map) {
    229       var formatter = new SymbolicExpressionTreeLatexFormatter();
     221      var symbolNameMap = new Dictionary<string, string>
     222    {
     223      {"ProgramRootSymbol", "Prog"},
     224      {"StartSymbol","RPB"},
     225      {"Multiplication", "$\\times$"},
     226      {"Division", "$\\div$"},
     227      {"Addition", "$+$"},
     228      {"Subtraction", "$-$"},
     229      {"Exponential", "$\\exp$"},
     230      {"Logarithm", "$\\log$"}
     231    };
     232
    230233      var sb = new StringBuilder();
    231 
    232       string s1, s2;
    233       var m1 = formatter.Format(t1, out s1);
    234       var m2 = formatter.Format(t2, out s2, 20);
    235 
    236       // skip first 6 lines of s2
    237       s2 = RemoveFirstLines(s2, 6);
    238 
    239       sb.Append(s1);
    240       sb.Append(s2);
     234      var nodeIds = new Dictionary<ISymbolicExpressionTreeNode, string>();
     235      int offset = 0;
     236      var layoutEngine = new ReingoldTilfordLayoutEngine<ISymbolicExpressionTreeNode>(x => x.Subtrees);
     237      var nodeCoordinates = layoutEngine.CalculateLayout(t1.Root).ToDictionary(n => n.Content, n => new PointF(n.X, n.Y));
     238
     239      double ws = 0.5;
     240      double hs = 0.5;
     241
     242      var nl = Environment.NewLine;
     243      sb.Append("\\documentclass[class=minimal,border=0pt]{standalone}" + nl +
     244                 "\\usepackage{tikz}" + nl +
     245                 "\\begin{document}" + nl +
     246                 "\\begin{tikzpicture}" + nl +
     247                 "\\def\\ws{1}" + nl +
     248                 "\\def\\hs{0.7}" + nl +
     249                 "\\def\\offs{" + offset + "}" + nl);
     250
     251      foreach (var node in t1.IterateNodesBreadth()) {
     252        var id = Guid.NewGuid().ToString();
     253        nodeIds[node] = id;
     254        var coord = nodeCoordinates[node];
     255        var nodeName = symbolNameMap.ContainsKey(node.Symbol.Name) ? symbolNameMap[node.Symbol.Name] : node.ToString();
     256        sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\node ({0}) at (\\ws*{1} + \\offs,\\hs*{2}) {{{3}}};", nodeIds[node], ws * coord.X, -hs * coord.Y, EscapeLatexString(nodeName)));
     257      }
     258
     259      foreach (ISymbolicExpressionTreeNode t in t1.IterateNodesBreadth()) {
     260        var n = t;
     261        foreach (var s in t.Subtrees) {
     262          sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\draw ({0}) -- ({1});", nodeIds[n], nodeIds[s]));
     263        }
     264      }
     265
     266      nodeCoordinates = layoutEngine.CalculateLayout(t2.Root).ToDictionary(n => n.Content, n => new PointF(n.X, n.Y));
     267
     268      offset = 20;
     269      sb.Append("\\def\\offs{" + offset + "}" + nl);
     270      foreach (var node in t2.IterateNodesBreadth()) {
     271        var id = Guid.NewGuid().ToString();
     272        nodeIds[node] = id;
     273        var coord = nodeCoordinates[node];
     274        var nodeName = symbolNameMap.ContainsKey(node.Symbol.Name) ? symbolNameMap[node.Symbol.Name] : node.ToString();
     275        sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\node ({0}) at (\\ws*{1} + \\offs,\\hs*{2}) {{{3}}};", nodeIds[node], ws * coord.X, -hs * coord.Y, EscapeLatexString(nodeName)));
     276      }
     277
     278      foreach (ISymbolicExpressionTreeNode t in t2.IterateNodesBreadth()) {
     279        var n = t;
     280        foreach (var s in t.Subtrees) {
     281          sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\draw ({0}) -- ({1});", nodeIds[n], nodeIds[s]));
     282        }
     283      }
    241284
    242285      foreach (var p in map) {
    243         var id1 = m1[p.Key];
    244         var id2 = m2[p.Value];
    245 
    246         sb.Append(string.Format(CultureInfo.InvariantCulture, "\\path[draw,->] ({0}) edge[bend left,dashed] ({1});" + Environment.NewLine, id1, id2));
    247       }
    248 
     286        var id1 = nodeIds[p.Key];
     287        var id2 = nodeIds[p.Value];
     288
     289        sb.Append(string.Format(CultureInfo.InvariantCulture, "\\path[draw,->,color=gray] ({0}) edge[bend left,dashed] ({1});" + Environment.NewLine, id1, id2));
     290      }
     291      sb.Append("\\end{tikzpicture}" + nl +
     292                "\\end{document}" + nl);
    249293      return sb.ToString();
    250294    }
    251295
     296    private static string EscapeLatexString(string s) {
     297      return s.Replace("\\", "\\\\").Replace("{", "\\{").Replace("}", "\\}").Replace("_", "\\_");
     298    }
    252299  }
    253300}
Note: See TracChangeset for help on using the changeset viewer.