Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.BottomUpTreeDistance/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeDistance/BottomUpTreeDistanceCalculator.cs @ 11211

Last change on this file since 11211 was 11211, checked in by bburlacu, 10 years ago

#2215: Added implementation of the BottomUpTreeDistanceCalculator in branches/HeuristicLab.BottomUpTreeDistance.

File size: 12.4 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Drawing;
25using System.Globalization;
26using System.Linq;
27using System.Text;
28using System.Text.RegularExpressions;
29using HeuristicLab.Common;
30using HeuristicLab.Core;
31using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
32using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Views;
33using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
34
35namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
36  /// <summary>
37  /// This class implements the bottom-up tree distance described in the following paper:
38  /// G. Valiente, "An Efficient Bottom-up Distance Between Trees", http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.102.7958
39  /// </summary>
40  [StorableClass]
41  [Item("Bottom-up distance calculator", "An operator that calculates the bottom-up distance between two symbolic expression trees.")]
42  public class BottomUpTreeDistanceCalculator : Item, ISymbolicExpressionTreeDistanceCalculator {
43    public BottomUpTreeDistanceCalculator() { }
44
45    protected BottomUpTreeDistanceCalculator(BottomUpTreeDistanceCalculator original, Cloner cloner)
46      : base(original, cloner) {
47    }
48
49    public override IDeepCloneable Clone(Cloner cloner) {
50      return new BottomUpTreeDistanceCalculator(this, cloner);
51    }
52
53    // return a normalized distance measure in the interval (0,1)
54    public double CalculateDistance(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
55      return BottomUpDistance(t1, t2) / (t1.Length + t2.Length);
56    }
57
58    public static double BottomUpDistance(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
59      var compactedGraph = Compact(t1, t2);
60
61      var forwardMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t1 => nodes of t2
62      var reverseMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t2 => nodes of t1
63
64      foreach (var v in t1.IterateNodesBreadth()) {
65        if (forwardMap.ContainsKey(v)) continue;
66        var kv = compactedGraph[v];
67        var w = t2.IterateNodesBreadth().FirstOrDefault(x => !reverseMap.ContainsKey(x) && compactedGraph[x] == kv); // not sure of iteration order here (pre/post order or breadth), this seems to work
68        if (w == null) continue;
69
70        // at this point we know that v and w are isomorphic, however, the mapping cannot be done directly (as in the paper) because the trees are unordered (subtree order might differ)
71        // the solution is to sort subtrees by label using IterateBreadthOrdered (this will work because the subtrees are isomorphic!) and simultaneously iterate over the two subtrees
72        var eV = IterateBreadthOrdered(v).GetEnumerator();
73        var eW = IterateBreadthOrdered(w).GetEnumerator();
74
75        while (eV.MoveNext() && eW.MoveNext()) {
76          var s = eV.Current;
77          var t = eW.Current;
78          forwardMap[s] = t;
79          reverseMap[t] = s;
80        }
81      }
82
83      int c = forwardMap.Count(x => !Label(x.Key).Equals(Label(x.Value)));
84      double distance = c + t1.Length + t2.Length - 2 * forwardMap.Count;
85      return distance;
86    }
87
88    /// <summary>
89    /// Creates a compact representation of the two trees as a directed acyclic graph
90    /// </summary>
91    /// <param name="t1">The first tree</param>
92    /// <param name="t2">The second tree</param>
93    /// <returns>The compacted DAG representing the two trees</returns>
94    private static Dictionary<ISymbolicExpressionTreeNode, IVertex> Compact(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
95      var nodesToVertices = new Dictionary<ISymbolicExpressionTreeNode, IVertex>(); // K
96      var disjointUnion = new DisjointUnion(t1, t2); // F
97      var labelsToVertices = new Dictionary<string, IVertex>(); // L
98      var childrenCount = new Dictionary<ISymbolicExpressionTreeNode, int>(); // Children
99      var vertices = new List<IVertex>(); // G
100
101      var nodes = disjointUnion.Nodes.ToList();
102
103      // for all leaf labels l in F
104      foreach (var l in nodes.Where(x => x.SubtreeCount == 0)) {
105        var label = Label(l);
106        var z = new Vertex { Content = l, Label = label };
107        labelsToVertices[z.Label] = z;
108        vertices.Add(z);
109      }
110
111      var treeNodes = new Queue<ISymbolicExpressionTreeNode>();
112
113      foreach (var v in nodes) {
114        childrenCount[v] = v.SubtreeCount;
115        if (v.SubtreeCount == 0) {
116          treeNodes.Enqueue(v);
117        }
118      }
119
120      while (treeNodes.Any()) {
121        var v = treeNodes.Dequeue();
122        var label = Label(v);
123        if (v.SubtreeCount == 0) {
124          nodesToVertices[v] = labelsToVertices[label]; // 18
125        } else {
126          bool found = false;
127          // for all nodes w in G in reverse order
128          for (int i = vertices.Count - 1; i >= 0; --i) {
129            var w = vertices[i];
130            // removed requirement for heights to be equal since a compacted dag should not care about heights
131            // this ensures correct behavior when isomorphic subtrees are placed at different depths.
132            // furthermore, if the nodes have the same labels and the same subtrees then that should be sufficient,
133            // since in the bottom-up approach we are guaranteed that no differences would occur on the lower levels
134            if (v.SubtreeCount != w.OutDegree || label != w.Label)
135              continue;
136
137            // sort V and W because we are dealing with unordered trees
138            var vSubtrees = v.Subtrees.Select(s => nodesToVertices[s]).OrderBy(x => x.Label);
139            var wSubtrees = w.OutArcs.Select(x => x.Target).OrderBy(x => x.Label);
140            if (vSubtrees.SequenceEqual(wSubtrees)) {
141              nodesToVertices[v] = w;
142              found = true;
143              break;
144            }
145          } // 32: end for
146
147          if (!found) {
148            var w = new Vertex { Content = v, Label = label };
149            vertices.Add(w);
150            nodesToVertices[v] = w;
151
152            foreach (var u in v.Subtrees) {
153              AddArc(w, nodesToVertices[u]);
154            } // 40: end for
155          } // 41: end if
156        } // 42: end if
157
158        if (v.Parent != null) {
159          var p = v.Parent;
160          childrenCount[p]--;
161          if (childrenCount[p] == 0)
162            treeNodes.Enqueue(p);
163        }
164      };
165
166      return nodesToVertices;
167    }
168
169    private static IEnumerable<ISymbolicExpressionTreeNode> IterateBreadthOrdered(ISymbolicExpressionTreeNode node) {
170      var list = new List<ISymbolicExpressionTreeNode> { node };
171      int i = 0;
172      while (i < list.Count) {
173        var n = list[i];
174        if (n.SubtreeCount > 0)
175          list.AddRange(n.Subtrees.OrderBy(Label));
176        i++;
177      }
178      return list;
179    }
180
181    private static IArc AddArc(IVertex source, IVertex target) {
182      var arc = new Arc(source, target);
183      source.AddForwardArc(arc);
184      target.AddReverseArc(arc);
185      return arc;
186    }
187
188
189    private static string Label(ISymbolicExpressionTreeNode n) {
190      return n.ToString();
191    }
192
193    private static int Height(IVertex v) {
194      return Height((ISymbolicExpressionTreeNode)v.Content);
195    }
196
197    private static int Height(ISymbolicExpressionTreeNode n) {
198      var p = n;
199      while (p.Parent != null)
200        p = p.Parent;
201      return p.GetBranchLevel(n);
202    }
203
204    private class DisjointUnion : Tuple<ISymbolicExpressionTree, ISymbolicExpressionTree> {
205      public DisjointUnion(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2)
206        : base(t1, t2) {
207      }
208
209      public IEnumerable<ISymbolicExpressionTreeNode> Nodes {
210        get { return Item1.Root.IterateNodesPostfix().Concat(Item2.Root.IterateNodesPostfix()); }
211      }
212    }
213
214    public static string RemoveFirstLines(string text, int linesCount) {
215      var lines = Regex.Split(text, "\r\n|\r|\n").Skip(linesCount);
216      return string.Join(Environment.NewLine, lines.ToArray());
217    }
218
219    // draw the mapping between t1 and t2 as a tikz picture
220    private static string FormatMapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> map) {
221      var symbolNameMap = new Dictionary<string, string>
222    {
223      {"ProgramRootSymbol", "Prog"},
224      {"StartSymbol","RPB"},
225      {"Multiplication", "$\\times$"},
226      {"Division", "$\\div$"},
227      {"Addition", "$+$"},
228      {"Subtraction", "$-$"},
229      {"Exponential", "$\\exp$"},
230      {"Logarithm", "$\\log$"}
231    };
232
233      var sb = new StringBuilder();
234      var nodeIds = new Dictionary<ISymbolicExpressionTreeNode, string>();
235      int offset = 0;
236      var layoutEngine = new ReingoldTilfordLayoutEngine<ISymbolicExpressionTreeNode>(x => x.Subtrees);
237      var nodeCoordinates = layoutEngine.CalculateLayout(t1.Root).ToDictionary(n => n.Content, n => new PointF(n.X, n.Y));
238
239      double ws = 0.5;
240      double hs = 0.5;
241
242      var nl = Environment.NewLine;
243      sb.Append("\\documentclass[class=minimal,border=0pt]{standalone}" + nl +
244                 "\\usepackage{tikz}" + nl +
245                 "\\begin{document}" + nl +
246                 "\\begin{tikzpicture}" + nl +
247                 "\\def\\ws{1}" + nl +
248                 "\\def\\hs{0.7}" + nl +
249                 "\\def\\offs{" + offset + "}" + nl);
250
251      foreach (var node in t1.IterateNodesBreadth()) {
252        var id = Guid.NewGuid().ToString();
253        nodeIds[node] = id;
254        var coord = nodeCoordinates[node];
255        var nodeName = symbolNameMap.ContainsKey(node.Symbol.Name) ? symbolNameMap[node.Symbol.Name] : node.ToString();
256        sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\node ({0}) at (\\ws*{1} + \\offs,\\hs*{2}) {{{3}}};", nodeIds[node], ws * coord.X, -hs * coord.Y, EscapeLatexString(nodeName)));
257      }
258
259      foreach (ISymbolicExpressionTreeNode t in t1.IterateNodesBreadth()) {
260        var n = t;
261        foreach (var s in t.Subtrees) {
262          sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\draw ({0}) -- ({1});", nodeIds[n], nodeIds[s]));
263        }
264      }
265
266      nodeCoordinates = layoutEngine.CalculateLayout(t2.Root).ToDictionary(n => n.Content, n => new PointF(n.X, n.Y));
267
268      offset = 20;
269      sb.Append("\\def\\offs{" + offset + "}" + nl);
270      foreach (var node in t2.IterateNodesBreadth()) {
271        var id = Guid.NewGuid().ToString();
272        nodeIds[node] = id;
273        var coord = nodeCoordinates[node];
274        var nodeName = symbolNameMap.ContainsKey(node.Symbol.Name) ? symbolNameMap[node.Symbol.Name] : node.ToString();
275        sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\node ({0}) at (\\ws*{1} + \\offs,\\hs*{2}) {{{3}}};", nodeIds[node], ws * coord.X, -hs * coord.Y, EscapeLatexString(nodeName)));
276      }
277
278      foreach (ISymbolicExpressionTreeNode t in t2.IterateNodesBreadth()) {
279        var n = t;
280        foreach (var s in t.Subtrees) {
281          sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\draw ({0}) -- ({1});", nodeIds[n], nodeIds[s]));
282        }
283      }
284
285      foreach (var p in map) {
286        var id1 = nodeIds[p.Key];
287        var id2 = nodeIds[p.Value];
288
289        sb.Append(string.Format(CultureInfo.InvariantCulture, "\\path[draw,->,color=gray] ({0}) edge[bend left,dashed] ({1});" + Environment.NewLine, id1, id2));
290      }
291      sb.Append("\\end{tikzpicture}" + nl +
292                "\\end{document}" + nl);
293      return sb.ToString();
294    }
295
296    private static string EscapeLatexString(string s) {
297      return s.Replace("\\", "\\\\").Replace("{", "\\{").Replace("}", "\\}").Replace("_", "\\_");
298    }
299  }
300}
Note: See TracBrowser for help on using the repository browser.