Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.BottomUpTreeDistance/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeDistance/BottomUpTreeDistanceCalculator.cs @ 11212

Last change on this file since 11212 was 11212, checked in by bburlacu, 10 years ago

#2215: Realized that "Height" in the paper might actually refer to the subtree depth (and not it's level in the tree). Changed code according to this interpretation as it makes more sense and seems to produce the correct results.

File size: 11.9 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Drawing;
25using System.Globalization;
26using System.Linq;
27using System.Text;
28using System.Text.RegularExpressions;
29using HeuristicLab.Common;
30using HeuristicLab.Core;
31using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
32using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Views;
33using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
34
35namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
36  /// <summary>
37  /// This class implements the bottom-up tree distance described in the following paper:
38  /// G. Valiente, "An Efficient Bottom-up Distance Between Trees", http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.102.7958
39  /// </summary>
40  [StorableClass]
41  [Item("Bottom-up distance calculator", "An operator that calculates the bottom-up distance between two symbolic expression trees.")]
42  public class BottomUpTreeDistanceCalculator : Item, ISymbolicExpressionTreeDistanceCalculator {
43    public BottomUpTreeDistanceCalculator() { }
44
45    protected BottomUpTreeDistanceCalculator(BottomUpTreeDistanceCalculator original, Cloner cloner)
46      : base(original, cloner) {
47    }
48
49    public override IDeepCloneable Clone(Cloner cloner) {
50      return new BottomUpTreeDistanceCalculator(this, cloner);
51    }
52
53    // return a normalized distance measure in the interval (0,1)
54    public double CalculateDistance(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
55      return BottomUpDistance(t1, t2) / (t1.Length + t2.Length);
56    }
57
58    public static double BottomUpDistance(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
59      var compactedGraph = Compact(t1, t2);
60
61      var forwardMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t1 => nodes of t2
62      var reverseMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t2 => nodes of t1
63
64      foreach (var v in t1.IterateNodesBreadth()) {
65        if (forwardMap.ContainsKey(v)) continue;
66        var kv = compactedGraph[v];
67        var w = t2.IterateNodesBreadth().FirstOrDefault(x => !reverseMap.ContainsKey(x) && compactedGraph[x] == kv); // not sure of iteration order here (pre/post order or breadth), this seems to work
68        if (w == null) continue;
69
70        // at this point we know that v and w are isomorphic, however, the mapping cannot be done directly (as in the paper) because the trees are unordered (subtree order might differ)
71        // the solution is to sort subtrees by label using IterateBreadthOrdered (this will work because the subtrees are isomorphic!) and simultaneously iterate over the two subtrees
72        var eV = IterateBreadthOrdered(v).GetEnumerator();
73        var eW = IterateBreadthOrdered(w).GetEnumerator();
74
75        while (eV.MoveNext() && eW.MoveNext()) {
76          var s = eV.Current;
77          var t = eW.Current;
78          forwardMap[s] = t;
79          reverseMap[t] = s;
80        }
81      }
82
83      int c = forwardMap.Count(x => !Label(x.Key).Equals(Label(x.Value)));
84      double distance = c + t1.Length + t2.Length - 2 * forwardMap.Count;
85      return distance;
86    }
87
88    /// <summary>
89    /// Creates a compact representation of the two trees as a directed acyclic graph
90    /// </summary>
91    /// <param name="t1">The first tree</param>
92    /// <param name="t2">The second tree</param>
93    /// <returns>The compacted DAG representing the two trees</returns>
94    private static Dictionary<ISymbolicExpressionTreeNode, IVertex> Compact(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
95      var nodesToVertices = new Dictionary<ISymbolicExpressionTreeNode, IVertex>(); // K
96      var disjointUnion = new DisjointUnion(t1, t2); // F
97      var labelsToVertices = new Dictionary<string, IVertex>(); // L
98      var childrenCount = new Dictionary<ISymbolicExpressionTreeNode, int>(); // Children
99      var vertices = new List<IVertex>(); // G
100
101      var nodes = disjointUnion.Nodes.ToList();
102
103      // for all leaf labels l in F
104      foreach (var l in nodes.Where(x => x.SubtreeCount == 0)) {
105        var label = Label(l);
106        var z = new Vertex { Content = l, Label = label };
107        labelsToVertices[z.Label] = z;
108        vertices.Add(z);
109      }
110
111      var treeNodes = new Queue<ISymbolicExpressionTreeNode>();
112
113      foreach (var v in nodes) {
114        childrenCount[v] = v.SubtreeCount;
115        if (v.SubtreeCount == 0) {
116          treeNodes.Enqueue(v);
117        }
118      }
119
120      while (treeNodes.Any()) {
121        var v = treeNodes.Dequeue();
122        var label = Label(v);
123        if (v.SubtreeCount == 0) {
124          nodesToVertices[v] = labelsToVertices[label]; // 18
125        } else {
126          bool found = false;
127          // for all nodes w in G in reverse order
128          for (int i = vertices.Count - 1; i >= 0; --i) {
129            var w = vertices[i];
130            if (Height(v) != Height(w) || v.SubtreeCount != w.OutDegree || label != w.Label)
131              continue;
132
133            // sort V and W because we are dealing with unordered trees
134            var vSubtrees = v.Subtrees.Select(s => nodesToVertices[s]).OrderBy(x => x.Label);
135            var wSubtrees = w.OutArcs.Select(x => x.Target).OrderBy(x => x.Label);
136            if (vSubtrees.SequenceEqual(wSubtrees)) {
137              nodesToVertices[v] = w;
138              found = true;
139              break;
140            }
141          } // 32: end for
142
143          if (!found) {
144            var w = new Vertex { Content = v, Label = label };
145            vertices.Add(w);
146            nodesToVertices[v] = w;
147
148            foreach (var u in v.Subtrees) {
149              AddArc(w, nodesToVertices[u]);
150            } // 40: end for
151          } // 41: end if
152        } // 42: end if
153
154        if (v.Parent != null) {
155          var p = v.Parent;
156          childrenCount[p]--;
157          if (childrenCount[p] == 0)
158            treeNodes.Enqueue(p);
159        }
160      };
161
162      return nodesToVertices;
163    }
164
165    private static IEnumerable<ISymbolicExpressionTreeNode> IterateBreadthOrdered(ISymbolicExpressionTreeNode node) {
166      var list = new List<ISymbolicExpressionTreeNode> { node };
167      int i = 0;
168      while (i < list.Count) {
169        var n = list[i];
170        if (n.SubtreeCount > 0)
171          list.AddRange(n.Subtrees.OrderBy(Label));
172        i++;
173      }
174      return list;
175    }
176
177    private static IArc AddArc(IVertex source, IVertex target) {
178      var arc = new Arc(source, target);
179      source.AddForwardArc(arc);
180      target.AddReverseArc(arc);
181      return arc;
182    }
183
184
185    private static string Label(ISymbolicExpressionTreeNode n) {
186      return n.ToString();
187    }
188
189    private static int Height(IVertex v) {
190      return ((ISymbolicExpressionTreeNode)v.Content).GetDepth();
191    }
192
193    private static int Height(ISymbolicExpressionTreeNode n) {
194      return n.GetDepth();
195    }
196
197    private class DisjointUnion : Tuple<ISymbolicExpressionTree, ISymbolicExpressionTree> {
198      public DisjointUnion(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2)
199        : base(t1, t2) {
200      }
201
202      public IEnumerable<ISymbolicExpressionTreeNode> Nodes {
203        get { return Item1.Root.IterateNodesPostfix().Concat(Item2.Root.IterateNodesPostfix()); }
204      }
205    }
206
207    public static string RemoveFirstLines(string text, int linesCount) {
208      var lines = Regex.Split(text, "\r\n|\r|\n").Skip(linesCount);
209      return string.Join(Environment.NewLine, lines.ToArray());
210    }
211
212    // draw the mapping between t1 and t2 as a tikz picture
213    private static string FormatMapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> map) {
214      var symbolNameMap = new Dictionary<string, string>
215    {
216      {"ProgramRootSymbol", "Prog"},
217      {"StartSymbol","RPB"},
218      {"Multiplication", "$\\times$"},
219      {"Division", "$\\div$"},
220      {"Addition", "$+$"},
221      {"Subtraction", "$-$"},
222      {"Exponential", "$\\exp$"},
223      {"Logarithm", "$\\log$"}
224    };
225
226      var sb = new StringBuilder();
227      var nodeIds = new Dictionary<ISymbolicExpressionTreeNode, string>();
228      int offset = 0;
229      var layoutEngine = new ReingoldTilfordLayoutEngine<ISymbolicExpressionTreeNode>(x => x.Subtrees);
230      var nodeCoordinates = layoutEngine.CalculateLayout(t1.Root).ToDictionary(n => n.Content, n => new PointF(n.X, n.Y));
231
232      double ws = 0.5;
233      double hs = 0.5;
234
235      var nl = Environment.NewLine;
236      sb.Append("\\documentclass[class=minimal,border=0pt]{standalone}" + nl +
237                 "\\usepackage{tikz}" + nl +
238                 "\\begin{document}" + nl +
239                 "\\begin{tikzpicture}" + nl +
240                 "\\def\\ws{1}" + nl +
241                 "\\def\\hs{0.7}" + nl +
242                 "\\def\\offs{" + offset + "}" + nl);
243
244      foreach (var node in t1.IterateNodesBreadth()) {
245        var id = Guid.NewGuid().ToString();
246        nodeIds[node] = id;
247        var coord = nodeCoordinates[node];
248        var nodeName = symbolNameMap.ContainsKey(node.Symbol.Name) ? symbolNameMap[node.Symbol.Name] : node.ToString();
249        sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\node ({0}) at (\\ws*{1} + \\offs,\\hs*{2}) {{{3}}};", nodeIds[node], ws * coord.X, -hs * coord.Y, EscapeLatexString(nodeName)));
250      }
251
252      foreach (ISymbolicExpressionTreeNode t in t1.IterateNodesBreadth()) {
253        var n = t;
254        foreach (var s in t.Subtrees) {
255          sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\draw ({0}) -- ({1});", nodeIds[n], nodeIds[s]));
256        }
257      }
258
259      nodeCoordinates = layoutEngine.CalculateLayout(t2.Root).ToDictionary(n => n.Content, n => new PointF(n.X, n.Y));
260
261      offset = 20;
262      sb.Append("\\def\\offs{" + offset + "}" + nl);
263      foreach (var node in t2.IterateNodesBreadth()) {
264        var id = Guid.NewGuid().ToString();
265        nodeIds[node] = id;
266        var coord = nodeCoordinates[node];
267        var nodeName = symbolNameMap.ContainsKey(node.Symbol.Name) ? symbolNameMap[node.Symbol.Name] : node.ToString();
268        sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\node ({0}) at (\\ws*{1} + \\offs,\\hs*{2}) {{{3}}};", nodeIds[node], ws * coord.X, -hs * coord.Y, EscapeLatexString(nodeName)));
269      }
270
271      foreach (ISymbolicExpressionTreeNode t in t2.IterateNodesBreadth()) {
272        var n = t;
273        foreach (var s in t.Subtrees) {
274          sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\draw ({0}) -- ({1});", nodeIds[n], nodeIds[s]));
275        }
276      }
277
278      foreach (var p in map) {
279        var id1 = nodeIds[p.Key];
280        var id2 = nodeIds[p.Value];
281
282        sb.Append(string.Format(CultureInfo.InvariantCulture, "\\path[draw,->,color=gray] ({0}) edge[bend left,dashed] ({1});" + Environment.NewLine, id1, id2));
283      }
284      sb.Append("\\end{tikzpicture}" + nl +
285                "\\end{document}" + nl);
286      return sb.ToString();
287    }
288
289    private static string EscapeLatexString(string s) {
290      return s.Replace("\\", "\\\\").Replace("{", "\\{").Replace("}", "\\}").Replace("_", "\\_");
291    }
292  }
293}
Note: See TracBrowser for help on using the repository browser.