Changeset 11211 for branches/HeuristicLab.BottomUpTreeDistance/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeDistance/BottomUpTreeDistanceCalculator.cs
- Timestamp:
- 07/22/14 01:40:12 (10 years ago)
- Location:
- branches/HeuristicLab.BottomUpTreeDistance
- Files:
-
- 1 added
- 3 copied
Legend:
- Unmodified
- Added
- Removed
-
branches/HeuristicLab.BottomUpTreeDistance/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeDistance/BottomUpTreeDistanceCalculator.cs
r11210 r11211 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Drawing; 24 25 using System.Globalization; 25 26 using System.Linq; … … 30 31 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; 31 32 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.Views; 32 using HeuristicLab.EvolutionTracking;33 33 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 34 34 … … 40 40 [StorableClass] 41 41 [Item("Bottom-up distance calculator", "An operator that calculates the bottom-up distance between two symbolic expression trees.")] 42 public class BottomUp DistanceCalculator : Item, IDistanceCalculator {43 public BottomUp DistanceCalculator() { }44 45 protected BottomUp DistanceCalculator(BottomUpDistanceCalculator original, Cloner cloner)42 public class BottomUpTreeDistanceCalculator : Item, ISymbolicExpressionTreeDistanceCalculator { 43 public BottomUpTreeDistanceCalculator() { } 44 45 protected BottomUpTreeDistanceCalculator(BottomUpTreeDistanceCalculator original, Cloner cloner) 46 46 : base(original, cloner) { 47 47 } 48 48 49 49 public override IDeepCloneable Clone(Cloner cloner) { 50 return new BottomUpDistanceCalculator(this, cloner); 51 } 52 53 private static IArc AddArc(IVertex source, IVertex target) { 54 var arc = new Arc(source, target); 55 source.AddForwardArc(arc); 56 target.AddReverseArc(arc); 57 return arc; 50 return new BottomUpTreeDistanceCalculator(this, cloner); 51 } 52 53 // return a normalized distance measure in the interval (0,1) 54 public double CalculateDistance(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) { 55 return BottomUpDistance(t1, t2) / (t1.Length + t2.Length); 56 } 57 58 public static double BottomUpDistance(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) { 59 var compactedGraph = Compact(t1, t2); 60 61 var forwardMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t1 => nodes of t2 62 var reverseMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t2 => nodes of t1 63 64 foreach (var v in t1.IterateNodesBreadth()) { 65 if (forwardMap.ContainsKey(v)) continue; 66 var kv = compactedGraph[v]; 67 var w = t2.IterateNodesBreadth().FirstOrDefault(x => !reverseMap.ContainsKey(x) && compactedGraph[x] == kv); // not sure of iteration order here (pre/post order or breadth), this seems to work 68 if (w == null) continue; 69 70 // at this point we know that v and w are isomorphic, however, the mapping cannot be done directly (as in the paper) because the trees are unordered (subtree order might differ) 71 // the solution is to sort subtrees by label using IterateBreadthOrdered (this will work because the subtrees are isomorphic!) and simultaneously iterate over the two subtrees 72 var eV = IterateBreadthOrdered(v).GetEnumerator(); 73 var eW = IterateBreadthOrdered(w).GetEnumerator(); 74 75 while (eV.MoveNext() && eW.MoveNext()) { 76 var s = eV.Current; 77 var t = eW.Current; 78 forwardMap[s] = t; 79 reverseMap[t] = s; 80 } 81 } 82 83 int c = forwardMap.Count(x => !Label(x.Key).Equals(Label(x.Value))); 84 double distance = c + t1.Length + t2.Length - 2 * forwardMap.Count; 85 return distance; 58 86 } 59 87 … … 65 93 /// <returns>The compacted DAG representing the two trees</returns> 66 94 private static Dictionary<ISymbolicExpressionTreeNode, IVertex> Compact(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) { 67 var K = new Dictionary<ISymbolicExpressionTreeNode, IVertex>();68 var F = new DisjointUnion(t1, t2);69 var L = new Dictionary<string, IVertex>();70 var Children = new Dictionary<ISymbolicExpressionTreeNode, int>();71 var G = new List<IVertex>();72 73 var nodes = F.Nodes.ToList();95 var nodesToVertices = new Dictionary<ISymbolicExpressionTreeNode, IVertex>(); // K 96 var disjointUnion = new DisjointUnion(t1, t2); // F 97 var labelsToVertices = new Dictionary<string, IVertex>(); // L 98 var childrenCount = new Dictionary<ISymbolicExpressionTreeNode, int>(); // Children 99 var vertices = new List<IVertex>(); // G 100 101 var nodes = disjointUnion.Nodes.ToList(); 74 102 75 103 // for all leaf labels l in F … … 77 105 var label = Label(l); 78 106 var z = new Vertex { Content = l, Label = label }; 79 L[z.Label] = z; 80 G.Add(z); 81 } 82 83 var Q = new Queue<ISymbolicExpressionTreeNode>(); 84 85 // for all nodes 107 labelsToVertices[z.Label] = z; 108 vertices.Add(z); 109 } 110 111 var treeNodes = new Queue<ISymbolicExpressionTreeNode>(); 112 86 113 foreach (var v in nodes) { 87 Children[v] = v.SubtreeCount;114 childrenCount[v] = v.SubtreeCount; 88 115 if (v.SubtreeCount == 0) { 89 Q.Enqueue(v);90 } 91 } 92 93 while ( Q.Any()) {94 var v = Q.Dequeue();116 treeNodes.Enqueue(v); 117 } 118 } 119 120 while (treeNodes.Any()) { 121 var v = treeNodes.Dequeue(); 95 122 var label = Label(v); 96 123 if (v.SubtreeCount == 0) { 97 K[v] = L[label]; // 18124 nodesToVertices[v] = labelsToVertices[label]; // 18 98 125 } else { 99 126 bool found = false; 100 127 // for all nodes w in G in reverse order 101 for (int i = G.Count - 1; i >= 0; --i) { 102 var w = G[i]; 103 if (Height(v) != Height(w) || v.SubtreeCount != w.OutDegree || label != w.Label) 104 break; 128 for (int i = vertices.Count - 1; i >= 0; --i) { 129 var w = vertices[i]; 130 // removed requirement for heights to be equal since a compacted dag should not care about heights 131 // this ensures correct behavior when isomorphic subtrees are placed at different depths. 132 // furthermore, if the nodes have the same labels and the same subtrees then that should be sufficient, 133 // since in the bottom-up approach we are guaranteed that no differences would occur on the lower levels 134 if (v.SubtreeCount != w.OutDegree || label != w.Label) 135 continue; 105 136 106 137 // sort V and W because we are dealing with unordered trees 107 var V = v.Subtrees.Select(s => K[s]).OrderBy(x => x.Label);108 var W= w.OutArcs.Select(x => x.Target).OrderBy(x => x.Label);109 if ( V.SequenceEqual(W)) {110 K[v] = w;138 var vSubtrees = v.Subtrees.Select(s => nodesToVertices[s]).OrderBy(x => x.Label); 139 var wSubtrees = w.OutArcs.Select(x => x.Target).OrderBy(x => x.Label); 140 if (vSubtrees.SequenceEqual(wSubtrees)) { 141 nodesToVertices[v] = w; 111 142 found = true; 112 143 break; … … 116 147 if (!found) { 117 148 var w = new Vertex { Content = v, Label = label }; 118 G.Add(w);119 K[v] = w;149 vertices.Add(w); 150 nodesToVertices[v] = w; 120 151 121 152 foreach (var u in v.Subtrees) { 122 AddArc(w, K[u]);153 AddArc(w, nodesToVertices[u]); 123 154 } // 40: end for 124 155 } // 41: end if … … 127 158 if (v.Parent != null) { 128 159 var p = v.Parent; 129 Children[p]--;130 if ( Children[p] == 0)131 Q.Enqueue(p);160 childrenCount[p]--; 161 if (childrenCount[p] == 0) 162 treeNodes.Enqueue(p); 132 163 } 133 164 }; 134 165 135 return K; 136 } 137 138 private static Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> Mapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, Dictionary<ISymbolicExpressionTreeNode, IVertex> K) { 139 var M = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t1 => nodes of t2 140 var M_ = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t2 => nodes of t1 141 142 var nodes2 = t2.IterateNodesPrefix().Where(x => !M_.ContainsKey(x)).ToList(); 143 foreach (var v in t1.IterateNodesBreadth().Where(x => !M.ContainsKey(x))) { 144 var kv = K[v]; 145 var w = nodes2.LastOrDefault(x => K[x] == kv); 146 if (w == null) continue; 147 148 // simultaneous preorder traversal of the two subtrees 149 var eV = v.IterateNodesPrefix().GetEnumerator(); 150 var eW = w.IterateNodesPrefix().GetEnumerator(); 151 152 while (eV.MoveNext() && eW.MoveNext()) { 153 var s = eV.Current; 154 var t = eW.Current; 155 M[s] = t; 156 M_[t] = s; 157 } 158 } 159 return M; 160 } 161 162 // return a normalized distance measure in the interval (0,1) 163 public double CalculateDistance(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) { 164 return BottomUpDistance(t1, t2) / (t1.Length + t2.Length); 165 } 166 167 public static double BottomUpDistance(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, double p = 1, double q = 1, double r = 1) { 168 var K = Compact(t1, t2); 169 var M = Mapping(t1, t2, K); 170 171 var keys = M.Keys.ToList(); 172 for (int j = 0; j < keys.Count; ++j) { 173 var key = keys[j]; 174 var value = M[key]; 175 if (key.Parent != null && value.Parent != null && !M.ContainsKey(key.Parent) && Label(key.Parent).Equals(Label(value.Parent))) { 176 M.Add(key.Parent, value.Parent); 177 keys.Add(key.Parent); 178 } 179 } 180 181 int d = t1.Length - M.Count; 182 int s = M.Count(x => !Label(x.Key).Equals(Label(x.Value))); 183 int i = t2.Length - M.Count; 184 185 double distance = s * p + i * q + d * r; 186 return distance; 187 } 166 return nodesToVertices; 167 } 168 169 private static IEnumerable<ISymbolicExpressionTreeNode> IterateBreadthOrdered(ISymbolicExpressionTreeNode node) { 170 var list = new List<ISymbolicExpressionTreeNode> { node }; 171 int i = 0; 172 while (i < list.Count) { 173 var n = list[i]; 174 if (n.SubtreeCount > 0) 175 list.AddRange(n.Subtrees.OrderBy(Label)); 176 i++; 177 } 178 return list; 179 } 180 181 private static IArc AddArc(IVertex source, IVertex target) { 182 var arc = new Arc(source, target); 183 source.AddForwardArc(arc); 184 target.AddReverseArc(arc); 185 return arc; 186 } 187 188 188 189 189 private static string Label(ISymbolicExpressionTreeNode n) { … … 208 208 209 209 public IEnumerable<ISymbolicExpressionTreeNode> Nodes { 210 get { 211 var r1 = Item1.Root; 212 var r2 = Item2.Root; 213 214 var nodes = r1.IterateNodesPostfix().Select(x => new { Node = x, Height = r1.GetBranchLevel(x) }) 215 .Concat(r2.IterateNodesPostfix().Select(x => new { Node = x, Height = r2.GetBranchLevel(x) })); 216 217 return nodes.OrderByDescending(x => x.Height).Select(x => x.Node); 218 } 210 get { return Item1.Root.IterateNodesPostfix().Concat(Item2.Root.IterateNodesPostfix()); } 219 211 } 220 212 } … … 227 219 // draw the mapping between t1 and t2 as a tikz picture 228 220 private static string FormatMapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> map) { 229 var formatter = new SymbolicExpressionTreeLatexFormatter(); 221 var symbolNameMap = new Dictionary<string, string> 222 { 223 {"ProgramRootSymbol", "Prog"}, 224 {"StartSymbol","RPB"}, 225 {"Multiplication", "$\\times$"}, 226 {"Division", "$\\div$"}, 227 {"Addition", "$+$"}, 228 {"Subtraction", "$-$"}, 229 {"Exponential", "$\\exp$"}, 230 {"Logarithm", "$\\log$"} 231 }; 232 230 233 var sb = new StringBuilder(); 231 232 string s1, s2; 233 var m1 = formatter.Format(t1, out s1); 234 var m2 = formatter.Format(t2, out s2, 20); 235 236 // skip first 6 lines of s2 237 s2 = RemoveFirstLines(s2, 6); 238 239 sb.Append(s1); 240 sb.Append(s2); 234 var nodeIds = new Dictionary<ISymbolicExpressionTreeNode, string>(); 235 int offset = 0; 236 var layoutEngine = new ReingoldTilfordLayoutEngine<ISymbolicExpressionTreeNode>(x => x.Subtrees); 237 var nodeCoordinates = layoutEngine.CalculateLayout(t1.Root).ToDictionary(n => n.Content, n => new PointF(n.X, n.Y)); 238 239 double ws = 0.5; 240 double hs = 0.5; 241 242 var nl = Environment.NewLine; 243 sb.Append("\\documentclass[class=minimal,border=0pt]{standalone}" + nl + 244 "\\usepackage{tikz}" + nl + 245 "\\begin{document}" + nl + 246 "\\begin{tikzpicture}" + nl + 247 "\\def\\ws{1}" + nl + 248 "\\def\\hs{0.7}" + nl + 249 "\\def\\offs{" + offset + "}" + nl); 250 251 foreach (var node in t1.IterateNodesBreadth()) { 252 var id = Guid.NewGuid().ToString(); 253 nodeIds[node] = id; 254 var coord = nodeCoordinates[node]; 255 var nodeName = symbolNameMap.ContainsKey(node.Symbol.Name) ? symbolNameMap[node.Symbol.Name] : node.ToString(); 256 sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\node ({0}) at (\\ws*{1} + \\offs,\\hs*{2}) {{{3}}};", nodeIds[node], ws * coord.X, -hs * coord.Y, EscapeLatexString(nodeName))); 257 } 258 259 foreach (ISymbolicExpressionTreeNode t in t1.IterateNodesBreadth()) { 260 var n = t; 261 foreach (var s in t.Subtrees) { 262 sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\draw ({0}) -- ({1});", nodeIds[n], nodeIds[s])); 263 } 264 } 265 266 nodeCoordinates = layoutEngine.CalculateLayout(t2.Root).ToDictionary(n => n.Content, n => new PointF(n.X, n.Y)); 267 268 offset = 20; 269 sb.Append("\\def\\offs{" + offset + "}" + nl); 270 foreach (var node in t2.IterateNodesBreadth()) { 271 var id = Guid.NewGuid().ToString(); 272 nodeIds[node] = id; 273 var coord = nodeCoordinates[node]; 274 var nodeName = symbolNameMap.ContainsKey(node.Symbol.Name) ? symbolNameMap[node.Symbol.Name] : node.ToString(); 275 sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\node ({0}) at (\\ws*{1} + \\offs,\\hs*{2}) {{{3}}};", nodeIds[node], ws * coord.X, -hs * coord.Y, EscapeLatexString(nodeName))); 276 } 277 278 foreach (ISymbolicExpressionTreeNode t in t2.IterateNodesBreadth()) { 279 var n = t; 280 foreach (var s in t.Subtrees) { 281 sb.AppendLine(string.Format(CultureInfo.InvariantCulture, "\\draw ({0}) -- ({1});", nodeIds[n], nodeIds[s])); 282 } 283 } 241 284 242 285 foreach (var p in map) { 243 var id1 = m1[p.Key]; 244 var id2 = m2[p.Value]; 245 246 sb.Append(string.Format(CultureInfo.InvariantCulture, "\\path[draw,->] ({0}) edge[bend left,dashed] ({1});" + Environment.NewLine, id1, id2)); 247 } 248 286 var id1 = nodeIds[p.Key]; 287 var id2 = nodeIds[p.Value]; 288 289 sb.Append(string.Format(CultureInfo.InvariantCulture, "\\path[draw,->,color=gray] ({0}) edge[bend left,dashed] ({1});" + Environment.NewLine, id1, id2)); 290 } 291 sb.Append("\\end{tikzpicture}" + nl + 292 "\\end{document}" + nl); 249 293 return sb.ToString(); 250 294 } 251 295 296 private static string EscapeLatexString(string s) { 297 return s.Replace("\\", "\\\\").Replace("{", "\\{").Replace("}", "\\}").Replace("_", "\\_"); 298 } 252 299 } 253 300 }
Note: See TracChangeset
for help on using the changeset viewer.