Free cookie consent management tool by TermsFeed Policy Generator

source: branches/HeuristicLab.BottomUpTreeDistance/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SimilarityCalculators/BottomUpSimilarityCalculator.cs @ 11219

Last change on this file since 11219 was 11219, checked in by bburlacu, 10 years ago

#2215: Refactored the tree distance calculators as similarity calculators (extending SingleObjectiveSolutionSimilarityCalculator). Removed ISymbolicExpressionTreeDistanceCalculator interface. Made small performance enhancements to the BottomUpSimilarityCalculator. Added unit tests to check correctness and performance of bottom up similarity. Added SingleObjectivePopulationDiversityAnalyzer in the default operators list along with the BottomUpSimilarityCalculator.

File size: 8.1 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Linq;
25using HeuristicLab.Common;
26using HeuristicLab.Core;
27using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
28using HeuristicLab.Optimization.Operators;
29using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
30
31namespace HeuristicLab.Problems.DataAnalysis.Symbolic.SimilarityCalculators {
32  [StorableClass]
33  [Item("BottomUpSimilarityCalculator", "A similarity calculator which uses the tree bottom-up distance as a similarity metric.")]
34  public class BottomUpSimilarityCalculator : SingleObjectiveSolutionSimilarityCalculator {
35    private readonly HashSet<string> commutativeSymbols = new HashSet<string> { "Addition", "Multiplication", "Average", "And", "Or", "Xor" };
36
37    public BottomUpSimilarityCalculator() { }
38
39    public override IDeepCloneable Clone(Cloner cloner) {
40      return new BottomUpSimilarityCalculator(this, cloner);
41    }
42
43    protected BottomUpSimilarityCalculator(BottomUpSimilarityCalculator original, Cloner cloner)
44      : base(original, cloner) {
45    }
46
47    public override double CalculateSolutionSimilarity(IScope leftSolution, IScope rightSolution) {
48      var t1 = leftSolution.Variables[SolutionVariableName].Value as ISymbolicExpressionTree;
49      var t2 = rightSolution.Variables[SolutionVariableName].Value as ISymbolicExpressionTree;
50
51      if (t1 == null || t2 == null)
52        throw new ArgumentException("Cannot calculate similarity when one of the arguments is null.");
53
54      return CalculateSolutionSimilarity(t1, t2);
55    }
56
57    public double CalculateSolutionSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
58      if (t1 == t2)
59        return 1;
60
61      var map = ComputeBottomUpMapping(t1, t2);
62      return 2.0 * map.Count / (t1.Length + t2.Length);
63    }
64
65
66    public Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
67      var compactedGraph = Compact(t1, t2);
68
69      var forwardMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t1 => nodes of t2
70      var reverseMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t2 => nodes of t1
71
72      foreach (var v in t1.IterateNodesBreadth()) {
73        if (forwardMap.ContainsKey(v)) continue;
74        var kv = compactedGraph[v];
75        ISymbolicExpressionTreeNode w = null;
76        foreach (var t in t2.IterateNodesBreadth()) {
77          if (reverseMap.ContainsKey(t) || compactedGraph[t] != kv) continue;
78          w = t;
79          break;
80        }
81        if (w == null) continue;
82
83        // at this point we know that v and w are isomorphic, however, the mapping cannot be done directly (as in the paper) because the trees are unordered (subtree order might differ)
84        // the solution is to sort subtrees by label using IterateBreadthOrdered (this will work because the subtrees are isomorphic!) and simultaneously iterate over the two subtrees
85        var eV = IterateBreadthOrdered(v).GetEnumerator();
86        var eW = IterateBreadthOrdered(w).GetEnumerator();
87
88        while (eV.MoveNext() && eW.MoveNext()) {
89          var s = eV.Current;
90          var t = eW.Current;
91          forwardMap[s] = t;
92          reverseMap[t] = s;
93        }
94      }
95
96      return forwardMap;
97    }
98
99    /// <summary>
100    /// Creates a compact representation of the two trees as a directed acyclic graph
101    /// </summary>
102    /// <param name="t1">The first tree</param>
103    /// <param name="t2">The second tree</param>
104    /// <returns>The compacted DAG representing the two trees</returns>
105    private Dictionary<ISymbolicExpressionTreeNode, IVertex> Compact(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
106      var nodesToVertices = new Dictionary<ISymbolicExpressionTreeNode, IVertex>(); // K
107      var labelsToVertices = new Dictionary<string, IVertex>(); // L
108      var childrenCount = new Dictionary<ISymbolicExpressionTreeNode, int>(); // Children
109      var vertices = new List<IVertex>(); // G
110
111      var nodes = t1.IterateNodesPostfix().Concat(t2.IterateNodesPostfix()); // the disjoint union F
112      var queue = new Queue<ISymbolicExpressionTreeNode>();
113
114      foreach (var n in nodes) {
115        if (n.SubtreeCount == 0) {
116          var label = n.ToString();
117          var z = new Vertex { Content = n, Label = label };
118          labelsToVertices[z.Label] = z;
119          vertices.Add(z);
120          queue.Enqueue(n);
121        } else {
122          childrenCount[n] = n.SubtreeCount;
123        }
124      }
125
126      while (queue.Any()) {
127        var v = queue.Dequeue();
128        string label;
129        if (v.SubtreeCount == 0) {
130          label = v.ToString();
131          nodesToVertices[v] = labelsToVertices[label]; // 18
132        } else {
133          label = v.Symbol.Name;
134          bool found = false;
135          var height = v.GetDepth();
136
137          bool sort = commutativeSymbols.Contains(label);
138          var vSubtrees = v.Subtrees.Select(x => nodesToVertices[x]).ToList();
139          if (sort) vSubtrees.Sort((a, b) => String.Compare(a.Label, b.Label, StringComparison.Ordinal));
140
141          // for all nodes w in G in reverse order
142          for (int i = vertices.Count - 1; i >= 0; --i) {
143            var w = vertices[i];
144            var n = (ISymbolicExpressionTreeNode)w.Content;
145            if (n.SubtreeCount == 0) continue; // v is a function node so w will have to be a function node as well
146            if (height != (int)w.Weight || v.SubtreeCount != n.SubtreeCount || label != w.Label)
147              continue;
148
149            // sort V and W when the symbol is commutative because we are dealing with unordered trees
150            var wSubtrees = sort ? w.OutArcs.Select(x => x.Target).OrderBy(x => x.Label)
151                                 : w.OutArcs.Select(x => x.Target);
152
153            if (vSubtrees.SequenceEqual(wSubtrees)) {
154              nodesToVertices[v] = w;
155              found = true;
156              break;
157            }
158          } // 32: end for
159
160          if (!found) {
161            var w = new Vertex { Content = v, Label = label, Weight = height };
162            vertices.Add(w);
163            nodesToVertices[v] = w;
164
165            foreach (var u in v.Subtrees) {
166              AddArc(w, nodesToVertices[u]);
167            } // 40: end for
168          } // 41: end if
169        } // 42: end if
170
171        var p = v.Parent;
172        if (p == null)
173          continue;
174
175        childrenCount[p]--;
176
177        if (childrenCount[p] == 0)
178          queue.Enqueue(p);
179      };
180
181      return nodesToVertices;
182    }
183
184    private IEnumerable<ISymbolicExpressionTreeNode> IterateBreadthOrdered(ISymbolicExpressionTreeNode node) {
185      var list = new List<ISymbolicExpressionTreeNode> { node };
186      int i = 0;
187      while (i < list.Count) {
188        var n = list[i];
189        if (n.SubtreeCount > 0) {
190          var subtrees = commutativeSymbols.Contains(node.Symbol.Name) ? n.Subtrees.OrderBy(s => s.ToString()) : n.Subtrees;
191          list.AddRange(subtrees);
192        }
193        i++;
194      }
195      return list;
196    }
197
198    private static IArc AddArc(IVertex source, IVertex target) {
199      var arc = new Arc(source, target);
200      source.AddForwardArc(arc);
201      target.AddReverseArc(arc);
202      return arc;
203    }
204  }
205}
Note: See TracBrowser for help on using the repository browser.