Free cookie consent management tool by TermsFeed Policy Generator

source: trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeMatching/SymbolicExpressionTreeBottomUpSimilarityCalculator.cs @ 16278

Last change on this file since 16278 was 16278, checked in by bburlacu, 6 years ago

#2959: Address the issues described in the ticket.

File size: 10.3 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24using System.Globalization;
25using System.Linq;
26using HeuristicLab.Common;
27using HeuristicLab.Core;
28using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
29using HeuristicLab.Optimization.Operators;
30using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;
31
32namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
33  [StorableClass]
34  [Item("SymbolicExpressionTreeBottomUpSimilarityCalculator", "A similarity calculator which uses the tree bottom-up distance as a similarity metric.")]
35  public class SymbolicExpressionTreeBottomUpSimilarityCalculator : SolutionSimilarityCalculator {
36    private readonly HashSet<string> commutativeSymbols = new HashSet<string> { "Addition", "Multiplication", "Average", "And", "Or", "Xor" };
37
38    public SymbolicExpressionTreeBottomUpSimilarityCalculator() { }
39    protected override bool IsCommutative { get { return true; } }
40
41    public bool MatchConstantValues { get; set; }
42    public bool MatchVariableWeights { get; set; }
43
44    [StorableConstructor]
45    protected SymbolicExpressionTreeBottomUpSimilarityCalculator(bool deserializing)
46      : base(deserializing) {
47    }
48
49    protected SymbolicExpressionTreeBottomUpSimilarityCalculator(SymbolicExpressionTreeBottomUpSimilarityCalculator original, Cloner cloner)
50      : base(original, cloner) {
51    }
52
53    public override IDeepCloneable Clone(Cloner cloner) {
54      return new SymbolicExpressionTreeBottomUpSimilarityCalculator(this, cloner);
55    }
56
57    public double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
58      if (t1 == t2)
59        return 1;
60
61      var actualRoot1 = t1.Root.GetSubtree(0).GetSubtree(0); // skip root and start symbols
62      var actualRoot2 = t2.Root.GetSubtree(0).GetSubtree(0); // skip root and start symbols
63      var map = ComputeBottomUpMapping(actualRoot1, actualRoot2);
64      return 2.0 * map.Count / (t1.Length + t2.Length - 4); // -4 for skipping root and start symbols in the two trees
65    }
66
67    public double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, out Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> map) {
68      if (t1 == t2) {
69        map = null;
70        return 1;
71      }
72
73      var actualRoot1 = t1.Root.GetSubtree(0).GetSubtree(0); // skip root and start symbols
74      var actualRoot2 = t2.Root.GetSubtree(0).GetSubtree(0); // skip root and start symbols
75      map = ComputeBottomUpMapping(actualRoot1, actualRoot2);
76
77      return 2.0 * map.Count / (t1.Length + t2.Length - 4); // -4 for skipping root and start symbols in the two trees
78    }
79
80    public override double CalculateSolutionSimilarity(IScope leftSolution, IScope rightSolution) {
81      if (leftSolution == rightSolution)
82        return 1.0;
83
84      var t1 = leftSolution.Variables[SolutionVariableName].Value as ISymbolicExpressionTree;
85      var t2 = rightSolution.Variables[SolutionVariableName].Value as ISymbolicExpressionTree;
86
87      if (t1 == null || t2 == null)
88        throw new ArgumentException("Cannot calculate similarity when one of the arguments is null.");
89
90      var similarity = CalculateSimilarity(t1, t2);
91      if (similarity > 1.0)
92        throw new Exception("Similarity value cannot be greater than 1");
93
94      return similarity;
95    }
96
97    public Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2) {
98      var comparer = new SymbolicExpressionTreeNodeComparer(); // use a node comparer because it's faster than calling node.ToString() (strings are expensive) and comparing strings
99      var compactedGraph = Compact(n1, n2);
100
101      var forwardMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t1 => nodes of t2
102      var reverseMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t2 => nodes of t1
103
104      // visit nodes in order of decreasing height to ensure correct mapping
105      var nodes1 = (List<ISymbolicExpressionTreeNode>)n1.IterateNodesPrefix();
106      var nodes2 = (List<ISymbolicExpressionTreeNode>)n2.IterateNodesPrefix();
107
108      foreach (var v in nodes1) {
109        if (forwardMap.ContainsKey(v)) {
110          continue;
111        }
112
113        var kv = compactedGraph[v];
114
115        var w = nodes2.Last();
116        int k = nodes2.Count - 1;
117
118        for (int j = 0; j < nodes2.Count; ++j) {
119          var t = nodes2[j];
120
121          if (reverseMap.ContainsKey(t) || kv != compactedGraph[t])
122            continue;
123
124          if (j < k) {
125            w = t;
126            k = j;
127          }
128        }
129
130        if (kv != compactedGraph[w]) {
131          continue;
132        }
133
134        // at this point we know that v and w are isomorphic, however, the mapping cannot be done directly
135        // (as in the paper) because the trees are unordered (subtree order might differ). the solution is
136        // to sort subtrees from under commutative labels (this will work because the subtrees are isomorphic!)
137        // while iterating over the two subtrees
138        var vv = IterateBreadthOrdered(v, comparer);
139        var ww = IterateBreadthOrdered(w, comparer);
140        int len = Math.Min(vv.Count, ww.Count);
141        for (int j = 0; j < len; ++j) {
142          var s = vv[j];
143          var t = ww[j];
144
145          if (reverseMap.ContainsKey(t))
146            continue;
147
148          forwardMap[s] = t;
149          reverseMap[t] = s;
150        }
151      }
152
153      return forwardMap;
154    }
155
156    private List<ISymbolicExpressionTreeNode> IterateBreadthOrdered(ISymbolicExpressionTreeNode node, ISymbolicExpressionTreeNodeComparer comparer) {
157      var list = new List<ISymbolicExpressionTreeNode> { node };
158      int i = 0;
159      while (i < list.Count) {
160        var n = list[i];
161        if (n.SubtreeCount > 0) {
162          var subtrees = commutativeSymbols.Contains(node.Symbol.Name) ? n.Subtrees.OrderBy(x => x, comparer) : n.Subtrees;
163          list.AddRange(subtrees);
164        }
165        i++;
166      }
167      return list;
168    }
169
170    /// <summary>
171    /// Creates a compact representation of the two trees as a directed acyclic graph
172    /// </summary>
173    /// <param name="n1">The root of the first tree</param>
174    /// <param name="n2">The root of the second tree</param>
175    /// <returns>The compacted DAG representing the two trees</returns>
176    private Dictionary<ISymbolicExpressionTreeNode, GraphNode> Compact(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2) {
177      var nodeMap = new Dictionary<ISymbolicExpressionTreeNode, GraphNode>(); // K
178      var labelMap = new Dictionary<string, GraphNode>(); // L
179      var comparer = new SymbolicExpressionTreeNodeComparer();
180
181      var nodes = n1.IterateNodesPostfix().Concat(n2.IterateNodesPostfix()); // the disjoint union F
182      var graph = new List<GraphNode>();
183
184      foreach (var node in nodes) {
185        var label = GetLabel(node);
186
187        if (node.SubtreeCount == 0) {
188          if (!labelMap.ContainsKey(label)) {
189            var g = new GraphNode(node, label);
190            graph.Add(g);
191            labelMap[label] = g;
192          }
193          nodeMap[node] = labelMap[label];
194        } else {
195          var v = new GraphNode(node, label);
196          bool found = false;
197          var commutative = node.SubtreeCount > 1 && commutativeSymbols.Contains(label);
198
199          IEnumerable<GraphNode> vv, ww;
200
201          for (int i = graph.Count - 1; i >= 0; --i) {
202            var w = graph[i];
203
204            if (v.Depth != w.Depth || v.SubtreeCount != w.SubtreeCount || v.Length != w.Length || v.Label != w.Label) {
205              continue;
206            }
207
208            vv = commutative ? v.SymbolicExpressionTreeNode.Subtrees.Select(x => nodeMap[x]).OrderBy(x => x.Hash) : v.SymbolicExpressionTreeNode.Subtrees.Select(x => nodeMap[x]);
209            ww = commutative ? w.SymbolicExpressionTreeNode.Subtrees.Select(x => nodeMap[x]).OrderBy(x => x.Hash) : w.SymbolicExpressionTreeNode.Subtrees.Select(x => nodeMap[x]);
210            found = vv.SequenceEqual(ww);
211
212            if (found) {
213              nodeMap[node] = w;
214              break;
215            }
216          }
217          if (!found) {
218            nodeMap[node] = v;
219            graph.Add(v);
220          }
221        }
222      }
223      return nodeMap;
224    }
225
226    private string GetLabel(ISymbolicExpressionTreeNode node) {
227      if (node.SubtreeCount > 0)
228        return node.Symbol.Name;
229
230      if (node is ConstantTreeNode constant)
231        return MatchConstantValues ? constant.Value.ToString(CultureInfo.InvariantCulture) : constant.Symbol.Name;
232
233      if (node is VariableTreeNode variable)
234        return MatchVariableWeights ? variable.Weight + variable.VariableName : variable.VariableName;
235
236      return node.ToString();
237    }
238
239    private class GraphNode {
240      private GraphNode() { }
241
242      public GraphNode(ISymbolicExpressionTreeNode node, string label) {
243        SymbolicExpressionTreeNode = node;
244        Label = label;
245        Hash = GetHashCode();
246        Depth = node.GetDepth();
247        Length = node.GetLength();
248      }
249
250      public int Hash { get; }
251
252      public ISymbolicExpressionTreeNode SymbolicExpressionTreeNode { get; }
253
254      public string Label { get; }
255
256      public int Depth { get; }
257
258      public int SubtreeCount { get { return SymbolicExpressionTreeNode.SubtreeCount; } }
259
260      public int Length { get; }
261    }
262  }
263}
Note: See TracBrowser for help on using the repository browser.