1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 20022014 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Collections.Generic;


24  using System.Diagnostics;


25  using System.Globalization;


26  using System.Linq;


27  using HeuristicLab.Common;


28  using HeuristicLab.Core;


29  using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;


30  using HeuristicLab.Optimization.Operators;


31  using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;


32 


33  namespace HeuristicLab.Problems.DataAnalysis.Symbolic {


34  [StorableClass]


35  [Item("BottomUpSimilarityCalculator", "A similarity calculator which uses the tree bottomup distance as a similarity metric.")]


36  public class BottomUpSimilarityCalculator : SingleObjectiveSolutionSimilarityCalculator {


37  private readonly HashSet<string> commutativeSymbols = new HashSet<string> { "Addition", "Multiplication", "Average", "And", "Or", "Xor" };


38  public bool MatchVariableWeights { get; set; }


39  public bool MatchConstantValues { get; set; }


40 


41  public BottomUpSimilarityCalculator() { }


42 


43  protected BottomUpSimilarityCalculator(BottomUpSimilarityCalculator original, Cloner cloner)


44  : base(original, cloner) {


45  }


46 


47  public override IDeepCloneable Clone(Cloner cloner) {


48  return new BottomUpSimilarityCalculator(this, cloner);


49  }


50 


51  public double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {


52  if (t1 == t2)


53  return 1;


54 


55  var map = ComputeBottomUpMapping(t1.Root, t2.Root);


56  return 2.0 * map.Count / (t1.Length + t2.Length);


57  }


58 


59  public override double CalculateSolutionSimilarity(IScope leftSolution, IScope rightSolution) {


60  var t1 = leftSolution.Variables[SolutionVariableName].Value as ISymbolicExpressionTree;


61  var t2 = rightSolution.Variables[SolutionVariableName].Value as ISymbolicExpressionTree;


62 


63  if (t1 == null  t2 == null)


64  throw new ArgumentException("Cannot calculate similarity when one of the arguments is null.");


65 


66  var similarity = CalculateSimilarity(t1, t2);


67  if (similarity > 1.0)


68  throw new Exception("Similarity value cannot be greater than 1");


69 


70  return similarity;


71  }


72 


73  public Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2) {


74  var comparer = new SymbolicExpressionTreeNodeComparer(); // use a node comparer because it's faster than calling node.ToString() (strings are expensive) and comparing strings


75  var compactedGraph = Compact(n1, n2);


76 


77  var forwardMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t1 => nodes of t2


78  var reverseMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t2 => nodes of t1


79 


80  // visit nodes in order of decreasing height to ensure correct mapping


81  foreach (var v in n1.IterateNodesPrefix().OrderByDescending(x => compactedGraph[x].Depth)) {


82  if (forwardMap.ContainsKey(v))


83  continue;


84  var kv = compactedGraph[v];


85  ISymbolicExpressionTreeNode w = null;


86  foreach (var t in n2.IterateNodesPrefix()) {


87  if (reverseMap.ContainsKey(t)  compactedGraph[t] != kv)


88  continue;


89  w = t;


90  break;


91  }


92  if (w == null) continue;


93 


94  // at this point we know that v and w are isomorphic, however, the mapping cannot be done directly (as in the paper) because the trees are unordered (subtree order might differ)


95  // the solution is to sort subtrees by label using IterateBreadthOrdered (this will work because the subtrees are isomorphic!) and simultaneously iterate over the two subtrees


96  var eV = IterateBreadthOrdered(v, comparer).GetEnumerator();


97  var eW = IterateBreadthOrdered(w, comparer).GetEnumerator();


98 


99  while (eV.MoveNext() && eW.MoveNext()) {


100  var s = eV.Current;


101  var t = eW.Current;


102 


103  Debug.Assert(!reverseMap.ContainsKey(t));


104 


105  forwardMap[s] = t;


106  reverseMap[t] = s;


107  }


108  }


109 


110  return forwardMap;


111  }


112 


113  /// <summary>


114  /// Creates a compact representation of the two trees as a directed acyclic graph


115  /// </summary>


116  /// <param name="n1">The root of the first tree</param>


117  /// <param name="n2">The root of the second tree</param>


118  /// <returns>The compacted DAG representing the two trees</returns>


119  private Dictionary<ISymbolicExpressionTreeNode, GraphNode> Compact(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2) {


120  var nodeMap = new Dictionary<ISymbolicExpressionTreeNode, GraphNode>(); // K


121  var labelMap = new Dictionary<string, GraphNode>(); // L


122  var childrenCount = new Dictionary<ISymbolicExpressionTreeNode, int>(); // Children


123 


124  var nodes = n1.IterateNodesPostfix().Concat(n2.IterateNodesPostfix()); // the disjoint union F


125  var list = new List<GraphNode>(); // preallocate size to avoid list resizing as it has a performance hit


126  var queue = new Queue<ISymbolicExpressionTreeNode>();


127 


128  foreach (var n in nodes) {


129  if (n.SubtreeCount == 0) {


130  var label = Label(n);


131  if (!labelMap.ContainsKey(label)) {


132  var z = new GraphNode { SymbolicExpressionTreeNode = n, Label = label };


133  labelMap[z.Label] = z;


134  list.Add(z);


135  }


136  nodeMap[n] = labelMap[label];


137  queue.Enqueue(n);


138  } else {


139  childrenCount[n] = n.SubtreeCount;


140  }


141  }


142  while (queue.Any()) {


143  var n = queue.Dequeue();


144 


145  if (n.SubtreeCount > 0) {


146  var label = n.Symbol.Name;


147  bool found = false;


148  var depth = n.GetDepth();


149 


150  bool sort = commutativeSymbols.Contains(label);


151  var nNodes = n.Subtrees.Select(x => nodeMap[x]).ToList();


152  if (sort) nNodes.Sort((a, b) => string.CompareOrdinal(a.Label, b.Label));


153 


154  for (int i = list.Count  1; i >= 0; i) {


155  var w = list[i];


156 


157  if (!(n.SubtreeCount == w.ChildrenCount && label == w.Label && depth == w.Depth))


158  continue;


159 


160  // sort V and W when the symbol is commutative because we are dealing with unordered trees


161  var m = w.SymbolicExpressionTreeNode;


162  var mNodes = m.Subtrees.Select(x => nodeMap[x]).ToList();


163  if (sort) mNodes.Sort((a, b) => string.CompareOrdinal(a.Label, b.Label));


164 


165  if (nNodes.SequenceEqual(mNodes)) {


166  nodeMap[n] = w;


167  found = true;


168  break;


169  }


170  }


171 


172  if (!found) {


173  var w = new GraphNode { SymbolicExpressionTreeNode = n, Label = label, Depth = depth };


174  list.Add(w);


175  nodeMap[n] = w;


176  }


177  }


178 


179  if (n == n1  n == n2)


180  continue;


181 


182  var p = n.Parent;


183  if (p == null)


184  continue;


185 


186  childrenCount[p];


187 


188  if (childrenCount[p] == 0)


189  queue.Enqueue(p);


190  }


191 


192  return nodeMap;


193  }


194 


195  private IEnumerable<ISymbolicExpressionTreeNode> IterateBreadthOrdered(ISymbolicExpressionTreeNode node, ISymbolicExpressionTreeNodeComparer comparer) {


196  var list = new List<ISymbolicExpressionTreeNode> { node };


197  int i = 0;


198  while (i < list.Count) {


199  var n = list[i];


200  if (n.SubtreeCount > 0) {


201  var subtrees = commutativeSymbols.Contains(node.Symbol.Name) ? n.Subtrees.OrderBy(x => x, comparer) : n.Subtrees;


202  list.AddRange(subtrees);


203  }


204  i++;


205  }


206  return list;


207  }


208 


209  private string Label(ISymbolicExpressionTreeNode node) {


210  if (node.SubtreeCount > 0)


211  return node.Symbol.Name;


212 


213  var constant = node as ConstantTreeNode;


214  if (constant != null)


215  return MatchConstantValues ? constant.Value.ToString(CultureInfo.InvariantCulture) : constant.Symbol.Name;


216  var variable = node as VariableTreeNode;


217  if (variable != null) {


218  return MatchVariableWeights ? variable.Weight + variable.VariableName : variable.VariableName;


219  }


220 


221  return node.ToString();


222  }


223 


224  private class GraphNode {


225  public ISymbolicExpressionTreeNode SymbolicExpressionTreeNode;


226  public string Label;


227  public int Depth;


228  public int ChildrenCount { get { return SymbolicExpressionTreeNode.SubtreeCount; } }


229  }


230  }


231  }

