1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 2002-2015 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Collections.Generic;


24  using System.Diagnostics;


25  using System.Globalization;


26  using System.Linq;


27  using HeuristicLab.Common;


28  using HeuristicLab.Core;


29  using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;


30  using HeuristicLab.Optimization.Operators;


31  using HeuristicLab.Persistence.Default.CompositeSerializers.Storable;


32 


namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
  /// <summary>
  /// Similarity calculator for symbolic expression trees that is based on a bottom-up mapping
  /// between the nodes of two trees. The similarity is twice the number of mapped nodes
  /// divided by the sum of the two tree lengths.
  /// </summary>
  [StorableClass]
  [Item("SymbolicExpressionTreeBottomUpSimilarityCalculator", "A similarity calculator which uses the tree bottom-up distance as a similarity metric.")]
  public class SymbolicExpressionTreeBottomUpSimilarityCalculator : SingleObjectiveSolutionSimilarityCalculator {
    // names of the symbols whose subtrees are treated as unordered (argument order is ignored)
    private readonly HashSet<string> commutativeSymbols = new HashSet<string> { "Addition", "Multiplication", "Average", "And", "Or", "Xor" };

    protected override bool IsCommutative { get { return true; } }

    public SymbolicExpressionTreeBottomUpSimilarityCalculator() { }

    [StorableConstructor]
    protected SymbolicExpressionTreeBottomUpSimilarityCalculator(bool deserializing)
      : base(deserializing) {
    }

    protected SymbolicExpressionTreeBottomUpSimilarityCalculator(SymbolicExpressionTreeBottomUpSimilarityCalculator original, Cloner cloner)
      : base(original, cloner) {
    }

    public override IDeepCloneable Clone(Cloner cloner) {
      return new SymbolicExpressionTreeBottomUpSimilarityCalculator(this, cloner);
    }

    /// <summary>
    /// Calculates the similarity of two trees from their bottom-up mapping.
    /// </summary>
    /// <param name="t1">The first tree</param>
    /// <param name="t2">The second tree</param>
    /// <returns>
    /// 1 if both arguments are the same instance, otherwise
    /// 2 * (number of mapped nodes) / (t1.Length + t2.Length).
    /// </returns>
    public double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
      if (t1 == t2)
        return 1;

      var map = ComputeBottomUpMapping(t1.Root, t2.Root);
      return 2.0 * map.Count / (t1.Length + t2.Length);
    }

    /// <summary>
    /// Calculates the similarity of the trees stored in the two scopes under the variable
    /// named by SolutionVariableName.
    /// </summary>
    /// <param name="leftSolution">The scope holding the first tree</param>
    /// <param name="rightSolution">The scope holding the second tree</param>
    /// <returns>The similarity of the two trees</returns>
    /// <exception cref="ArgumentException">Thrown when one of the solution variables is not an ISymbolicExpressionTree (or is null).</exception>
    /// <exception cref="Exception">Thrown when the calculated similarity is greater than 1.</exception>
    public override double CalculateSolutionSimilarity(IScope leftSolution, IScope rightSolution) {
      var t1 = leftSolution.Variables[SolutionVariableName].Value as ISymbolicExpressionTree;
      var t2 = rightSolution.Variables[SolutionVariableName].Value as ISymbolicExpressionTree;

      if (t1 == null || t2 == null)
        throw new ArgumentException("Cannot calculate similarity when one of the arguments is null.");

      var similarity = CalculateSimilarity(t1, t2);
      if (similarity > 1.0)
        throw new Exception("Similarity value cannot be greater than 1");

      return similarity;
    }

    /// <summary>
    /// Computes a mapping from nodes of the first tree to nodes of the second tree. Two nodes can only
    /// be mapped onto each other when they are represented by the same node of the compacted graph.
    /// </summary>
    /// <param name="n1">The root of the first tree</param>
    /// <param name="n2">The root of the second tree</param>
    /// <returns>A dictionary that maps nodes of the first tree to nodes of the second tree</returns>
    public Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2) {
      var comparer = new SymbolicExpressionTreeNodeComparer(); // use a node comparer because it's faster than calling node.ToString() (strings are expensive) and comparing strings
      var compactedGraph = Compact(n1, n2);

      var forwardMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t1 => nodes of t2
      var reverseMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t2 => nodes of t1

      // visit nodes in order of decreasing height to ensure correct mapping
      var nodes1 = n1.IterateNodesPrefix().OrderByDescending(x => x.GetDepth()).ToList();
      var nodes2 = n2.IterateNodesPrefix().ToList();
      for (int i = 0; i < nodes1.Count; ++i) {
        var v = nodes1[i];
        if (forwardMap.ContainsKey(v))
          continue; // already mapped as part of a larger subtree
        var kv = compactedGraph[v];

        // find the first node of the second tree (in prefix order) that is still unmapped
        // and is represented by the same graph node as v
        ISymbolicExpressionTreeNode w = null;
        for (int j = 0; j < nodes2.Count; ++j) {
          var t = nodes2[j];
          if (reverseMap.ContainsKey(t) || compactedGraph[t] != kv)
            continue;
          w = t;
          break;
        }
        if (w == null) continue;

        // at this point we know that v and w are isomorphic, however, the mapping cannot be done directly
        // (as in the paper) because the trees are unordered (subtree order might differ). the solution is
        // to sort subtrees from under commutative labels (this will work because the subtrees are isomorphic!)
        // while iterating over the two subtrees
        var vv = IterateBreadthOrdered(v, comparer).ToList();
        var ww = IterateBreadthOrdered(w, comparer).ToList();
        int len = Math.Min(vv.Count, ww.Count);
        for (int j = 0; j < len; ++j) {
          var s = vv[j];
          var t = ww[j];
          Debug.Assert(!reverseMap.ContainsKey(t));

          forwardMap[s] = t;
          reverseMap[t] = s;
        }
      }

      return forwardMap;
    }

    /// <summary>
    /// Creates a compact representation of the two trees as a directed acyclic graph
    /// </summary>
    /// <remarks>
    /// Leaf nodes with the same label share one graph node. Inner nodes share a graph node when they have
    /// the same symbol name, depth and subtree count and when their subtrees are represented by the same
    /// sequence of graph nodes (compared after sorting by label when the symbol is commutative).
    /// </remarks>
    /// <param name="n1">The root of the first tree</param>
    /// <param name="n2">The root of the second tree</param>
    /// <returns>The compacted DAG representing the two trees</returns>
    private Dictionary<ISymbolicExpressionTreeNode, GraphNode> Compact(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2) {
      var nodeMap = new Dictionary<ISymbolicExpressionTreeNode, GraphNode>(); // K
      var labelMap = new Dictionary<string, GraphNode>(); // L
      var childrenCount = new Dictionary<ISymbolicExpressionTreeNode, int>(); // Children

      var nodes = n1.IterateNodesPostfix().Concat(n2.IterateNodesPostfix()); // the disjoint union F
      var list = new List<GraphNode>();
      var queue = new Queue<ISymbolicExpressionTreeNode>();

      // leaves are mapped immediately and seed the queue; for inner nodes only the number of
      // not yet processed children is recorded
      foreach (var n in nodes) {
        if (n.SubtreeCount == 0) {
          var label = GetLabel(n);
          if (!labelMap.ContainsKey(label)) {
            var z = new GraphNode { SymbolicExpressionTreeNode = n, Label = label };
            labelMap[z.Label] = z;
          }
          nodeMap[n] = labelMap[label];
          queue.Enqueue(n);
        } else {
          childrenCount[n] = n.SubtreeCount;
        }
      }

      // an inner node is enqueued only after all of its children have been processed,
      // so nodeMap already contains an entry for each of its subtrees
      while (queue.Count > 0) {
        var n = queue.Dequeue();
        if (n.SubtreeCount > 0) {
          bool found = false;
          var label = n.Symbol.Name;
          var depth = n.GetDepth();

          bool sort = n.SubtreeCount > 1 && commutativeSymbols.Contains(label);
          var nSubtrees = n.Subtrees.Select(x => nodeMap[x]).ToList();
          if (sort) nSubtrees.Sort((a, b) => string.CompareOrdinal(a.Label, b.Label));

          // search the already created graph nodes (most recent first) for an equivalent one
          for (int i = list.Count - 1; i >= 0; --i) {
            var w = list[i];
            if (!(n.SubtreeCount == w.SubtreeCount && label == w.Label && depth == w.Depth))
              continue;

            // sort V and W when the symbol is commutative because we are dealing with unordered trees
            var m = w.SymbolicExpressionTreeNode;
            var mSubtrees = m.Subtrees.Select(x => nodeMap[x]).ToList();
            if (sort) mSubtrees.Sort((a, b) => string.CompareOrdinal(a.Label, b.Label));

            found = nSubtrees.SequenceEqual(mSubtrees);
            if (found) {
              nodeMap[n] = w;
              break;
            }
          }

          if (!found) {
            var w = new GraphNode { SymbolicExpressionTreeNode = n, Label = label, Depth = depth };
            list.Add(w);
            nodeMap[n] = w;
          }
        }

        // do not propagate above the two given roots
        if (n == n1 || n == n2)
          continue;

        var p = n.Parent;
        if (p == null)
          continue;

        // one more child of p has been processed; p is ready once none are left
        childrenCount[p]--;

        if (childrenCount[p] == 0)
          queue.Enqueue(p);
      }

      return nodeMap;
    }

    /// <summary>
    /// Lists the nodes of the subtree rooted in <paramref name="node"/> in breadth-first order, where the
    /// subtrees of nodes with a commutative symbol are visited in the order defined by the comparer.
    /// </summary>
    /// <param name="node">The root of the subtree to iterate</param>
    /// <param name="comparer">The comparer used to order the subtrees of commutative nodes</param>
    /// <returns>The nodes of the subtree, starting with <paramref name="node"/></returns>
    private IEnumerable<ISymbolicExpressionTreeNode> IterateBreadthOrdered(ISymbolicExpressionTreeNode node, ISymbolicExpressionTreeNodeComparer comparer) {
      var list = new List<ISymbolicExpressionTreeNode> { node };
      int i = 0;
      while (i < list.Count) {
        var n = list[i];
        if (n.SubtreeCount > 0) {
          // the commutativity test has to be made on the node that is being expanded (n), not on the
          // root of the traversal, otherwise commutative nodes below a non-commutative root are never sorted
          var subtrees = commutativeSymbols.Contains(n.Symbol.Name) ? n.Subtrees.OrderBy(x => x, comparer) : n.Subtrees;
          list.AddRange(subtrees);
        }
        i++;
      }
      return list;
    }

    /// <summary>
    /// Returns the label used to identify a node in the compacted graph: the symbol name for nodes with
    /// subtrees, the value for constants, weight followed by variable name for variables and
    /// the result of ToString() for all other leaf nodes.
    /// </summary>
    /// <param name="node">The node for which the label is created</param>
    /// <returns>The label of the node</returns>
    private static string GetLabel(ISymbolicExpressionTreeNode node) {
      if (node.SubtreeCount > 0)
        return node.Symbol.Name;

      var constant = node as ConstantTreeNode;
      if (constant != null)
        return constant.Value.ToString(CultureInfo.InvariantCulture);

      var variable = node as VariableTreeNode;
      if (variable != null)
        return variable.Weight + variable.VariableName;

      return node.ToString();
    }

    /// <summary>
    /// A node of the compacted graph. It stores the tree node it was created for,
    /// together with the label and depth that are used for matching.
    /// </summary>
    private class GraphNode {
      public ISymbolicExpressionTreeNode SymbolicExpressionTreeNode;
      public string Label;
      public int Depth; // only assigned for nodes with subtrees
      public int SubtreeCount { get { return SymbolicExpressionTreeNode.SubtreeCount; } }
    }
  }
}

