1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 20022019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Collections.Generic;


24  using System.Linq;


25  using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;


26  using static HeuristicLab.Problems.DataAnalysis.Symbolic.SymbolicExpressionHashExtensions;


27 


28  namespace HeuristicLab.Problems.DataAnalysis.Symbolic {


29  public static class SymbolicExpressionTreeHash {


30  private static readonly Addition add = new Addition();


31  private static readonly Subtraction sub = new Subtraction();


32  private static readonly Multiplication mul = new Multiplication();


33  private static readonly Division div = new Division();


34  private static readonly Logarithm log = new Logarithm();


35  private static readonly Exponential exp = new Exponential();


36  private static readonly Sine sin = new Sine();


37  private static readonly Cosine cos = new Cosine();


38  private static readonly Constant constant = new Constant();


39 


40  private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer();


41  private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0);


42 


43  #region tree hashing


44  public static ulong[] Hash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {


45  return tree.ActualRoot().Hash(simplify, strict);


46  }


47 


48  public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) {


49  ulong hashFunction(byte[] input) => HashUtil.DJBHash(input);


50 


51  var hashNodes = simplify ? node.MakeNodes(strict).Simplify(hashFunction) : node.MakeNodes(strict).Sort(hashFunction);


52  var hashes = new ulong[hashNodes.Length];


53  for (int i = 0; i < hashes.Length; ++i) {


54  hashes[i] = hashNodes[i].CalculatedHashValue;


55  }


56  return hashes;


57  }


58 


59  public static ulong ComputeHash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {


60  return ComputeHash(tree.ActualRoot(), simplify, strict);


61  }


62 


63  public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool simplify = false, bool strict = false) {


64  return treeNode.Hash(simplify, strict).Last();


65  }


66 


67  public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node, bool strict = false) {


68  var symbol = node.Symbol;


69  var name = symbol.Name;


70  if (node is ConstantTreeNode constantNode) {


71  name = strict ? constantNode.Value.ToString() : symbol.Name;


72  } else if (node is VariableTreeNode variableNode) {


73  name = strict ? variableNode.Weight.ToString() + variableNode.VariableName : variableNode.VariableName;


74  }


75  var hash = (ulong)name.GetHashCode();


76  var hashNode = new HashNode<ISymbolicExpressionTreeNode>(comparer) {


77  Data = node,


78  Arity = node.SubtreeCount,


79  Size = node.SubtreeCount,


80  IsCommutative = node.Symbol is Addition  node.Symbol is Multiplication,


81  Enabled = true,


82  HashValue = hash,


83  CalculatedHashValue = hash


84  };


85  if (symbol is Addition) {


86  hashNode.Simplify = SimplifyAddition;


87  } else if (symbol is Multiplication) {


88  hashNode.Simplify = SimplifyMultiplication;


89  } else if (symbol is Division) {


90  hashNode.Simplify = SimplifyDivision;


91  } else if (symbol is Logarithm  symbol is Exponential  symbol is Sine  symbol is Cosine) {


92  hashNode.Simplify = SimplifyUnaryNode;


93  } else if (symbol is Subtraction) {


94  hashNode.Simplify = SimplifyBinaryNode;


95  }


96  return hashNode;


97  }


98 


99  public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree, bool strict = false) {


100  return MakeNodes(tree.ActualRoot(), strict);


101  }


102 


103  public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node, bool strict = false) {


104  return node.IterateNodesPostfix().Select(x => x.ToHashNode(strict)).ToArray().UpdateNodeSizes();


105  }


106  #endregion


107 


108  #region tree similarity


109  public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false, bool strict = false) {


110  return ComputeSimilarity(t1.ActualRoot(), t2.ActualRoot(), simplify, strict);


111  }


112 


113  public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false, bool strict = false) {


114  var lh = t1.Hash(simplify, strict);


115  var rh = t2.Hash(simplify, strict);


116 


117  Array.Sort(lh);


118  Array.Sort(rh);


119 


120  return ComputeSimilarity(lh, rh);


121  }


122 


123  // requires lhs and rhs to be sorted


124  public static int IntersectCount(this ulong[] lh, ulong[] rh) {


125  int count = 0;


126  for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {


127  var h1 = lh[i];


128  var h2 = rh[j];


129  if (h1 == h2) {


130  ++count;


131  ++i;


132  ++j;


133  } else if (h1 < h2) {


134  ++i;


135  } else if (h1 > h2) {


136  ++j;


137  }


138  }


139  return count;


140  }


141 


142  public static IEnumerable<ulong> Intersect(this ulong[] lh, ulong[] rh) {


143  for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {


144  var h1 = lh[i];


145  var h2 = rh[j];


146  if (h1 == h2) {


147  yield return h1;


148  ++i;


149  ++j;


150  } else if (h1 < h2) {


151  ++i;


152  } else if (h1 > h2) {


153  ++j;


154  }


155  }


156  }


157 


158  // this will only work if lh and rh are sorted


159  public static double ComputeSimilarity(ulong[] lh, ulong[] rh) {


160  return 2d * IntersectCount(lh, rh) / (lh.Length + rh.Length);


161  }


162 


163  public static double ComputeAverageSimilarity(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {


164  var total = trees.Count * (trees.Count  1) / 2;


165  double avg = 0;


166  var hashes = new ulong[trees.Count][];


167  // build hash arrays


168  for (int i = 0; i < trees.Count; ++i) {


169  var nodes = trees[i].MakeNodes(strict);


170  hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();


171  Array.Sort(hashes[i]);


172  }


173  // compute similarity matrix


174  for (int i = 0; i < trees.Count  1; ++i) {


175  for (int j = i + 1; j < trees.Count; ++j) {


176  avg += ComputeSimilarity(hashes[i], hashes[j]);


177  }


178  }


179  return avg / total;


180  }


181 


182  public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {


183  var sim = new double[trees.Count, trees.Count];


184  var hashes = new ulong[trees.Count][];


185  // build hash arrays


186  for (int i = 0; i < trees.Count; ++i) {


187  var nodes = trees[i].MakeNodes(strict);


188  hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();


189  Array.Sort(hashes[i]);


190  }


191  // compute similarity matrix


192  for (int i = 0; i < trees.Count  1; ++i) {


193  for (int j = i + 1; j < trees.Count; ++j) {


194  sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);


195  }


196  }


197  return sim;


198  }


199  #endregion


200 


201  #region parse a nodes array back into a tree


202  public static ISymbolicExpressionTree ToTree(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {


203  var root = new ProgramRootSymbol().CreateTreeNode();


204  var start = new StartSymbol().CreateTreeNode();


205  root.AddSubtree(start);


206  start.AddSubtree(nodes.ToSubtree());


207  return new SymbolicExpressionTree(root);


208  }


209 


210  public static ISymbolicExpressionTreeNode ToSubtree(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {


211  var treeNodes = nodes.Select(x => x.Data.Symbol.CreateTreeNode()).ToArray();


212 


213  for (int i = nodes.Length  1; i >= 0; i) {


214  var node = nodes[i];


215 


216  if (node.IsLeaf) {


217  if (node.Data is VariableTreeNode variable) {


218  var variableTreeNode = (VariableTreeNode)treeNodes[i];


219  variableTreeNode.VariableName = variable.VariableName;


220  variableTreeNode.Weight = variable.Weight;


221  } else if (node.Data is ConstantTreeNode @const) {


222  var constantTreeNode = (ConstantTreeNode)treeNodes[i];


223  constantTreeNode.Value = @const.Value;


224  }


225  continue;


226  }


227 


228  var treeNode = treeNodes[i];


229 


230  foreach (var j in nodes.IterateChildren(i)) {


231  treeNode.AddSubtree(treeNodes[j]);


232  }


233  }


234 


235  return treeNodes.Last();


236  }


237 


238  private static T CreateTreeNode<T>(this ISymbol symbol) where T : class, ISymbolicExpressionTreeNode {


239  return (T)symbol.CreateTreeNode();


240  }


241  #endregion


242 


243  #region tree simplification


244  // these simplification methods rely on the assumption that child nodes of the current node have already been simplified


245  // (in other words simplification should be applied in a bottomup fashion)


246  public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) {


247  ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes);


248  var root = tree.Root.GetSubtree(0).GetSubtree(0);


249  var nodes = root.MakeNodes();


250  var simplified = nodes.Simplify(hashFunction);


251  return simplified.ToTree();


252  }


253 


254  public static void SimplifyAddition(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {


255  // simplify additions of terms by eliminating terms with the same symbol and hash


256  var children = nodes.IterateChildren(i);


257 


258  // we always assume the child nodes are sorted


259  var curr = children[0];


260  var node = nodes[i];


261 


262  foreach (var j in children.Skip(1)) {


263  if (nodes[j] == nodes[curr]) {


264  nodes.SetEnabled(j, false);


265  node.Arity;


266  } else {


267  curr = j;


268  }


269  }


270  if (node.Arity == 1) { // if the arity is 1 we don't need the addition node at all


271  node.Enabled = false;


272  }


273  }


274 


275  // simplify multiplications by reducing constants and div terms


276  public static void SimplifyMultiplication(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {


277  var node = nodes[i];


278  var children = nodes.IterateChildren(i);


279 


280  for (int j = 0; j < children.Length; ++j) {


281  var c = children[j];


282  var child = nodes[c];


283 


284  if (!child.Enabled)


285  continue;


286 


287  var symbol = child.Data.Symbol;


288  if (symbol is Constant) {


289  for (int k = j + 1; k < children.Length; ++k) {


290  var d = children[k];


291  if (nodes[d].Data.Symbol is Constant) {


292  nodes[d].Enabled = false;


293  node.Arity;


294  } else {


295  break;


296  }


297  }


298  } else if (symbol is Division) {


299  var div = nodes[c];


300  var denominator =


301  div.Arity == 1 ?


302  nodes[c  1] : // 1 / x is expressed as div(x) (with a single child)


303  nodes[c  nodes[c  1].Size  2]; // assume division always has arity 1 or 2


304 


305  foreach (var d in children) {


306  if (nodes[d].Enabled && nodes[d] == denominator) {


307  nodes[c].Enabled = nodes[d].Enabled = denominator.Enabled = false;


308  node.Arity = 2; // matching child + division node


309  break;


310  }


311  }


312  }


313 


314  if (node.Arity == 0) { // if everything is simplified this node becomes constant


315  var constantTreeNode = constant.CreateTreeNode<ConstantTreeNode>();


316  constantTreeNode.Value = 1;


317  nodes[i] = constantTreeNode.ToHashNode();


318  } else if (node.Arity == 1) { // when i have only 1 arg left i can skip this node


319  node.Enabled = false;


320  }


321  }


322  }


323 


324  public static void SimplifyDivision(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {


325  var node = nodes[i];


326  var children = nodes.IterateChildren(i);


327 


328  var tmp = nodes;


329 


330  if (children.All(x => tmp[x].Data.Symbol is Constant)) {


331  var v = ((ConstantTreeNode)nodes[children.First()].Data).Value;


332  if (node.Arity == 1) {


333  v = 1 / v;


334  } else if (node.Arity > 1) {


335  foreach (var j in children.Skip(1)) {


336  v /= ((ConstantTreeNode)nodes[j].Data).Value;


337  }


338  }


339  var constantTreeNode = constant.CreateTreeNode<ConstantTreeNode>();


340  constantTreeNode.Value = v;


341  nodes[i] = constantTreeNode.ToHashNode();


342  return;


343  }


344 


345  var nominator = nodes[children[0]];


346  foreach (var j in children.Skip(1)) {


347  var denominator = nodes[j];


348  if (nominator == denominator) {


349  // disable all the children of the division node (nominator and children + denominator and children)


350  nominator.Enabled = denominator.Enabled = false;


351  node.Arity = 2; // nominator + denominator


352  }


353  if (node.Arity == 0) {


354  var constantTreeNode = constant.CreateTreeNode<ConstantTreeNode>();


355  constantTreeNode.Value = 1; // x / x = 1


356  nodes[i] = constantTreeNode.ToHashNode();


357  }


358  }


359  }


360 


361  public static void SimplifyUnaryNode(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {


362  // check if the child of the unary node is a constant, then the whole node can be simplified


363  var parent = nodes[i];


364  var child = nodes[i  1];


365 


366  var parentSymbol = parent.Data.Symbol;


367  var childSymbol = child.Data.Symbol;


368 


369  if (childSymbol is Constant) {


370  nodes[i].Enabled = false;


371  } else if ((parentSymbol is Exponential && childSymbol is Logarithm)  (parentSymbol is Logarithm && childSymbol is Exponential)) {


372  child.Enabled = parent.Enabled = false;


373  }


374  }


375 


376  public static void SimplifyBinaryNode(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {


377  var children = nodes.IterateChildren(i);


378  var tmp = nodes;


379  if (children.All(x => tmp[x].Data.Symbol is Constant)) {


380  foreach (var j in children) {


381  nodes[j].Enabled = false;


382  }


383  nodes[i] = constant.CreateTreeNode().ToHashNode();


384  }


385  }


386  #endregion


387  }


388  }

