1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 20022018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Linq;


24  using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;


25  using static HeuristicLab.Problems.DataAnalysis.Symbolic.SymbolicExpressionHashExtensions;


26 


27  namespace HeuristicLab.Problems.DataAnalysis.Symbolic {


28  public static class SymbolicExpressionTreeHash {


29  private static readonly Addition add = new Addition();


30  private static readonly Subtraction sub = new Subtraction();


31  private static readonly Multiplication mul = new Multiplication();


32  private static readonly Division div = new Division();


33  private static readonly Logarithm log = new Logarithm();


34  private static readonly Exponential exp = new Exponential();


35  private static readonly Sine sin = new Sine();


36  private static readonly Cosine cos = new Cosine();


37  private static readonly Constant constant = new Constant();


38 


39  private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer();


40 


41  public static ulong ComputeHash(this ISymbolicExpressionTree tree) {


42  return ComputeHash(tree.Root.GetSubtree(0).GetSubtree(0));


43  }


44 


45  public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false) {


46  return ComputeSimilarity(t1.Root.GetSubtree(0).GetSubtree(0), t2.Root.GetSubtree(0).GetSubtree(0), simplify);


47  }


48 


49  public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false) {


50  HashNode<ISymbolicExpressionTreeNode>[] lhs;


51  HashNode<ISymbolicExpressionTreeNode>[] rhs;


52 


53  ulong hashFunction(byte[] input) => HashUtil.DJBHash(input);


54 


55  if (simplify) {


56  lhs = t1.MakeNodes().Simplify(hashFunction);


57  rhs = t2.MakeNodes().Simplify(hashFunction);


58  } else {


59  lhs = t1.MakeNodes().Sort(hashFunction); // sort calculates hash values


60  rhs = t2.MakeNodes().Sort(hashFunction);


61  }


62 


63  var lh = lhs.Select(x => x.CalculatedHashValue).ToArray();


64  var rh = rhs.Select(x => x.CalculatedHashValue).ToArray();


65 


66  Array.Sort(lh);


67  Array.Sort(rh);


68 


69  return ComputeSimilarity(lh, rh);


70  }


71 


72  // this will only work if lh and rh are sorted


73  private static double ComputeSimilarity(ulong[] lh, ulong[] rh) {


74  double count = 0;


75  for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {


76  var h1 = lh[i];


77  var h2 = rh[j];


78  if (h1 == h2) {


79  ++count;


80  ++i;


81  ++j;


82  } else if (h1 < h2) {


83  ++i;


84  } else if (h1 > h2) {


85  ++j;


86  }


87  }


88 


89  return 2d * count / (lh.Length + rh.Length);


90  }


91 


92  public static double ComputeAverageSimilarity(ISymbolicExpressionTree[] trees, bool simplify = false, bool strict = false) {


93  var total = (double)trees.Length * (trees.Length  1) / 2;


94  double avg = 0;


95  var hashes = new ulong[trees.Length][];


96  // build hash arrays


97  for (int i = 0; i < trees.Length; ++i) {


98  var nodes = trees[i].MakeNodes(strict);


99  hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();


100  Array.Sort(hashes[i]);


101  }


102  // compute similarity matrix


103  for (int i = 0; i < trees.Length  1; ++i) {


104  for (int j = i + 1; j < trees.Length; ++j) {


105  avg += ComputeSimilarity(hashes[i], hashes[j]);


106  }


107  }


108  return avg / total;


109  }


110 


111  public static double[,] ComputeSimilarityMatrix(ISymbolicExpressionTree[] trees, bool simplify = false, bool strict = false) {


112  var sim = new double[trees.Length, trees.Length];


113  var hashes = new ulong[trees.Length][];


114  // build hash arrays


115  for (int i = 0; i < trees.Length; ++i) {


116  var nodes = trees[i].MakeNodes(strict);


117  hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();


118  Array.Sort(hashes[i]);


119  }


120  // compute similarity matrix


121  for (int i = 0; i < trees.Length  1; ++i) {


122  for (int j = i + 1; j < trees.Length; ++j) {


123  sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);


124  }


125  }


126  return sim;


127  }


128 


129  public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool strict = false) {


130  ulong hashFunction(byte[] input) => HashUtil.JSHash(input);


131  var hashNodes = treeNode.MakeNodes(strict);


132  var simplified = hashNodes.Simplify(hashFunction);


133  return simplified.Last().CalculatedHashValue;


134  }


135 


136  public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node, bool strict = false) {


137  var symbol = node.Symbol;


138  var name = symbol.Name;


139  if (node is ConstantTreeNode constantNode) {


140  name = strict ? constantNode.Value.ToString() : symbol.Name;


141  } else if (node is VariableTreeNode variableNode) {


142  name = strict ? variableNode.Weight.ToString() + variableNode.VariableName : variableNode.VariableName;


143  }


144  var hash = (ulong)name.GetHashCode();


145  var hashNode = new HashNode<ISymbolicExpressionTreeNode>(comparer) {


146  Data = node,


147  Arity = node.SubtreeCount,


148  Size = node.SubtreeCount,


149  IsCommutative = node.Symbol is Addition  node.Symbol is Multiplication,


150  Enabled = true,


151  HashValue = hash,


152  CalculatedHashValue = hash


153  };


154  if (symbol is Addition) {


155  hashNode.Simplify = SimplifyAddition;


156  } else if (symbol is Multiplication) {


157  hashNode.Simplify = SimplifyMultiplication;


158  } else if (symbol is Division) {


159  hashNode.Simplify = SimplifyDivision;


160  } else if (symbol is Logarithm  symbol is Exponential  symbol is Sine  symbol is Cosine) {


161  hashNode.Simplify = SimplifyUnaryNode;


162  } else if (symbol is Subtraction) {


163  hashNode.Simplify = SimplifyBinaryNode;


164  }


165  return hashNode;


166  }


167 


168  public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree, bool strict = false) {


169  return MakeNodes(tree.Root.GetSubtree(0).GetSubtree(0), strict);


170  }


171 


172  public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node, bool strict = false) {


173  return node.IterateNodesPostfix().Select(x => x.ToHashNode(strict)).ToArray().UpdateNodeSizes();


174  }


175 


176  #region parse a nodes array back into a tree


177  public static ISymbolicExpressionTree ToTree(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {


178  var root = new ProgramRootSymbol().CreateTreeNode();


179  var start = new StartSymbol().CreateTreeNode();


180  root.AddSubtree(start);


181  start.AddSubtree(nodes.ToSubtree());


182  return new SymbolicExpressionTree(root);


183  }


184 


185  public static ISymbolicExpressionTreeNode ToSubtree(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {


186  var treeNodes = nodes.Select(x => x.Data.Symbol.CreateTreeNode()).ToArray();


187 


188  for (int i = nodes.Length  1; i >= 0; i) {


189  var node = nodes[i];


190 


191  if (node.IsLeaf) {


192  if (node.Data is VariableTreeNode variable) {


193  var variableTreeNode = (VariableTreeNode)treeNodes[i];


194  variableTreeNode.VariableName = variable.VariableName;


195  variableTreeNode.Weight = variable.Weight;


196  } else if (node.Data is ConstantTreeNode @const) {


197  var constantTreeNode = (ConstantTreeNode)treeNodes[i];


198  constantTreeNode.Value = @const.Value;


199  }


200  continue;


201  }


202 


203  var treeNode = treeNodes[i];


204 


205  foreach (var j in nodes.IterateChildren(i)) {


206  treeNode.AddSubtree(treeNodes[j]);


207  }


208  }


209 


210  return treeNodes.Last();


211  }


212 


213  private static T CreateTreeNode<T>(this ISymbol symbol) where T : class, ISymbolicExpressionTreeNode {


214  return (T)symbol.CreateTreeNode();


215  }


216  #endregion


217 


218  #region tree simplification


219  // these simplification methods rely on the assumption that child nodes of the current node have already been simplified


220  // (in other words simplification should be applied in a bottomup fashion)


221  public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) {


222  ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes);


223  var root = tree.Root.GetSubtree(0).GetSubtree(0);


224  var nodes = root.MakeNodes();


225  var simplified = nodes.Simplify(hashFunction);


226  return simplified.ToTree();


227  }


228 


229  public static void SimplifyAddition(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {


230  // simplify additions of terms by eliminating terms with the same symbol and hash


231  var children = nodes.IterateChildren(i);


232 


233  // we always assume the child nodes are sorted


234  var curr = children[0];


235  var node = nodes[i];


236 


237  foreach (var j in children.Skip(1)) {


238  if (nodes[j] == nodes[curr]) {


239  nodes.SetEnabled(j, false);


240  node.Arity;


241  } else {


242  curr = j;


243  }


244  }


245  if (node.Arity == 1) { // if the arity is 1 we don't need the addition node at all


246  node.Enabled = false;


247  }


248  }


249 


250  // simplify multiplications by reducing constants and div terms


251  public static void SimplifyMultiplication(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {


252  var node = nodes[i];


253  var children = nodes.IterateChildren(i);


254 


255  for (int j = 0; j < children.Length; ++j) {


256  var c = children[j];


257  var child = nodes[c];


258 


259  if (!child.Enabled)


260  continue;


261 


262  var symbol = child.Data.Symbol;


263  if (symbol is Constant) {


264  for (int k = j + 1; k < children.Length; ++k) {


265  var d = children[k];


266  if (nodes[d].Data.Symbol is Constant) {


267  nodes[d].Enabled = false;


268  node.Arity;


269  } else {


270  break;


271  }


272  }


273  } else if (symbol is Division) {


274  var div = nodes[c];


275  var denominator =


276  div.Arity == 1 ?


277  nodes[c  1] : // 1 / x is expressed as div(x) (with a single child)


278  nodes[c  nodes[c  1].Size  2]; // assume division always has arity 1 or 2


279 


280  foreach (var d in children) {


281  if (nodes[d].Enabled && nodes[d] == denominator) {


282  nodes[c].Enabled = nodes[d].Enabled = denominator.Enabled = false;


283  node.Arity = 2; // matching child + division node


284  break;


285  }


286  }


287  }


288 


289  if (node.Arity == 0) { // if everything is simplified this node becomes constant


290  var constantTreeNode = constant.CreateTreeNode<ConstantTreeNode>();


291  constantTreeNode.Value = 1;


292  nodes[i] = constantTreeNode.ToHashNode();


293  } else if (node.Arity == 1) { // when i have only 1 arg left i can skip this node


294  node.Enabled = false;


295  }


296  }


297  }


298 


299  public static void SimplifyDivision(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {


300  var node = nodes[i];


301  var children = nodes.IterateChildren(i);


302 


303  var tmp = nodes;


304 


305  if (children.All(x => tmp[x].Data.Symbol is Constant)) {


306  var v = ((ConstantTreeNode)nodes[children.First()].Data).Value;


307  if (node.Arity == 1) {


308  v = 1 / v;


309  } else if (node.Arity > 1) {


310  foreach (var j in children.Skip(1)) {


311  v /= ((ConstantTreeNode)nodes[j].Data).Value;


312  }


313  }


314  var constantTreeNode = constant.CreateTreeNode<ConstantTreeNode>();


315  constantTreeNode.Value = v;


316  nodes[i] = constantTreeNode.ToHashNode();


317  return;


318  }


319 


320  var nominator = nodes[children[0]];


321  foreach (var j in children.Skip(1)) {


322  var denominator = nodes[j];


323  if (nominator == denominator) {


324  // disable all the children of the division node (nominator and children + denominator and children)


325  nominator.Enabled = denominator.Enabled = false;


326  node.Arity = 2; // nominator + denominator


327  }


328  if (node.Arity == 0) {


329  var constantTreeNode = constant.CreateTreeNode<ConstantTreeNode>();


330  constantTreeNode.Value = 1; // x / x = 1


331  nodes[i] = constantTreeNode.ToHashNode();


332  }


333  }


334  }


335 


336  public static void SimplifyUnaryNode(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {


337  // check if the child of the unary node is a constant, then the whole node can be simplified


338  var parent = nodes[i];


339  var child = nodes[i  1];


340 


341  var parentSymbol = parent.Data.Symbol;


342  var childSymbol = child.Data.Symbol;


343 


344  if (childSymbol is Constant) {


345  nodes[i].Enabled = false;


346  } else if ((parentSymbol is Exponential && childSymbol is Logarithm)  (parentSymbol is Logarithm && childSymbol is Exponential)) {


347  child.Enabled = parent.Enabled = false;


348  }


349  }


350 


351  public static void SimplifyBinaryNode(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {


352  var children = nodes.IterateChildren(i);


353  var tmp = nodes;


354  if (children.All(x => tmp[x].Data.Symbol is Constant)) {


355  foreach (var j in children) {


356  nodes[j].Enabled = false;


357  }


358  nodes[i] = constant.CreateTreeNode().ToHashNode();


359  }


360  }


361  #endregion


362  }


363  }

