1  #region License Information


2  /* HeuristicLab


3  * Copyright (C) 20022019 Heuristic and Evolutionary Algorithms Laboratory (HEAL)


4  *


5  * This file is part of HeuristicLab.


6  *


7  * HeuristicLab is free software: you can redistribute it and/or modify


8  * it under the terms of the GNU General Public License as published by


9  * the Free Software Foundation, either version 3 of the License, or


10  * (at your option) any later version.


11  *


12  * HeuristicLab is distributed in the hope that it will be useful,


13  * but WITHOUT ANY WARRANTY; without even the implied warranty of


14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the


15  * GNU General Public License for more details.


16  *


17  * You should have received a copy of the GNU General Public License


18  * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.


19  */


20  #endregion


21 


22  using System;


23  using System.Collections.Generic;


24  using System.Linq;


25  using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;


26  using static HeuristicLab.Problems.DataAnalysis.Symbolic.SymbolicExpressionHashExtensions;


27 


28  namespace HeuristicLab.Problems.DataAnalysis.Symbolic {


29  public static class SymbolicExpressionTreeHash {


30  private static readonly Addition add = new Addition();


31  private static readonly Subtraction sub = new Subtraction();


32  private static readonly Multiplication mul = new Multiplication();


33  private static readonly Division div = new Division();


34  private static readonly Logarithm log = new Logarithm();


35  private static readonly Exponential exp = new Exponential();


36  private static readonly Sine sin = new Sine();


37  private static readonly Cosine cos = new Cosine();


38  private static readonly Constant constant = new Constant();


39 


40  private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0);


41 


42  #region tree hashing


43  public static ulong[] Hash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {


44  return tree.ActualRoot().Hash(simplify, strict);


45  }


46 


47  public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) {


48  ulong hashFunction(byte[] input) => HashUtil.DJBHash(input);


49 


50  var hashNodes = simplify ? node.MakeNodes(strict).Simplify(hashFunction) : node.MakeNodes(strict).Sort(hashFunction);


51  var hashes = new ulong[hashNodes.Length];


52  for (int i = 0; i < hashes.Length; ++i) {


53  hashes[i] = hashNodes[i].CalculatedHashValue;


54  }


55  return hashes;


56  }


57 


58  public static ulong ComputeHash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {


59  return ComputeHash(tree.ActualRoot(), simplify, strict);


60  }


61 


62  public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool simplify = false, bool strict = false) {


63  return treeNode.Hash(simplify, strict).Last();


64  }


65 


66  public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node, bool strict = false) {


67  var symbol = node.Symbol;


68  var name = symbol.Name;


69  if (node is ConstantTreeNode constantNode) {


70  name = strict ? constantNode.Value.ToString() : symbol.Name;


71  } else if (node is VariableTreeNode variableNode) {


72  name = strict ? variableNode.Weight.ToString() + variableNode.VariableName : variableNode.VariableName;


73  }


74  var hash = (ulong)name.GetHashCode();


75  var hashNode = new HashNode<ISymbolicExpressionTreeNode> {


76  Data = node,


77  Arity = node.SubtreeCount,


78  Size = node.SubtreeCount,


79  IsCommutative = node.Symbol is Addition  node.Symbol is Multiplication,


80  Enabled = true,


81  HashValue = hash,


82  CalculatedHashValue = hash


83  };


84  if (symbol is Addition) {


85  hashNode.Simplify = SimplifyAddition;


86  } else if (symbol is Multiplication) {


87  hashNode.Simplify = SimplifyMultiplication;


88  } else if (symbol is Division) {


89  hashNode.Simplify = SimplifyDivision;


90  } else if (symbol is Logarithm  symbol is Exponential  symbol is Sine  symbol is Cosine) {


91  hashNode.Simplify = SimplifyUnaryNode;


92  } else if (symbol is Subtraction) {


93  hashNode.Simplify = SimplifyBinaryNode;


94  }


95  return hashNode;


96  }


97 


98  public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree, bool strict = false) {


99  return MakeNodes(tree.ActualRoot(), strict);


100  }


101 


102  public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node, bool strict = false) {


103  return node.IterateNodesPostfix().Select(x => x.ToHashNode(strict)).ToArray().UpdateNodeSizes();


104  }


105  #endregion


106 


107  #region tree similarity


108  public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false, bool strict = false) {


109  return ComputeSimilarity(t1.ActualRoot(), t2.ActualRoot(), simplify, strict);


110  }


111 


112  public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false, bool strict = false) {


113  var lh = t1.Hash(simplify, strict);


114  var rh = t2.Hash(simplify, strict);


115 


116  Array.Sort(lh);


117  Array.Sort(rh);


118 


119  return ComputeSimilarity(lh, rh);


120  }


121 


122  // requires lhs and rhs to be sorted


123  public static int IntersectCount(this ulong[] lh, ulong[] rh) {


124  int count = 0;


125  for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {


126  var h1 = lh[i];


127  var h2 = rh[j];


128  if (h1 == h2) {


129  ++count;


130  ++i;


131  ++j;


132  } else if (h1 < h2) {


133  ++i;


134  } else if (h1 > h2) {


135  ++j;


136  }


137  }


138  return count;


139  }


140 


141  public static IEnumerable<ulong> Intersect(this ulong[] lh, ulong[] rh) {


142  for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {


143  var h1 = lh[i];


144  var h2 = rh[j];


145  if (h1 == h2) {


146  yield return h1;


147  ++i;


148  ++j;


149  } else if (h1 < h2) {


150  ++i;


151  } else if (h1 > h2) {


152  ++j;


153  }


154  }


155  }


156 


157  // this will only work if lh and rh are sorted


158  public static double ComputeSimilarity(ulong[] lh, ulong[] rh) {


159  return 2d * IntersectCount(lh, rh) / (lh.Length + rh.Length);


160  }


161 


162  public static double ComputeAverageSimilarity(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {


163  var total = trees.Count * (trees.Count  1) / 2;


164  double avg = 0;


165  var hashes = new ulong[trees.Count][];


166  // build hash arrays


167  for (int i = 0; i < trees.Count; ++i) {


168  var nodes = trees[i].MakeNodes(strict);


169  hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();


170  Array.Sort(hashes[i]);


171  }


172  // compute similarity matrix


173  for (int i = 0; i < trees.Count  1; ++i) {


174  for (int j = i + 1; j < trees.Count; ++j) {


175  avg += ComputeSimilarity(hashes[i], hashes[j]);


176  }


177  }


178  return avg / total;


179  }


180 


181  public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {


182  var sim = new double[trees.Count, trees.Count];


183  var hashes = new ulong[trees.Count][];


184  // build hash arrays


185  for (int i = 0; i < trees.Count; ++i) {


186  var nodes = trees[i].MakeNodes(strict);


187  hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();


188  Array.Sort(hashes[i]);


189  }


190  // compute similarity matrix


191  for (int i = 0; i < trees.Count  1; ++i) {


192  for (int j = i + 1; j < trees.Count; ++j) {


193  sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);


194  }


195  }


196  return sim;


197  }


198  #endregion


199 


200  #region parse a nodes array back into a tree


201  public static ISymbolicExpressionTree ToTree(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {


202  var root = new ProgramRootSymbol().CreateTreeNode();


203  var start = new StartSymbol().CreateTreeNode();


204  root.AddSubtree(start);


205  start.AddSubtree(nodes.ToSubtree());


206  return new SymbolicExpressionTree(root);


207  }


208 


209  public static ISymbolicExpressionTreeNode ToSubtree(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {


210  var treeNodes = nodes.Select(x => x.Data.Symbol.CreateTreeNode()).ToArray();


211 


212  for (int i = nodes.Length  1; i >= 0; i) {


213  var node = nodes[i];


214 


215  if (node.IsLeaf) {


216  if (node.Data is VariableTreeNode variable) {


217  var variableTreeNode = (VariableTreeNode)treeNodes[i];


218  variableTreeNode.VariableName = variable.VariableName;


219  variableTreeNode.Weight = variable.Weight;


220  } else if (node.Data is ConstantTreeNode @const) {


221  var constantTreeNode = (ConstantTreeNode)treeNodes[i];


222  constantTreeNode.Value = @const.Value;


223  }


224  continue;


225  }


226 


227  var treeNode = treeNodes[i];


228 


229  foreach (var j in nodes.IterateChildren(i)) {


230  treeNode.AddSubtree(treeNodes[j]);


231  }


232  }


233 


234  return treeNodes.Last();


235  }


236 


237  private static T CreateTreeNode<T>(this ISymbol symbol) where T : class, ISymbolicExpressionTreeNode {


238  return (T)symbol.CreateTreeNode();


239  }


240  #endregion


241 


242  #region tree simplification


243  // these simplification methods rely on the assumption that child nodes of the current node have already been simplified


244  // (in other words simplification should be applied in a bottomup fashion)


245  public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) {


246  ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes);


247  var root = tree.Root.GetSubtree(0).GetSubtree(0);


248  var nodes = root.MakeNodes();


249  var simplified = nodes.Simplify(hashFunction);


250  return simplified.ToTree();


251  }


252 


253  public static void SimplifyAddition(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {


254  // simplify additions of terms by eliminating terms with the same symbol and hash


255  var children = nodes.IterateChildren(i);


256 


257  // we always assume the child nodes are sorted


258  var curr = children[0];


259  var node = nodes[i];


260 


261  foreach (var j in children.Skip(1)) {


262  if (nodes[j] == nodes[curr]) {


263  nodes.SetEnabled(j, false);


264  node.Arity;


265  } else {


266  curr = j;


267  }


268  }


269  if (node.Arity == 1) { // if the arity is 1 we don't need the addition node at all


270  node.Enabled = false;


271  }


272  }


273 


274  // simplify multiplications by reducing constants and div terms


275  public static void SimplifyMultiplication(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {


276  var node = nodes[i];


277  var children = nodes.IterateChildren(i);


278 


279  for (int j = 0; j < children.Length; ++j) {


280  var c = children[j];


281  var child = nodes[c];


282 


283  if (!child.Enabled)


284  continue;


285 


286  var symbol = child.Data.Symbol;


287  if (symbol is Constant) {


288  for (int k = j + 1; k < children.Length; ++k) {


289  var d = children[k];


290  if (nodes[d].Data.Symbol is Constant) {


291  nodes[d].Enabled = false;


292  node.Arity;


293  } else {


294  break;


295  }


296  }


297  } else if (symbol is Division) {


298  var div = nodes[c];


299  var denominator =


300  div.Arity == 1 ?


301  nodes[c  1] : // 1 / x is expressed as div(x) (with a single child)


302  nodes[c  nodes[c  1].Size  2]; // assume division always has arity 1 or 2


303 


304  foreach (var d in children) {


305  if (nodes[d].Enabled && nodes[d] == denominator) {


306  nodes[c].Enabled = nodes[d].Enabled = denominator.Enabled = false;


307  node.Arity = 2; // matching child + division node


308  break;


309  }


310  }


311  }


312 


313  if (node.Arity == 0) { // if everything is simplified this node becomes constant


314  var constantTreeNode = constant.CreateTreeNode<ConstantTreeNode>();


315  constantTreeNode.Value = 1;


316  nodes[i] = constantTreeNode.ToHashNode();


317  } else if (node.Arity == 1) { // when i have only 1 arg left i can skip this node


318  node.Enabled = false;


319  }


320  }


321  }


322 


323  public static void SimplifyDivision(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {


324  var node = nodes[i];


325  var children = nodes.IterateChildren(i);


326 


327  var tmp = nodes;


328 


329  if (children.All(x => tmp[x].Data.Symbol is Constant)) {


330  var v = ((ConstantTreeNode)nodes[children.First()].Data).Value;


331  if (node.Arity == 1) {


332  v = 1 / v;


333  } else if (node.Arity > 1) {


334  foreach (var j in children.Skip(1)) {


335  v /= ((ConstantTreeNode)nodes[j].Data).Value;


336  }


337  }


338  var constantTreeNode = constant.CreateTreeNode<ConstantTreeNode>();


339  constantTreeNode.Value = v;


340  nodes[i] = constantTreeNode.ToHashNode();


341  return;


342  }


343 


344  var nominator = nodes[children[0]];


345  foreach (var j in children.Skip(1)) {


346  var denominator = nodes[j];


347  if (nominator == denominator) {


348  // disable all the children of the division node (nominator and children + denominator and children)


349  nominator.Enabled = denominator.Enabled = false;


350  node.Arity = 2; // nominator + denominator


351  }


352  if (node.Arity == 0) {


353  var constantTreeNode = constant.CreateTreeNode<ConstantTreeNode>();


354  constantTreeNode.Value = 1; // x / x = 1


355  nodes[i] = constantTreeNode.ToHashNode();


356  }


357  }


358  }


359 


360  public static void SimplifyUnaryNode(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {


361  // check if the child of the unary node is a constant, then the whole node can be simplified


362  var parent = nodes[i];


363  var child = nodes[i  1];


364 


365  var parentSymbol = parent.Data.Symbol;


366  var childSymbol = child.Data.Symbol;


367 


368  if (childSymbol is Constant) {


369  nodes[i].Enabled = false;


370  } else if ((parentSymbol is Exponential && childSymbol is Logarithm)  (parentSymbol is Logarithm && childSymbol is Exponential)) {


371  child.Enabled = parent.Enabled = false;


372  }


373  }


374 


375  public static void SimplifyBinaryNode(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {


376  var children = nodes.IterateChildren(i);


377  var tmp = nodes;


378  if (children.All(x => tmp[x].Data.Symbol is Constant)) {


379  foreach (var j in children) {


380  nodes[j].Enabled = false;


381  }


382  nodes[i] = constant.CreateTreeNode().ToHashNode();


383  }


384  }


385  #endregion


386  }


387  }

