Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
04/10/20 13:10:26 (5 years ago)
Author:
chaider
Message:

#2968 merged trunk into branch

Location:
branches/2968_infix_string_formatter/HeuristicLab.Problems.DataAnalysis.Symbolic
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/2968_infix_string_formatter/HeuristicLab.Problems.DataAnalysis.Symbolic

  • branches/2968_infix_string_formatter/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashExtensions.cs

    r16305 r17503  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2121
    2222using System;
    23 using System.Collections.Generic;
     23using System.Linq;
    2424
    2525namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
    2626  public static class SymbolicExpressionHashExtensions {
     27    /// <summary>
     28    /// Holds data that is necessary to handle tree nodes in hashing / simplification.
     29    /// </summary>
     30    /// <typeparam name="T">The tree node type</typeparam>
    2731    public sealed class HashNode<T> : IComparable<HashNode<T>>, IEquatable<HashNode<T>> where T : class {
    2832      public T Data;
     
    3842      public SimplifyAction Simplify;
    3943
    40       public IComparer<T> Comparer;
    41 
    4244      public bool IsLeaf => Arity == 0;
    4345
    44       public HashNode(IComparer<T> comparer) {
    45         Comparer = comparer;
    46       }
    47 
    48       private HashNode() { }
    49 
    5046      public int CompareTo(HashNode<T> other) {
    51         var res = Comparer.Compare(Data, other.Data);
     47        var res = HashValue.CompareTo(other.HashValue);
    5248        return res == 0 ? CalculatedHashValue.CompareTo(other.CalculatedHashValue) : res;
    5349      }
     
    10399
    104100    public static HashNode<T>[] Simplify<T>(this HashNode<T>[] nodes, Func<byte[], ulong> hashFunction) where T : class {
    105       var reduced = nodes.UpdateNodeSizes().Reduce().Sort(hashFunction);
    106 
    107       for (int i = 0; i < reduced.Length; ++i) {
    108         var node = reduced[i];
    109         if (node.IsLeaf) {
    110           continue;
    111         }
    112         node.Simplify?.Invoke(ref reduced, i);
    113       }
    114       // detect if anything was simplified
    115       var count = 0;
    116       foreach (var node in reduced) {
    117         if (!node.Enabled) { ++count; }
    118       }
    119       if (count == 0) {
    120         return reduced;
    121       }
    122 
    123       var simplified = new HashNode<T>[reduced.Length - count];
    124       int j = 0;
    125       foreach (var node in reduced) {
    126         if (node.Enabled) {
    127           simplified[j++] = node;
    128         }
    129       }
    130       return simplified.UpdateNodeSizes().Reduce().Sort(hashFunction);
     101      bool simplified = false;
     102      nodes = nodes.UpdateNodeSizes().Reduce().Sort(hashFunction);
     103      do {
     104        if (simplified) {
     105          simplified = false;
     106          nodes = nodes.Where(x => x.Enabled).ToArray().UpdateNodeSizes().Reduce().Sort(hashFunction);
     107        }
     108
     109        for (int i = 0; i < nodes.Length; ++i) {
     110          var node = nodes[i];
     111          if (node.IsLeaf) {
     112            continue;
     113          }
     114          node.Simplify?.Invoke(ref nodes, i);
     115          for (int j = i - node.Size; j <= i; ++j) {
     116            // detect if anything was simplified
     117            if (!nodes[j].Enabled) {
     118              simplified = true;
     119              break;
     120            }
     121          }
     122        }
     123      } while (simplified);
     124      return nodes.UpdateNodeSizes().Sort(hashFunction);
    131125    }
    132126
     
    173167
    174168    /// <summary>
    175     /// Get a function node's child indicest
     169    /// Get a function node's child indices
    176170    /// </summary>
    177171    /// <typeparam name="T">The data type encapsulated by a hash node</typeparam>
    178     /// <param name="nodes">An array of hash nodes with up-to-date node sizes</param>
     172    /// <param name="nodes">An array of hash nodes with up-to-date node sizes (see UpdateNodeSizes)</param>
    179173    /// <param name="i">The index in the array of hash nodes of the node whose children we want to iterate</param>
    180174    /// <returns>An array containing child indices</returns>
     
    191185    }
    192186
     187    /// <summary>
     188    /// Determines size of each branch and sets the results for each node.
     189    /// </summary>
     190    /// <typeparam name="T">The data type encapsulated by a hash node</typeparam>
     191    /// <param name="nodes">An array of hash nodes in postfix order.</param>
     192    /// <returns>The array with updated node sizes. The array is not copied.</returns>
    193193    public static HashNode<T>[] UpdateNodeSizes<T>(this HashNode<T>[] nodes) where T : class {
    194194      for (int i = 0; i < nodes.Length; ++i) {
     
    199199        }
    200200        node.Size = node.Arity;
    201 
     201        // visit all children and sum up their size (assumes postfix order).
    202202        for (int j = i - 1, k = 0; k < node.Arity; j -= 1 + nodes[j].Size, ++k) {
    203203          node.Size += nodes[j].Size;
     
    207207    }
    208208
    209     private static HashNode<T>[] Reduce<T>(this HashNode<T>[] nodes) where T : class {
     209    // disables duplicate branches and removes the disabled nodes
     210    public static HashNode<T>[] Reduce<T>(this HashNode<T>[] nodes) where T : class {
    210211      int count = 0;
    211212      for (int i = 0; i < nodes.Length; ++i) {
  • branches/2968_infix_string_formatter/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashUtil.cs

    r16272 r17503  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
  • branches/2968_infix_string_formatter/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs

    r16305 r17503  
    11#region License Information
    22/* HeuristicLab
    3  * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
     3 * Copyright (C) Heuristic and Evolutionary Algorithms Laboratory (HEAL)
    44 *
    55 * This file is part of HeuristicLab.
     
    2121
    2222using System;
     23using System.Collections.Generic;
    2324using System.Linq;
    2425using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     
    3738    private static readonly Constant constant = new Constant();
    3839
    39     private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer();
    40 
    41     public static ulong ComputeHash(this ISymbolicExpressionTree tree) {
    42       return ComputeHash(tree.Root.GetSubtree(0).GetSubtree(0));
    43     }
    44 
    45     public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false) {
    46       return ComputeSimilarity(t1.Root.GetSubtree(0).GetSubtree(0), t2.Root.GetSubtree(0).GetSubtree(0), simplify);
    47     }
    48 
    49     public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false) {
    50       HashNode<ISymbolicExpressionTreeNode>[] lhs;
    51       HashNode<ISymbolicExpressionTreeNode>[] rhs;
    52 
    53       ulong hashFunction(byte[] input) => HashUtil.DJBHash(input);
    54 
    55       if (simplify) {
    56         lhs = t1.MakeNodes().Simplify(hashFunction);
    57         rhs = t2.MakeNodes().Simplify(hashFunction);
    58       } else {
    59         lhs = t1.MakeNodes().Sort(hashFunction); // sort calculates hash values
    60         rhs = t2.MakeNodes().Sort(hashFunction);
    61       }
    62 
    63       var lh = lhs.Select(x => x.CalculatedHashValue).ToArray();
    64       var rh = rhs.Select(x => x.CalculatedHashValue).ToArray();
    65 
    66       Array.Sort(lh);
    67       Array.Sort(rh);
    68 
    69       return ComputeSimilarity(lh, rh);
    70     }
    71 
    72     // this will only work if lh and rh are sorted
    73     private static double ComputeSimilarity(ulong[] lh, ulong[] rh) {
    74       double count = 0;
    75       for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
    76         var h1 = lh[i];
    77         var h2 = rh[j];
    78         if (h1 == h2) {
    79           ++count;
    80           ++i;
    81           ++j;
    82         } else if (h1 < h2) {
    83           ++i;
    84         } else if (h1 > h2) {
    85           ++j;
    86         }
    87       }
    88 
    89       return 2d * count / (lh.Length + rh.Length);
    90     }
    91 
    92     public static double ComputeAverageSimilarity(ISymbolicExpressionTree[] trees, bool simplify = false, bool strict = false) {
    93       var total = (double)trees.Length * (trees.Length - 1) / 2;
    94       double avg = 0;
    95       var hashes = new ulong[trees.Length][];
    96       // build hash arrays
    97       for (int i = 0; i < trees.Length; ++i) {
    98         var nodes = trees[i].MakeNodes(strict);
    99         hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
    100         Array.Sort(hashes[i]);
    101       }
    102       // compute similarity matrix
    103       for (int i = 0; i < trees.Length - 1; ++i) {
    104         for (int j = i + 1; j < trees.Length; ++j) {
    105           avg += ComputeSimilarity(hashes[i], hashes[j]);
    106         }
    107       }
    108       return avg / total;
    109     }
    110 
    111     public static double[,] ComputeSimilarityMatrix(ISymbolicExpressionTree[] trees, bool simplify = false, bool strict = false) {
    112       var sim = new double[trees.Length, trees.Length];
    113       var hashes = new ulong[trees.Length][];
    114       // build hash arrays
    115       for (int i = 0; i < trees.Length; ++i) {
    116         var nodes = trees[i].MakeNodes(strict);
    117         hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray();
    118         Array.Sort(hashes[i]);
    119       }
    120       // compute similarity matrix
    121       for (int i = 0; i < trees.Length - 1; ++i) {
    122         for (int j = i + 1; j < trees.Length; ++j) {
    123           sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
    124         }
    125       }
    126       return sim;
    127     }
    128 
    129     public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool strict = false) {
    130       ulong hashFunction(byte[] input) => HashUtil.JSHash(input);
    131       var hashNodes = treeNode.MakeNodes(strict);
    132       var simplified = hashNodes.Simplify(hashFunction);
    133       return simplified.Last().CalculatedHashValue;
     40    private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0);
     41    public static ulong HashFunction(byte[] input) => HashUtil.DJBHash(input);
     42
     43    #region tree hashing
     44    public static ulong[] Hash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {
     45      return tree.ActualRoot().Hash(simplify, strict);
     46    }
     47
     48    public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) {
     49      var hashNodes = simplify ? node.MakeNodes(strict).Simplify(HashFunction) : node.MakeNodes(strict).Sort(HashFunction);
     50      var hashes = new ulong[hashNodes.Length];
     51      for (int i = 0; i < hashes.Length; ++i) {
     52        hashes[i] = hashNodes[i].CalculatedHashValue;
     53      }
     54      return hashes;
     55    }
     56
     57    public static ulong ComputeHash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) {
     58      return ComputeHash(tree.ActualRoot(), simplify, strict);
     59    }
     60
     61    public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool simplify = false, bool strict = false) {
     62      return treeNode.Hash(simplify, strict).Last();
    13463    }
    13564
     
    14372      }
    14473      var hash = (ulong)name.GetHashCode();
    145       var hashNode = new HashNode<ISymbolicExpressionTreeNode>(comparer) {
     74      var hashNode = new HashNode<ISymbolicExpressionTreeNode> {
    14675        Data = node,
    14776        Arity = node.SubtreeCount,
     
    16796
    16897    public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree, bool strict = false) {
    169       return MakeNodes(tree.Root.GetSubtree(0).GetSubtree(0), strict);
     98      return MakeNodes(tree.ActualRoot(), strict);
    17099    }
    171100
     
    173102      return node.IterateNodesPostfix().Select(x => x.ToHashNode(strict)).ToArray().UpdateNodeSizes();
    174103    }
     104    #endregion
     105
     106    #region tree similarity
     107    public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false, bool strict = false) {
     108      return ComputeSimilarity(t1.ActualRoot(), t2.ActualRoot(), simplify, strict);
     109    }
     110
     111    public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false, bool strict = false) {
     112      var lh = t1.Hash(simplify, strict);
     113      var rh = t2.Hash(simplify, strict);
     114
     115      Array.Sort(lh);
     116      Array.Sort(rh);
     117
     118      return ComputeSimilarity(lh, rh);
     119    }
     120
     121    // requires lhs and rhs to be sorted
     122    public static int IntersectCount(this ulong[] lh, ulong[] rh) {
     123      int count = 0;
     124      for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
     125        var h1 = lh[i];
     126        var h2 = rh[j];
     127        if (h1 == h2) {
     128          ++count;
     129          ++i;
     130          ++j;
     131        } else if (h1 < h2) {
     132          ++i;
     133        } else if (h1 > h2) {
     134          ++j;
     135        }
     136      }
     137      return count;
     138    }
     139
     140    public static IEnumerable<ulong> Intersect(this ulong[] lh, ulong[] rh) {
     141      for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) {
     142        var h1 = lh[i];
     143        var h2 = rh[j];
     144        if (h1 == h2) {
     145          yield return h1;
     146          ++i;
     147          ++j;
     148        } else if (h1 < h2) {
     149          ++i;
     150        } else if (h1 > h2) {
     151          ++j;
     152        }
     153      }
     154    }
     155
     156    // this will only work if lh and rh are sorted
     157    public static double ComputeSimilarity(ulong[] lh, ulong[] rh) {
     158      return 2d * IntersectCount(lh, rh) / (lh.Length + rh.Length);
     159    }
     160
     161    public static double ComputeAverageSimilarity(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
     162      var total = trees.Count * (trees.Count - 1) / 2;
     163      double avg = 0;
     164      var hashes = new ulong[trees.Count][];
     165      // build hash arrays
     166      for (int i = 0; i < trees.Count; ++i) {
     167        var nodes = trees[i].MakeNodes(strict);
     168        hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray();
     169        Array.Sort(hashes[i]);
     170      }
     171      // compute similarity matrix
     172      for (int i = 0; i < trees.Count - 1; ++i) {
     173        for (int j = i + 1; j < trees.Count; ++j) {
     174          avg += ComputeSimilarity(hashes[i], hashes[j]);
     175        }
     176      }
     177      return avg / total;
     178    }
     179
     180    public static double[,] ComputeSimilarityMatrix(IList<ulong[]> hashes) {
     181      // compute similarity matrix
     182      var n = hashes.Count;
     183      var sim = new double[n, n];
     184      for (int i = 0; i < n - 1; ++i) {
     185        for (int j = i + 1; j < n; ++j) {
     186          sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]);
     187        }
     188      }
     189      return sim;
     190    }
     191
     192    public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) {
     193      var hashes = new ulong[trees.Count][];
     194      // build hash arrays
     195      for (int i = 0; i < trees.Count; ++i) {
     196        var nodes = trees[i].MakeNodes(strict);
     197        hashes[i] = (simplify ? nodes.Simplify(HashFunction) : nodes.Sort(HashFunction)).Select(x => x.CalculatedHashValue).ToArray();
     198        Array.Sort(hashes[i]);
     199      }
     200      return ComputeSimilarityMatrix(hashes);
     201    }
     202    #endregion
    175203
    176204    #region parse a nodes array back into a tree
     
    218246    #region tree simplification
    219247    // these simplification methods rely on the assumption that child nodes of the current node have already been simplified
    220     // (in other words simplification should be applied in a bottom-up fashion) 
     248    // (in other words simplification should be applied in a bottom-up fashion)
    221249    public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) {
    222       ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes);
    223       var root = tree.Root.GetSubtree(0).GetSubtree(0);
    224       var nodes = root.MakeNodes();
    225       var simplified = nodes.Simplify(hashFunction);
    226       return simplified.ToTree();
     250      return tree.MakeNodes().Simplify(HashFunction).ToTree();
    227251    }
    228252
     
    248272    }
    249273
    250     // simplify multiplications by reducing constants and div terms 
     274    // simplify multiplications by reducing constants and div terms
    251275    public static void SimplifyMultiplication(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {
    252276      var node = nodes[i];
     
    261285
    262286        var symbol = child.Data.Symbol;
    263         if (symbol is Constant) {
     287        if (child.Data is ConstantTreeNode firstConst) {
     288          // fold sibling constant nodes into the first constant
    264289          for (int k = j + 1; k < children.Length; ++k) {
    265             var d = children[k];
    266             if (nodes[d].Data.Symbol is Constant) {
    267               nodes[d].Enabled = false;
     290            var sibling = nodes[children[k]];
     291            if (sibling.Data is ConstantTreeNode otherConst) {
     292              sibling.Enabled = false;
    268293              node.Arity--;
     294              firstConst.Value *= otherConst.Value;
     295            } else {
     296              break;
     297            }
     298          }
     299        } else if (child.Data is VariableTreeNode variable) {
     300          // fold sibling constant nodes into the variable weight
     301          for (int k = j + 1; k < children.Length; ++k) {
     302            var sibling = nodes[children[k]];
     303            if (sibling.Data is ConstantTreeNode constantNode) {
     304              sibling.Enabled = false;
     305              node.Arity--;
     306              variable.Weight *= constantNode.Value;
    269307            } else {
    270308              break;
     
    272310          }
    273311        } else if (symbol is Division) {
    274           var div = nodes[c];
    275           var denominator =
    276             div.Arity == 1 ?
    277             nodes[c - 1] :                    // 1 / x is expressed as div(x) (with a single child)
    278             nodes[c - nodes[c - 1].Size - 2]; // assume division always has arity 1 or 2
    279 
    280           foreach (var d in children) {
    281             if (nodes[d].Enabled && nodes[d] == denominator) {
    282               nodes[c].Enabled = nodes[d].Enabled = denominator.Enabled = false;
     312          // 1/x is expressed as div(x) (with a single child)
     313          // we assume division always has arity 1 or 2
     314          var d = child.Arity == 1 ? c - 1 : c - nodes[c - 1].Size - 2;
     315          var denominator = nodes[d];
     316
     317          // iterate children of node i to see if any of them matches the denominator of div node c
     318          for (int k = 0; k < children.Length; ++k) {
     319            var sibling = nodes[children[k]];
     320            if (sibling.Enabled && sibling == denominator) {
     321              nodes.SetEnabled(children[j], false); // disable the div subtree
     322              nodes.SetEnabled(children[k], false); // disable the sibling matching the denominator
     323
    283324              node.Arity -= 2; // matching child + division node
    284325              break;
Note: See TracChangeset for help on using the changeset viewer.