Changeset 16263


Ignore:
Timestamp:
10/30/18 13:08:01 (21 months ago)
Author:
bburlacu
Message:

#2950: Refactor hashing to use unsigned long for hashes. Implement new DiversityPreservingCrossover which prevents subtrees with the same hash value from being swapped.

Location:
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4
Files:
1 added
4 edited

Legend:

Unmodified
Added
Removed
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisBuildingBlockAnalyzer.cs

    r16259 r16263  
    4141    private const string SimplifyTreesParameterName = "SimplifyTrees";
    4242
    43     private Dictionary<int, DataRow> hashToRow = new Dictionary<int, DataRow>();
     43    private Dictionary<ulong, DataRow> hashToRow = new Dictionary<ulong, DataRow>();
    4444
    4545    #region parameters
     
    6666      base.InitializeState();
    6767
    68       hashToRow = new Dictionary<int, DataRow>();
     68      hashToRow = new Dictionary<ulong, DataRow>();
    6969    }
    7070
     
    104104      var simplify = SimplifyTrees.Value;
    105105
    106       var expressions = new Dictionary<int, string>();
    107       var expressionCounts = new Dictionary<int, int>();
     106      var expressions = new Dictionary<ulong, string>();
     107      var expressionCounts = new Dictionary<ulong, int>();
    108108
    109109      int totalCount = 0; // total number of examined subtrees
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashExtensions.cs

    r16261 r16263  
    3232
    3333      public bool Enabled;
    34       public int HashValue;           // the initial (fixed) hash value for this individual node/data
    35       public int CalculatedHashValue; // the calculated hash value (taking into account the children hash values)
     34      public ulong HashValue;           // the initial (fixed) hash value for this individual node/data
     35      public ulong CalculatedHashValue; // the calculated hash value (taking into account the children hash values)
    3636
    3737      public Action<HashNode<T>[], int> Simplify;
     
    6767
    6868      public override int GetHashCode() {
    69         return CalculatedHashValue;
     69        //return CalculatedHashValue;
     70        return Data.GetHashCode();
    7071      }
    7172
     
    7980    }
    8081
    81     public static int ComputeHash<T>(this HashNode<T>[] nodes, int i) where T : class {
     82    public static ulong ComputeHash<T>(this HashNode<T>[] nodes, int i) where T : class {
    8283      var node = nodes[i];
    83       var hashes = new int[node.Arity + 1];
     84      var hashes = new ulong[node.Arity + 1];
    8485      for (int j = i - 1, k = 0; k < node.Arity; j -= 1 + nodes[j].Size, k++) {
    8586        hashes[k] = nodes[j].CalculatedHashValue;
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashUtil.cs

    r16261 r16263  
    2020#endregion
    2121
    22 using System;
    23 using System.Security.Cryptography;
    2422
    2523namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
     
    2826
    2927    // A simple hash function from Robert Sedgwicks Algorithms in C book.I've added some simple optimizations to the algorithm in order to speed up its hashing process.
    30     public static int RSHash(int[] input) {
     28    public static ulong RSHash(ulong[] input) {
    3129      const int b = 378551;
    32       int a = 63689;
    33       int hash = 0;
     30      ulong a = 63689;
     31      ulong hash = 0;
    3432
    3533      foreach (var v in input) {
     
    4139
    4240    // A bitwise hash function written by Justin Sobel
    43     public static int JSHash(int[] input) {
    44       int hash = 1315423911;
     41    public static ulong JSHash(ulong[] input) {
     42      ulong hash = 1315423911;
    4543      for (int i = 0; i < input.Length; ++i)
    4644        hash ^= (hash << 5) + input[i] + (hash >> 2);
     
    4947
    5048    // This hash function comes from Brian Kernighan and Dennis Ritchie's book "The C Programming Language". It is a simple hash function using a strange set of possible seeds which all constitute a pattern of 31....31...31 etc, it seems to be very similar to the DJB hash function.
    51     public static int BKDRHash(int[] input) {
    52       const int seed = 131;
    53       int hash = 0;
     49    public static ulong BKDRHash(ulong[] input) {
     50      ulong seed = 131;
     51      ulong hash = 0;
    5452      foreach (var v in input) {
    5553        hash = (hash * seed) + v;
     
    5957
    6058    // This is the algorithm of choice which is used in the open source SDBM project. The hash function seems to have a good over-all distribution for many different data sets. It seems to work well in situations where there is a high variance in the MSBs of the elements in a data set.
    61     public static int SDBMHash(int[] input) {
    62       int hash = 0;
     59    public static ulong SDBMHash(ulong[] input) {
     60      ulong hash = 0;
    6361      foreach (var v in input) {
    6462        hash = v + (hash << 6) + (hash << 16) - hash;
     
    6866
    6967    // An algorithm produced by Professor Daniel J. Bernstein and shown first to the world on the usenet newsgroup comp.lang.c. It is one of the most efficient hash functions ever published.
    70     public static int DJBHash(int[] input) {
    71       int hash = 5381;
     68    public static ulong DJBHash(ulong[] input) {
     69      ulong hash = 5381;
    7270      foreach (var v in input) {
    7371        hash = (hash << 5) + hash + v;
     
    7775
    7876    // An algorithm proposed by Donald E.Knuth in The Art Of Computer Programming Volume 3, under the topic of sorting and search chapter 6.4.
    79     public static int DEKHash(int[] input) {
    80       int hash = input.Length;
     77    public static ulong DEKHash(ulong[] input) {
     78      ulong hash = (ulong)input.Length;
    8179      foreach (var v in input) {
    8280        hash = (hash << 5) ^ (hash >> 27) ^ v;
     
    8583    }
    8684
    87     public static int CryptoHash(HashAlgorithm ha, int[] input) {
    88       return BitConverter.ToInt32(ha.ComputeHash(input.ToByteArray()), 0);
    89     }
     85    //public static ulong CryptoHash(HashAlgorithm ha, ulong[] input) {
     86    //  return BitConverter.ToInt32(ha.ComputeHash(input.ToByteArray()), 0);
     87    //}
    9088
    91     public static byte[] ToByteArray(this int[] input) {
    92       var bytes = new byte[input.Length * sizeof(int)];
    93       int pos = 0;
    94       foreach (var v in input) {
    95         var b0 = (byte)((v >> 24) & 0xFF);
    96         var b1 = (byte)((v >> 16) & 0xFF);
    97         var b2 = (byte)((v >> 8) & 0xFF);
    98         var b3 = (byte)(v & 0xFF);
    99         bytes[pos++] = b0;
    100         bytes[pos++] = b1;
    101         bytes[pos++] = b2;
    102         bytes[pos++] = b3;
    103       }
    104       return bytes;
    105     }
     89    //public static byte[] ToByteArray(this ulong[] input) {
     90    //  var bytes = new byte[input.Length * sizeof(int)];
     91    //  int pos = 0;
     92    //  foreach (var v in input) {
     93    //    var b0 = (byte)((v >> 24) & 0xFF);
     94    //    var b1 = (byte)((v >> 16) & 0xFF);
     95    //    var b2 = (byte)((v >> 8) & 0xFF);
     96    //    var b3 = (byte)(v & 0xFF);
     97    //    bytes[pos++] = b0;
     98    //    bytes[pos++] = b1;
     99    //    bytes[pos++] = b2;
     100    //    bytes[pos++] = b3;
     101    //  }
     102    //  return bytes;
     103    //}
    106104  }
    107105}
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs

    r16255 r16263  
    3939    private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer();
    4040
    41     public static int ComputeHash(this ISymbolicExpressionTree tree) {
     41    public static ulong ComputeHash(this ISymbolicExpressionTree tree) {
    4242      return ComputeHash(tree.Root.GetSubtree(0).GetSubtree(0));
    4343    }
    4444
    4545    // compute node hashes without sorting the arguments
    46     public static Dictionary<ISymbolicExpressionTreeNode, int> ComputeNodeHashes(this ISymbolicExpressionTree tree) {
     46    public static Dictionary<ISymbolicExpressionTreeNode, ulong> ComputeNodeHashes(this ISymbolicExpressionTree tree) {
    4747      var root = tree.Root.GetSubtree(0).GetSubtree(0);
    4848      var nodes = root.MakeNodes();
     
    5757    }
    5858
    59     public static int ComputeHash(this ISymbolicExpressionTreeNode treeNode) {
     59    public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode) {
    6060      var hashNodes = treeNode.MakeNodes();
    6161      var simplified = hashNodes.Simplify();
    62       return ComputeHash(simplified);
    63     }
    64 
    65     public static int ComputeHash(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {
    66       int hash = 1315423911;
    67       foreach (var node in nodes)
    68         hash ^= (hash << 5) + node.CalculatedHashValue + (hash >> 2);
    69       return hash;
    70     }
     62      //return ComputeHash(simplified);
     63      return simplified.Last().CalculatedHashValue;
     64    }
     65
     66    //public static int ComputeHash(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {
     67    //  int hash = 1315423911;
     68    //  foreach (var node in nodes)
     69    //    hash ^= (hash << 5) + node.CalculatedHashValue + (hash >> 2);
     70    //  return hash;
     71    //}
    7172
    7273    public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node) {
     
    7778        name = variableTreeNode.VariableName;
    7879      }
    79       var hash = name.GetHashCode();
     80      var hash = (ulong)name.GetHashCode();
    8081      var hashNode = new HashNode<ISymbolicExpressionTreeNode>(comparer) {
    8182        Data = node,
Note: See TracChangeset for help on using the changeset viewer.