Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
11/02/18 16:20:33 (6 years ago)
Author:
bburlacu
Message:

#2950: Refactor hash extensions and utility methods: hashes are computed from byte[] arrays, simplification accepts an argument specifying which hash function to use. Update SymbolicDataAnalysisBuildingBlockAnalyzer and SymbolicDataAnalysisExpressionDiversityPreservingCrossover.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashUtil.cs

    r16263 r16272  
    2121
    2222
     23using System;
     24using System.Security.Cryptography;
     25
    2326namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
    2427  public static class HashUtil {
     
    2629
    2730    // A simple hash function from Robert Sedgewick's "Algorithms in C" book. I've added some simple optimizations to the algorithm in order to speed up its hashing process.
    28     public static ulong RSHash(ulong[] input) {
     31    public static ulong RSHash(byte[] input) {
    2932      const int b = 378551;
    3033      ulong a = 63689;
     
    3942
    4043    // A bitwise hash function written by Justin Sobel
    41     public static ulong JSHash(ulong[] input) {
     44    public static ulong JSHash(byte[] input) {
    4245      ulong hash = 1315423911;
    4346      for (int i = 0; i < input.Length; ++i)
     
    4750
    4851    // This hash function comes from Brian Kernighan and Dennis Ritchie's book "The C Programming Language". It is a simple hash function using a strange set of possible seeds which all constitute a pattern of 31...31...31 etc. It seems to be very similar to the DJB hash function.
    49     public static ulong BKDRHash(ulong[] input) {
     52    public static ulong BKDRHash(byte[] input) {
    5053      ulong seed = 131;
    5154      ulong hash = 0;
     
    5760
    5861    // This is the algorithm of choice which is used in the open source SDBM project. The hash function seems to have a good over-all distribution for many different data sets. It seems to work well in situations where there is a high variance in the MSBs of the elements in a data set.
    59     public static ulong SDBMHash(ulong[] input) {
     62    public static ulong SDBMHash(byte[] input) {
    6063      ulong hash = 0;
    6164      foreach (var v in input) {
     
    6669
    6770    // An algorithm produced by Professor Daniel J. Bernstein and shown first to the world on the usenet newsgroup comp.lang.c. It is one of the most efficient hash functions ever published.
    68     public static ulong DJBHash(ulong[] input) {
     71    public static ulong DJBHash(byte[] input) {
    6972      ulong hash = 5381;
    7073      foreach (var v in input) {
     
    7578
    7679    // An algorithm proposed by Donald E. Knuth in "The Art of Computer Programming", Volume 3, under the topic of sorting and searching, section 6.4.
    77     public static ulong DEKHash(ulong[] input) {
     80    public static ulong DEKHash(byte[] input) {
    7881      ulong hash = (ulong)input.Length;
    7982      foreach (var v in input) {
     
    8386    }
    8487
    85     //public static ulong CryptoHash(HashAlgorithm ha, ulong[] input) {
    86     //  return BitConverter.ToInt32(ha.ComputeHash(input.ToByteArray()), 0);
    87     //}
    88 
    89     //public static byte[] ToByteArray(this ulong[] input) {
    90     //  var bytes = new byte[input.Length * sizeof(int)];
    91     //  int pos = 0;
    92     //  foreach (var v in input) {
    93     //    var b0 = (byte)((v >> 24) & 0xFF);
    94     //    var b1 = (byte)((v >> 16) & 0xFF);
    95     //    var b2 = (byte)((v >> 8) & 0xFF);
    96     //    var b3 = (byte)(v & 0xFF);
    97     //    bytes[pos++] = b0;
    98     //    bytes[pos++] = b1;
    99     //    bytes[pos++] = b2;
    100     //    bytes[pos++] = b3;
    101     //  }
    102     //  return bytes;
    103     //}
     88    public static ulong CryptoHash(HashAlgorithm ha, byte[] input) {
     89      return BitConverter.ToUInt64(ha.ComputeHash(input), 0);
     90    }
    10491  }
    10592}
Note: See TracChangeset for help on using the changeset viewer.