Free cookie consent management tool by TermsFeed Policy Generator

Ignore:
Timestamp:
11/02/18 16:20:33 (6 years ago)
Author:
bburlacu
Message:

#2950: Refactor hash extensions and utility methods: hashes are computed from byte[] arrays, simplification accepts an argument specifying which hash function to use. Update SymbolicDataAnalysisBuildingBlockAnalyzer and SymbolicDataAnalysisExpressionDiversityPreservingCrossover.

Location:
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4
Files:
6 edited

Legend:

Unmodified
Added
Removed
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisBuildingBlockAnalyzer.cs

    r16271 r16272  
    2020#endregion
    2121
     22using System;
    2223using System.Collections.Generic;
    2324using System.Linq;
     
    3839  public sealed class SymbolicDataAnalysisBuildingBlockAnalyzer : SymbolicDataAnalysisAnalyzer {
    3940    private const string BuildingBlocksResultName = "BuildingBlocks";
     41    private const string SolutionUniquenessResultName = "SolutionUniqueness";
    4042    private const string MinimumSubtreeLengthParameterName = "MinimumSubtreeLength";
    4143    private const string SimplifyTreesParameterName = "SimplifyTrees";
     
    9092    [StorableConstructor]
    9193    private SymbolicDataAnalysisBuildingBlockAnalyzer(bool deserializing) : base(deserializing) { }
     94
     95    private readonly Func<byte[], ulong> hashFunction = HashUtil.JSHash;
    9296
    9397    public override IOperation Apply() {
     
    109113      int totalCount = 0; // total number of examined subtrees
    110114
     115      var hashes = new List<ulong>();
    111116      // count hashes
    112117      foreach (var tree in SymbolicExpressionTree) {
    113118        var hashNodes = tree.Root.GetSubtree(0).GetSubtree(0).MakeNodes();
    114         var simplified = simplify ? hashNodes.Simplify() : hashNodes.Sort();
     119        var simplified = simplify ? hashNodes.Simplify(hashFunction) : hashNodes.Sort(hashFunction);
     120        hashes.Add(simplified.Last().CalculatedHashValue); // maybe calculate aggregate hash instead
    115121
    116122        for (int i = 0; i < simplified.Length; i++) {
     
    165171      }
    166172
     173      // compute solution uniqueness
     174      DataTableHistory dth;
     175      var counts = hashes.GroupBy(x => x).ToDictionary(x => x.Key, x => x.Count());
     176      if (!ResultCollection.ContainsKey(SolutionUniquenessResultName)) {
     177        dth = new DataTableHistory();
     178        ResultCollection.Add(new Result(SolutionUniquenessResultName, dth));
     179      } else {
     180        dth = (DataTableHistory)ResultCollection[SolutionUniquenessResultName].Value;
     181      }
     182
     183      var ct = new DataTable("Unique Solutions");
     184      var ctr = new DataRow { VisualProperties = { StartIndexZero = true, ChartType = DataRowVisualProperties.DataRowChartType.Columns } };
     185      ctr.Values.AddRange(hashes.Select(x => (double)counts[x]).OrderByDescending(x => x));
     186      ct.Rows.Add(ctr);
     187      dth.Add(ct);
     188
     189      var max = dth.Max(x => x.Rows.First().Values.Max());
     190      foreach (var table in dth) {
     191        table.VisualProperties.YAxisMinimumAuto = false;
     192        table.VisualProperties.YAxisMaximumAuto = false;
     193        table.VisualProperties.YAxisMinimumFixedValue = 0;
     194        table.VisualProperties.YAxisMaximumFixedValue = max;
     195      }
     196
    167197      return base.Apply();
    168198    }
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Crossovers/SymbolicDataAnalysisExpressionDiversityPreservingCrossover.cs

    r16270 r16272  
    2020    private const string WindowingParameterName = "Windowing";
    2121    private const string ProportionalSamplingParameterName = "ProportionalSampling";
     22
     23    private static readonly Func<byte[], ulong> hashFunction = HashUtil.JSHash;
    2224
    2325    #region Parameter Properties
     
    7375      var leafCrossoverPointProbability = 1 - internalCrossoverPointProbability;
    7476
    75       var nodes0 = ActualRoot(parent0).MakeNodes().Sort();
    76       var nodes1 = ActualRoot(parent1).MakeNodes().Sort();
     77      var nodes0 = ActualRoot(parent0).MakeNodes().Sort(hashFunction);
     78      var nodes1 = ActualRoot(parent1).MakeNodes().Sort(hashFunction);
    7779
    7880      IList<HashNode<ISymbolicExpressionTreeNode>> sampled0;
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashExtensions.cs

    r16267 r16272  
    6767
    6868      public override int GetHashCode() {
    69         //return CalculatedHashValue;
    70         return Data.GetHashCode();
     69        return (int)CalculatedHashValue;
    7170      }
    7271
     
    8079    }
    8180
    82     public static ulong ComputeHash<T>(this HashNode<T>[] nodes, int i) where T : class {
     81    public static ulong ComputeHash<T>(this HashNode<T>[] nodes, int i, Func<byte[], ulong> hashFunction) where T : class {
    8382      var node = nodes[i];
    84       var hashes = new ulong[node.Arity + 1];
    85       for (int j = i - 1, k = 0; k < node.Arity; j -= 1 + nodes[j].Size, k++) {
    86         hashes[k] = nodes[j].CalculatedHashValue;
    87       }
    88       hashes[node.Arity] = node.HashValue;
    89       return HashUtil.JSHash(hashes);
    90     }
    91 
    92     public static HashNode<T>[] Simplify<T>(this HashNode<T>[] nodes) where T : class {
    93       var reduced = nodes.UpdateNodeSizes().Reduce().Sort();
     83      const int size = sizeof(ulong);
     84      var hashes = new byte[(node.Arity + 1) * size];
     85
     86      for (int j = i - 1, k = 0; k < node.Arity; ++k, j -= 1 + nodes[j].Size) {
     87        Array.Copy(BitConverter.GetBytes(nodes[j].CalculatedHashValue), 0, hashes, k * size, size);
     88      }
     89      Array.Copy(BitConverter.GetBytes(node.HashValue), 0, hashes, node.Arity * size, size);
     90      return hashFunction(hashes);
     91    }
     92
     93    // set the enabled state for the whole subtree rooted at this node
     94    public static void SetEnabled<T>(this HashNode<T>[] nodes, int i, bool enabled) where T : class {
     95      nodes[i].Enabled = enabled;
     96      for (int j = i - nodes[i].Size; j < i; ++j)
     97        nodes[j].Enabled = enabled;
     98    }
     99
     100    public static HashNode<T>[] Simplify<T>(this HashNode<T>[] nodes, Func<byte[], ulong> hashFunction) where T : class {
     101      var reduced = nodes.UpdateNodeSizes().Reduce().Sort(hashFunction);
    94102
    95103      for (int i = 0; i < reduced.Length; ++i) {
     
    116124        }
    117125      }
    118       return simplified.UpdateNodeSizes().Reduce().Sort();
    119     }
    120 
    121     public static HashNode<T>[] Sort<T>(this HashNode<T>[] nodes) where T : class {
     126      return simplified.UpdateNodeSizes().Reduce().Sort(hashFunction);
     127    }
     128
     129    public static HashNode<T>[] Sort<T>(this HashNode<T>[] nodes, Func<byte[], ulong> hashFunction) where T : class {
    122130      int sort(int a, int b) => nodes[a].CompareTo(nodes[b]);
    123131
     
    155163          }
    156164        }
    157         node.CalculatedHashValue = nodes.ComputeHash(i);
     165        node.CalculatedHashValue = nodes.ComputeHash(i, hashFunction);
    158166      }
    159167      return nodes;
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashUtil.cs

    r16263 r16272  
    2121
    2222
     23using System;
     24using System.Security.Cryptography;
     25
    2326namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
    2427  public static class HashUtil {
     
    2629
    2730    // A simple hash function from Robert Sedgwicks Algorithms in C book.I've added some simple optimizations to the algorithm in order to speed up its hashing process.
    28     public static ulong RSHash(ulong[] input) {
     31    public static ulong RSHash(byte[] input) {
    2932      const int b = 378551;
    3033      ulong a = 63689;
     
    3942
    4043    // A bitwise hash function written by Justin Sobel
    41     public static ulong JSHash(ulong[] input) {
     44    public static ulong JSHash(byte[] input) {
    4245      ulong hash = 1315423911;
    4346      for (int i = 0; i < input.Length; ++i)
     
    4750
    4851    // This hash function comes from Brian Kernighan and Dennis Ritchie's book "The C Programming Language". It is a simple hash function using a strange set of possible seeds which all constitute a pattern of 31....31...31 etc, it seems to be very similar to the DJB hash function.
    49     public static ulong BKDRHash(ulong[] input) {
     52    public static ulong BKDRHash(byte[] input) {
    5053      ulong seed = 131;
    5154      ulong hash = 0;
     
    5760
    5861    // This is the algorithm of choice which is used in the open source SDBM project. The hash function seems to have a good over-all distribution for many different data sets. It seems to work well in situations where there is a high variance in the MSBs of the elements in a data set.
    59     public static ulong SDBMHash(ulong[] input) {
     62    public static ulong SDBMHash(byte[] input) {
    6063      ulong hash = 0;
    6164      foreach (var v in input) {
     
    6669
    6770    // An algorithm produced by Professor Daniel J. Bernstein and shown first to the world on the usenet newsgroup comp.lang.c. It is one of the most efficient hash functions ever published.
    68     public static ulong DJBHash(ulong[] input) {
     71    public static ulong DJBHash(byte[] input) {
    6972      ulong hash = 5381;
    7073      foreach (var v in input) {
     
    7578
    7679    // An algorithm proposed by Donald E.Knuth in The Art Of Computer Programming Volume 3, under the topic of sorting and search chapter 6.4.
    77     public static ulong DEKHash(ulong[] input) {
     80    public static ulong DEKHash(byte[] input) {
    7881      ulong hash = (ulong)input.Length;
    7982      foreach (var v in input) {
     
    8386    }
    8487
    85     //public static ulong CryptoHash(HashAlgorithm ha, ulong[] input) {
    86     //  return BitConverter.ToInt32(ha.ComputeHash(input.ToByteArray()), 0);
    87     //}
    88 
    89     //public static byte[] ToByteArray(this ulong[] input) {
    90     //  var bytes = new byte[input.Length * sizeof(int)];
    91     //  int pos = 0;
    92     //  foreach (var v in input) {
    93     //    var b0 = (byte)((v >> 24) & 0xFF);
    94     //    var b1 = (byte)((v >> 16) & 0xFF);
    95     //    var b2 = (byte)((v >> 8) & 0xFF);
    96     //    var b3 = (byte)(v & 0xFF);
    97     //    bytes[pos++] = b0;
    98     //    bytes[pos++] = b1;
    99     //    bytes[pos++] = b2;
    100     //    bytes[pos++] = b3;
    101     //  }
    102     //  return bytes;
    103     //}
     88    public static ulong CryptoHash(HashAlgorithm ha, byte[] input) {
     89      return BitConverter.ToUInt64(ha.ComputeHash(input), 0);
     90    }
    10491  }
    10592}
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs

    r16267 r16272  
    2020#endregion
    2121
    22 using System.Collections.Generic;
    2322using System.Linq;
    2423using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding;
     
    4342    }
    4443
    45     // compute node hashes without sorting the arguments
    46     public static Dictionary<ISymbolicExpressionTreeNode, ulong> ComputeNodeHashes(this ISymbolicExpressionTree tree) {
    47       var root = tree.Root.GetSubtree(0).GetSubtree(0);
    48       var nodes = root.MakeNodes();
    49       nodes.UpdateNodeSizes();
    50 
    51       for (int i = 0; i < nodes.Length; ++i) {
    52         if (nodes[i].IsLeaf)
    53           continue;
    54         nodes[i].CalculatedHashValue = nodes.ComputeHash(i);
    55       }
    56       return nodes.ToDictionary(x => x.Data, x => x.CalculatedHashValue);
    57     }
    58 
    5944    public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode) {
     45      ulong hashFunction(byte[] input) => HashUtil.JSHash(input);
    6046      var hashNodes = treeNode.MakeNodes();
    61       var simplified = hashNodes.Simplify();
    62       //return ComputeHash(simplified);
     47      var simplified = hashNodes.Simplify(hashFunction);
    6348      return simplified.Last().CalculatedHashValue;
    6449    }
    65 
    66     //public static int ComputeHash(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {
    67     //  int hash = 1315423911;
    68     //  foreach (var node in nodes)
    69     //    hash ^= (hash << 5) + node.CalculatedHashValue + (hash >> 2);
    70     //  return hash;
    71     //}
    7250
    7351    public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node) {
     
    152130    // (in other words simplification should be applied in a bottom-up fashion)
    153131    public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) {
     132      ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes);
    154133      var root = tree.Root.GetSubtree(0).GetSubtree(0);
    155134      var nodes = root.MakeNodes();
    156       var simplified = nodes.Simplify();
     135      var simplified = nodes.Simplify(hashFunction);
    157136      return simplified.ToTree();
    158137    }
  • trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj

    r16255 r16272  
    142142    <Compile Include="Converters\DerivativeCalculator.cs" />
    143143    <Compile Include="Converters\TreeToAutoDiffTermConverter.cs" />
     144    <Compile Include="Crossovers\SymbolicDataAnalysisExpressionDiversityPreservingCrossover.cs" />
    144145    <Compile Include="Formatters\InfixExpressionFormatter.cs" />
    145146    <Compile Include="Formatters\TSQLExpressionFormatter.cs" />
Note: See TracChangeset for help on using the changeset viewer.