Changeset 16272 for trunk/HeuristicLab.Problems.DataAnalysis.Symbolic
- Timestamp:
- 11/02/18 16:20:33 (6 years ago)
- Location:
- trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4
- Files:
-
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisBuildingBlockAnalyzer.cs
r16271 r16272 20 20 #endregion 21 21 22 using System; 22 23 using System.Collections.Generic; 23 24 using System.Linq; … … 38 39 public sealed class SymbolicDataAnalysisBuildingBlockAnalyzer : SymbolicDataAnalysisAnalyzer { 39 40 private const string BuildingBlocksResultName = "BuildingBlocks"; 41 private const string SolutionUniquenessResultName = "SolutionUniqueness"; 40 42 private const string MinimumSubtreeLengthParameterName = "MinimumSubtreeLength"; 41 43 private const string SimplifyTreesParameterName = "SimplifyTrees"; … … 90 92 [StorableConstructor] 91 93 private SymbolicDataAnalysisBuildingBlockAnalyzer(bool deserializing) : base(deserializing) { } 94 95 private readonly Func<byte[], ulong> hashFunction = HashUtil.JSHash; 92 96 93 97 public override IOperation Apply() { … … 109 113 int totalCount = 0; // total number of examined subtrees 110 114 115 var hashes = new List<ulong>(); 111 116 // count hashes 112 117 foreach (var tree in SymbolicExpressionTree) { 113 118 var hashNodes = tree.Root.GetSubtree(0).GetSubtree(0).MakeNodes(); 114 var simplified = simplify ? hashNodes.Simplify() : hashNodes.Sort(); 119 var simplified = simplify ? hashNodes.Simplify(hashFunction) : hashNodes.Sort(hashFunction); 120 hashes.Add(simplified.Last().CalculatedHashValue); // maybe calculate aggregate hash instead 115 121 116 122 for (int i = 0; i < simplified.Length; i++) { … … 165 171 } 166 172 173 // compute solution uniqueness 174 DataTableHistory dth; 175 var counts = hashes.GroupBy(x => x).ToDictionary(x => x.Key, x => x.Count()); 176 if (!ResultCollection.ContainsKey(SolutionUniquenessResultName)) { 177 dth = new DataTableHistory(); 178 ResultCollection.Add(new Result(SolutionUniquenessResultName, dth)); 179 } else { 180 dth = (DataTableHistory)ResultCollection[SolutionUniquenessResultName].Value; 181 } 182 183 var ct = new DataTable("Unique Solutions"); 184 var ctr = new DataRow { VisualProperties = { StartIndexZero = true, ChartType = DataRowVisualProperties.DataRowChartType.Columns } }; 185 ctr.Values.AddRange(hashes.Select(x => (double)counts[x]).OrderByDescending(x => x)); 186 ct.Rows.Add(ctr); 187 dth.Add(ct); 188 189 var max = dth.Max(x => x.Rows.First().Values.Max()); 190 foreach (var table in dth) { 191 table.VisualProperties.YAxisMinimumAuto = false; 192 table.VisualProperties.YAxisMaximumAuto = false; 193 table.VisualProperties.YAxisMinimumFixedValue = 0; 194 table.VisualProperties.YAxisMaximumFixedValue = max; 195 } 196 167 197 return base.Apply(); 168 198 } -
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Crossovers/SymbolicDataAnalysisExpressionDiversityPreservingCrossover.cs
r16270 r16272 20 20 private const string WindowingParameterName = "Windowing"; 21 21 private const string ProportionalSamplingParameterName = "ProportionalSampling"; 22 23 private static readonly Func<byte[], ulong> hashFunction = HashUtil.JSHash; 22 24 23 25 #region Parameter Properties … … 73 75 var leafCrossoverPointProbability = 1 - internalCrossoverPointProbability; 74 76 75 var nodes0 = ActualRoot(parent0).MakeNodes().Sort( );76 var nodes1 = ActualRoot(parent1).MakeNodes().Sort( );77 var nodes0 = ActualRoot(parent0).MakeNodes().Sort(hashFunction); 78 var nodes1 = ActualRoot(parent1).MakeNodes().Sort(hashFunction); 77 79 78 80 IList<HashNode<ISymbolicExpressionTreeNode>> sampled0; -
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashExtensions.cs
r16267 r16272 67 67 68 68 public override int GetHashCode() { 69 //return CalculatedHashValue; 70 return Data.GetHashCode(); 69 return (int)CalculatedHashValue; 71 70 } 72 71 … … 80 79 } 81 80 82 public static ulong ComputeHash<T>(this HashNode<T>[] nodes, int i ) where T : class {81 public static ulong ComputeHash<T>(this HashNode<T>[] nodes, int i, Func<byte[], ulong> hashFunction) where T : class { 83 82 var node = nodes[i]; 84 var hashes = new ulong[node.Arity + 1]; 85 for (int j = i - 1, k = 0; k < node.Arity; j -= 1 + nodes[j].Size, k++) { 86 hashes[k] = nodes[j].CalculatedHashValue; 87 } 88 hashes[node.Arity] = node.HashValue; 89 return HashUtil.JSHash(hashes); 90 } 91 92 public static HashNode<T>[] Simplify<T>(this HashNode<T>[] nodes) where T : class { 93 var reduced = nodes.UpdateNodeSizes().Reduce().Sort(); 83 const int size = sizeof(ulong); 84 var hashes = new byte[(node.Arity + 1) * size]; 85 86 for (int j = i - 1, k = 0; k < node.Arity; ++k, j -= 1 + nodes[j].Size) { 87 Array.Copy(BitConverter.GetBytes(nodes[j].CalculatedHashValue), 0, hashes, k * size, size); 88 } 89 Array.Copy(BitConverter.GetBytes(node.HashValue), 0, hashes, node.Arity * size, size); 90 return hashFunction(hashes); 91 } 92 93 // set the enabled state for the whole subtree rooted at this node 94 public static void SetEnabled<T>(this HashNode<T>[] nodes, int i, bool enabled) where T : class { 95 nodes[i].Enabled = enabled; 96 for (int j = i - nodes[i].Size; j < i; ++j) 97 nodes[j].Enabled = enabled; 98 } 99 100 public static HashNode<T>[] Simplify<T>(this HashNode<T>[] nodes, Func<byte[], ulong> hashFunction) where T : class { 101 var reduced = nodes.UpdateNodeSizes().Reduce().Sort(hashFunction); 94 102 95 103 for (int i = 0; i < reduced.Length; ++i) { … … 116 124 } 117 125 } 118 return simplified.UpdateNodeSizes().Reduce().Sort( );119 } 120 121 public static HashNode<T>[] Sort<T>(this HashNode<T>[] nodes ) where T : class {126 return simplified.UpdateNodeSizes().Reduce().Sort(hashFunction); 127 } 128 129 public static HashNode<T>[] Sort<T>(this HashNode<T>[] nodes, Func<byte[], ulong> hashFunction) where T : class { 122 130 int sort(int a, int b) => nodes[a].CompareTo(nodes[b]); 123 131 … … 155 163 } 156 164 } 157 node.CalculatedHashValue = nodes.ComputeHash(i );165 node.CalculatedHashValue = nodes.ComputeHash(i, hashFunction); 158 166 } 159 167 return nodes; -
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashUtil.cs
r16263 r16272 21 21 22 22 23 using System; 24 using System.Security.Cryptography; 25 23 26 namespace HeuristicLab.Problems.DataAnalysis.Symbolic { 24 27 public static class HashUtil { … … 26 29 27 30 // A simple hash function from Robert Sedgwicks Algorithms in C book.I've added some simple optimizations to the algorithm in order to speed up its hashing process. 28 public static ulong RSHash( ulong[] input) {31 public static ulong RSHash(byte[] input) { 29 32 const int b = 378551; 30 33 ulong a = 63689; … … 39 42 40 43 // A bitwise hash function written by Justin Sobel 41 public static ulong JSHash( ulong[] input) {44 public static ulong JSHash(byte[] input) { 42 45 ulong hash = 1315423911; 43 46 for (int i = 0; i < input.Length; ++i) … … 47 50 48 51 // This hash function comes from Brian Kernighan and Dennis Ritchie's book "The C Programming Language". It is a simple hash function using a strange set of possible seeds which all constitute a pattern of 31....31...31 etc, it seems to be very similar to the DJB hash function. 49 public static ulong BKDRHash( ulong[] input) {52 public static ulong BKDRHash(byte[] input) { 50 53 ulong seed = 131; 51 54 ulong hash = 0; … … 57 60 58 61 // This is the algorithm of choice which is used in the open source SDBM project. The hash function seems to have a good over-all distribution for many different data sets. It seems to work well in situations where there is a high variance in the MSBs of the elements in a data set. 59 public static ulong SDBMHash( ulong[] input) {62 public static ulong SDBMHash(byte[] input) { 60 63 ulong hash = 0; 61 64 foreach (var v in input) { … … 66 69 67 70 // An algorithm produced by Professor Daniel J. Bernstein and shown first to the world on the usenet newsgroup comp.lang.c. It is one of the most efficient hash functions ever published. 68 public static ulong DJBHash( ulong[] input) {71 public static ulong DJBHash(byte[] input) { 69 72 ulong hash = 5381; 70 73 foreach (var v in input) { … … 75 78 76 79 // An algorithm proposed by Donald E.Knuth in The Art Of Computer Programming Volume 3, under the topic of sorting and search chapter 6.4. 77 public static ulong DEKHash( ulong[] input) {80 public static ulong DEKHash(byte[] input) { 78 81 ulong hash = (ulong)input.Length; 79 82 foreach (var v in input) { … … 83 86 } 84 87 85 //public static ulong CryptoHash(HashAlgorithm ha, ulong[] input) { 86 // return BitConverter.ToInt32(ha.ComputeHash(input.ToByteArray()), 0); 87 //} 88 89 //public static byte[] ToByteArray(this ulong[] input) { 90 // var bytes = new byte[input.Length * sizeof(int)]; 91 // int pos = 0; 92 // foreach (var v in input) { 93 // var b0 = (byte)((v >> 24) & 0xFF); 94 // var b1 = (byte)((v >> 16) & 0xFF); 95 // var b2 = (byte)((v >> 8) & 0xFF); 96 // var b3 = (byte)(v & 0xFF); 97 // bytes[pos++] = b0; 98 // bytes[pos++] = b1; 99 // bytes[pos++] = b2; 100 // bytes[pos++] = b3; 101 // } 102 // return bytes; 103 //} 88 public static ulong CryptoHash(HashAlgorithm ha, byte[] input) { 89 return BitConverter.ToUInt64(ha.ComputeHash(input), 0); 90 } 104 91 } 105 92 } -
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs
r16267 r16272 20 20 #endregion 21 21 22 using System.Collections.Generic;23 22 using System.Linq; 24 23 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; … … 43 42 } 44 43 45 // compute node hashes without sorting the arguments46 public static Dictionary<ISymbolicExpressionTreeNode, ulong> ComputeNodeHashes(this ISymbolicExpressionTree tree) {47 var root = tree.Root.GetSubtree(0).GetSubtree(0);48 var nodes = root.MakeNodes();49 nodes.UpdateNodeSizes();50 51 for (int i = 0; i < nodes.Length; ++i) {52 if (nodes[i].IsLeaf)53 continue;54 nodes[i].CalculatedHashValue = nodes.ComputeHash(i);55 }56 return nodes.ToDictionary(x => x.Data, x => x.CalculatedHashValue);57 }58 59 44 public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode) { 45 ulong hashFunction(byte[] input) => HashUtil.JSHash(input); 60 46 var hashNodes = treeNode.MakeNodes(); 61 var simplified = hashNodes.Simplify(); 62 //return ComputeHash(simplified); 47 var simplified = hashNodes.Simplify(hashFunction); 63 48 return simplified.Last().CalculatedHashValue; 64 49 } 65 66 //public static int ComputeHash(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {67 // int hash = 1315423911;68 // foreach (var node in nodes)69 // hash ^= (hash << 5) + node.CalculatedHashValue + (hash >> 2);70 // return hash;71 //}72 50 73 51 public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node) { … … 152 130 // (in other words simplification should be applied in a bottom-up fashion) 153 131 public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) { 132 ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes); 154 133 var root = tree.Root.GetSubtree(0).GetSubtree(0); 155 134 var nodes = root.MakeNodes(); 156 var simplified = nodes.Simplify( );135 var simplified = nodes.Simplify(hashFunction); 157 136 return simplified.ToTree(); 158 137 } -
trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj
r16255 r16272 142 142 <Compile Include="Converters\DerivativeCalculator.cs" /> 143 143 <Compile Include="Converters\TreeToAutoDiffTermConverter.cs" /> 144 <Compile Include="Crossovers\SymbolicDataAnalysisExpressionDiversityPreservingCrossover.cs" /> 144 145 <Compile Include="Formatters\InfixExpressionFormatter.cs" /> 145 146 <Compile Include="Formatters\TSQLExpressionFormatter.cs" />
Note: See TracChangeset
for help on using the changeset viewer.