Changeset 17090
- Timestamp:
- 07/05/19 15:44:04 (5 years ago)
- Location:
- stable
- Files:
-
- 7 edited
- 3 copied
Legend:
- Unmodified
- Added
- Removed
-
stable
- Property svn:mergeinfo changed
/trunk merged: 16218,16252,16255,16258-16261,16263,16267,16270-16273,16284,16290-16291,16302,16305,16382,16478
- Property svn:mergeinfo changed
-
stable/HeuristicLab.Problems.DataAnalysis.Symbolic
-
stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Analyzers/SymbolicDataAnalysisBuildingBlockAnalyzer.cs
r16255 r17090 23 23 using System.Collections.Generic; 24 24 using System.Linq; 25 using System.Text; 25 26 using HeuristicLab.Analysis; 26 27 using HeuristicLab.Common; … … 38 39 public sealed class SymbolicDataAnalysisBuildingBlockAnalyzer : SymbolicDataAnalysisAnalyzer { 39 40 private const string BuildingBlocksResultName = "BuildingBlocks"; 41 private const string SolutionUniquenessResultName = "SolutionUniqueness"; 40 42 private const string MinimumSubtreeLengthParameterName = "MinimumSubtreeLength"; 41 43 private const string SimplifyTreesParameterName = "SimplifyTrees"; 42 44 43 private readonly InfixExpressionFormatter formatter = new InfixExpressionFormatter();44 private Dictionary<int, DataRow> hashToRow = new Dictionary<int, DataRow>(); 45 45 private Dictionary<ulong, DataRow> hashToRow = new Dictionary<ulong, DataRow>(); 46 47 #region parameters 46 48 public IValueLookupParameter<IntValue> MinimumSubtreeLengthParameter { 47 49 get { return (IValueLookupParameter<IntValue>)Parameters[MinimumSubtreeLengthParameterName]; } … … 51 53 get { return (IValueLookupParameter<BoolValue>)Parameters[SimplifyTreesParameterName]; } 52 54 } 53 55 #endregion 56 57 #region parameter properties 54 58 public IntValue MinimumSubtreeLength { 55 59 get { return MinimumSubtreeLengthParameter.ActualValue; } … … 59 63 get { return SimplifyTreesParameter.ActualValue; } 60 64 } 65 #endregion 61 66 62 67 public override void InitializeState() { 63 68 base.InitializeState(); 64 69 65 hashToRow = new Dictionary<int, DataRow>(); 66 } 67 70 hashToRow = new Dictionary<ulong, DataRow>(); 71 } 68 72 69 73 [StorableHook(HookType.AfterDeserialization)] … … 85 89 return new SymbolicDataAnalysisBuildingBlockAnalyzer(this, cloner); 86 90 } 91 92 [StorableConstructor] 93 private SymbolicDataAnalysisBuildingBlockAnalyzer(bool deserializing) : base(deserializing) { } 94 95 private readonly Func<byte[], ulong> hashFunction = HashUtil.JSHash; 87 96 88 97 public override IOperation Apply() { … … 99 108 var simplify = SimplifyTrees.Value; 100 109 101 var expressions = new Dictionary<int, string>(); 102 var expressionCounts = new Dictionary<int, int>(); 103 104 int totalCount = 0; // total number of subtrees examined 110 var expressions = new Dictionary<ulong, string>(); 111 var expressionCounts = new Dictionary<ulong, int>(); 112 113 int totalCount = 0; // total number of examined subtrees 114 115 var hashes = new List<ulong>(); 116 // count hashes 105 117 foreach (var tree in SymbolicExpressionTree) { 106 118 var hashNodes = tree.Root.GetSubtree(0).GetSubtree(0).MakeNodes(); 107 var simplified = simplify ? hashNodes.Simplify() : hashNodes.Sort(); 119 var simplified = simplify ? hashNodes.Simplify(hashFunction) : hashNodes.Sort(hashFunction); 120 hashes.Add(simplified.Last().CalculatedHashValue); // maybe calculate aggregate hash instead 108 121 109 122 for (int i = 0; i < simplified.Length; i++) { 110 123 HashNode<ISymbolicExpressionTreeNode> s = simplified[i]; 111 if (s.Is Child|| s.Size < minLength) {124 if (s.IsLeaf || s.Size < minLength) { 112 125 continue; 113 126 } 114 127 ++totalCount; 115 128 var hash = s.CalculatedHashValue; 116 if (expressions. TryGetValue(hash, out string str)) {129 if (expressions.ContainsKey(hash)) { 117 130 expressionCounts[hash]++; 118 } else { 119 // set constant and weight values so the tree is formatted nicely by the formatter 120 var nodes = new HashNode<ISymbolicExpressionTreeNode>[1 + s.Size]; 121 Array.Copy(simplified, i - s.Size, nodes, 0, nodes.Length); 122 var subtree = nodes.ToSubtree(); 123 124 foreach (var node in subtree.IterateNodesPostfix()) { 125 if (node is ConstantTreeNode constantTreeNode) { 126 constantTreeNode.Value = 0; 127 } else if (node is VariableTreeNode variableTreeNode) { 128 variableTreeNode.Weight = 1; 129 } 130 } 131 132 expressions[hash] = formatter.Format(subtree); 133 expressionCounts[hash] = 1; 131 continue; 134 132 } 133 134 var sb = new StringBuilder(); 135 for (int j = i - s.Size; j < i; ++j) { 136 sb.Append(GetLabel(simplified[j].Data)).Append(" "); 137 } 138 sb.Append(GetLabel(simplified[i].Data)); 139 expressions[hash] = sb.ToString(); 140 expressionCounts[hash] = 1; 135 141 } 136 142 } 137 143 138 var mostCommon = expressionCounts.OrderByDescending(x => x.Value).Take(10).ToList(); 139 var mostCommonLabels = mostCommon.Select(x => expressions[x.Key]).ToList(); 140 144 // fill in values for existing rows 141 145 foreach (var t in hashToRow) { 142 146 var hash = t.Key; 143 147 var row = t.Value; 144 148 145 if (expressionCounts.TryGetValue(hash, out int count)) { 146 row.Values.Add((double)count / totalCount); 147 } else { 148 row.Values.Add(0); 149 } 149 expressionCounts.TryGetValue(hash, out int count); 150 row.Values.Add(count); 150 151 } 151 152 152 153 var nValues = dt.Rows.Any() ? dt.Rows.Max(x => x.Values.Count) : 0; 153 154 154 for (int i = 0; i < mostCommon.Count; ++i) { 155 var hash = mostCommon[i].Key; 156 var count = mostCommon[i].Value; 155 // check if we have new rows 156 foreach (var t in expressionCounts.OrderByDescending(x => x.Value).Take(10)) { 157 var hash = t.Key; 158 var count = t.Value; 159 var label = expressions[hash]; 157 160 158 161 if (hashToRow.ContainsKey(hash)) { 159 162 continue; 160 163 } 161 var label = mostCommonLabels[i];162 164 var row = new DataRow(label) { VisualProperties = { StartIndexZero = true } }; 163 // pad with zeroes 164 for (int j = 0; j < nValues - 1; ++j) { 165 row.Values.Add(0); 165 if (nValues > 0) { 166 row.Values.AddRange(Enumerable.Repeat<double>(0, nValues - 1)); // pad with zeroes 166 167 } 167 row.Values.Add( (double)count / totalCount);168 row.Values.Add(count); 168 169 dt.Rows.Add(row); 169 170 hashToRow[hash] = row; 170 171 } 172 173 // compute solution uniqueness 174 DataTableHistory dth; 175 var counts = hashes.GroupBy(x => x).ToDictionary(x => x.Key, x => x.Count()); 176 if (!ResultCollection.ContainsKey(SolutionUniquenessResultName)) { 177 dth = new DataTableHistory(); 178 ResultCollection.Add(new Result(SolutionUniquenessResultName, dth)); 179 } else { 180 dth = (DataTableHistory)ResultCollection[SolutionUniquenessResultName].Value; 181 } 182 183 var ct = new DataTable("Unique Solutions"); 184 var ctr = new DataRow { VisualProperties = { StartIndexZero = true, ChartType = DataRowVisualProperties.DataRowChartType.Columns } }; 185 ctr.Values.AddRange(hashes.Select(x => (double)counts[x]).OrderByDescending(x => x)); 186 ct.Rows.Add(ctr); 187 dth.Add(ct); 188 189 var max = dth.Max(x => x.Rows.First().Values.Max()); 190 foreach (var table in dth) { 191 table.VisualProperties.YAxisMinimumAuto = false; 192 table.VisualProperties.YAxisMaximumAuto = false; 193 table.VisualProperties.YAxisMinimumFixedValue = 0; 194 table.VisualProperties.YAxisMaximumFixedValue = max; 195 } 196 171 197 return base.Apply(); 198 } 199 200 private static string GetLabel(ISymbolicExpressionTreeNode node) { 201 if (node is ConstantTreeNode constant) { 202 return "C"; 203 } 204 if (node is VariableTreeNode variable) { 205 return variable.VariableName; 206 } 207 if (node.Symbol is Addition) { 208 return "+"; 209 } 210 if (node.Symbol is Subtraction) { 211 return "-"; 212 } 213 if (node.Symbol is Multiplication) { 214 return "*"; 215 } 216 if (node.Symbol is Division) { 217 return "/"; 218 } 219 return node.Symbol.ToString(); 172 220 } 173 221 } -
stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Crossovers/SymbolicDataAnalysisExpressionDiversityPreservingCrossover.cs
r16263 r17090 20 20 private const string WindowingParameterName = "Windowing"; 21 21 private const string ProportionalSamplingParameterName = "ProportionalSampling"; 22 23 private static readonly Func<byte[], ulong> hashFunction = HashUtil.JSHash; 22 24 23 25 #region Parameter Properties … … 73 75 var leafCrossoverPointProbability = 1 - internalCrossoverPointProbability; 74 76 75 var nodes0 = ActualRoot(parent0).MakeNodes().Sort( );76 var nodes1 = ActualRoot(parent1).MakeNodes().Sort( );77 var nodes0 = ActualRoot(parent0).MakeNodes().Sort(hashFunction); 78 var nodes1 = ActualRoot(parent1).MakeNodes().Sort(hashFunction); 77 79 78 80 IList<HashNode<ISymbolicExpressionTreeNode>> sampled0; … … 81 83 if (proportionalSampling) { 82 84 var p = internalCrossoverPointProbability; 83 var weights0 = nodes0.Select(x => x.Is Child? 1 - p : p);85 var weights0 = nodes0.Select(x => x.IsLeaf ? 1 - p : p); 84 86 sampled0 = nodes0.SampleProportionalWithoutRepetition(random, nodes0.Length, weights0, windowing: windowing).ToArray(); 85 87 86 var weights1 = nodes1.Select(x => x.Is Child? 1 - p : p);88 var weights1 = nodes1.Select(x => x.IsLeaf ? 1 - p : p); 87 89 sampled1 = nodes1.SampleProportionalWithoutRepetition(random, nodes1.Length, weights1, windowing: windowing).ToArray(); 88 90 } else { … … 133 135 134 136 if (chooseInternal) { 135 list.AddRange(nodes.Where(x => !x.Is Child&& x.Data.Parent != null));137 list.AddRange(nodes.Where(x => !x.IsLeaf && x.Data.Parent != null)); 136 138 } 137 139 if (!chooseInternal || list.Count == 0) { 138 list.AddRange(nodes.Where(x => x.Is Child&& x.Data.Parent != null));140 list.AddRange(nodes.Where(x => x.IsLeaf && x.Data.Parent != null)); 139 141 } 140 142 -
stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashExtensions.cs
r16218 r17090 32 32 33 33 public bool Enabled; 34 public int HashValue; // the initial (fixed) hash value for this individual node/data 35 public int CalculatedHashValue; // the calculated hash value (taking into account the children hash values) 36 37 public Action<HashNode<T>[], int> Simplify; 34 public ulong HashValue; // the initial (fixed) hash value for this individual node/data 35 public ulong CalculatedHashValue; // the calculated hash value (taking into account the children hash values) 36 37 public delegate void SimplifyAction(ref HashNode<T>[] nodes, int i); 38 public SimplifyAction Simplify; 39 38 40 public IComparer<T> Comparer; 39 41 40 public bool Is Child=> Arity == 0;42 public bool IsLeaf => Arity == 0; 41 43 42 44 public HashNode(IComparer<T> comparer) { … … 67 69 68 70 public override int GetHashCode() { 69 return CalculatedHashValue;71 return (int)CalculatedHashValue; 70 72 } 71 73 … … 79 81 } 80 82 81 public static int ComputeHash<T>(this HashNode<T>[] nodes, int i) where T : class {83 public static ulong ComputeHash<T>(this HashNode<T>[] nodes, int i, Func<byte[], ulong> hashFunction) where T : class { 82 84 var node = nodes[i]; 83 var hashes = new int[node.Arity + 1]; 84 int idx = 0; 85 foreach (int c in nodes.IterateChildren(i)) { 86 hashes[idx++] = nodes[c].CalculatedHashValue; 87 } 88 hashes[idx] = node.HashValue; 89 return HashUtil.JSHash(hashes); 90 } 91 92 public static HashNode<T>[] Simplify<T>(this HashNode<T>[] nodes) where T : class { 93 var reduced = nodes.Reduce(); 94 reduced.Sort(); 85 const int size = sizeof(ulong); 86 var hashes = new ulong[node.Arity + 1]; 87 var bytes = new byte[(node.Arity + 1) * size]; 88 89 for (int j = i - 1, k = 0; k < node.Arity; ++k, j -= 1 + nodes[j].Size) { 90 hashes[k] = nodes[j].CalculatedHashValue; 91 } 92 hashes[node.Arity] = node.HashValue; 93 Buffer.BlockCopy(hashes, 0, bytes, 0, bytes.Length); 94 return hashFunction(bytes); 95 } 96 97 // set the enabled state for the whole subtree rooted at this node 98 public static void SetEnabled<T>(this HashNode<T>[] nodes, int i, bool enabled) where T : class { 99 nodes[i].Enabled = enabled; 100 for (int j = i - nodes[i].Size; j < i; ++j) 101 nodes[j].Enabled = enabled; 102 } 103 104 public static HashNode<T>[] Simplify<T>(this HashNode<T>[] nodes, Func<byte[], ulong> hashFunction) where T : class { 105 var reduced = nodes.UpdateNodeSizes().Reduce().Sort(hashFunction); 95 106 96 107 for (int i = 0; i < reduced.Length; ++i) { 97 108 var node = reduced[i]; 98 if (node.Is Child) {99 continue; 100 } 101 node.Simplify?.Invoke(re duced, i);109 if (node.IsLeaf) { 110 continue; 111 } 112 node.Simplify?.Invoke(ref reduced, i); 102 113 } 103 114 // detect if anything was simplified … … 117 128 } 118 129 } 119 simplified.UpdateNodeSizes();120 simplified.Sort();121 return simplified; 122 }123 124 public static void Sort<T>(this HashNode<T>[] nodes) where T : class { 130 return simplified.UpdateNodeSizes().Reduce().Sort(hashFunction); 131 } 132 133 public static HashNode<T>[] Sort<T>(this HashNode<T>[] nodes, Func<byte[], ulong> hashFunction) where T : class { 134 int sort(int a, int b) => nodes[a].CompareTo(nodes[b]); 135 125 136 for (int i = 0; i < nodes.Length; ++i) { 126 137 var node = nodes[i]; 127 138 128 if (node.Is Child) {139 if (node.IsLeaf) { 129 140 continue; 130 141 } … … 138 149 } else { // i have some non-terminal children 139 150 var sorted = new HashNode<T>[size]; 140 var indices = nodes.IterateChildren(i); 141 Array.Sort(indices, (a, b) => nodes[a].CompareTo(nodes[b])); 151 var indices = new int[node.Arity]; 152 for (int j = i - 1, k = 0; k < node.Arity; j -= 1 + nodes[j].Size, ++k) { 153 indices[k] = j; 154 } 155 Array.Sort(indices, sort); 142 156 143 157 int idx = 0; 144 158 foreach (var j in indices) { 145 159 var child = nodes[j]; 146 if (!child.Is Child) { // must copy complete subtree160 if (!child.IsLeaf) { // must copy complete subtree 147 161 Array.Copy(nodes, j - child.Size, sorted, idx, child.Size); 148 162 idx += child.Size; … … 153 167 } 154 168 } 155 node.CalculatedHashValue = nodes.ComputeHash(i); 156 } 169 node.CalculatedHashValue = nodes.ComputeHash(i, hashFunction); 170 } 171 return nodes; 157 172 } 158 173 159 174 /// <summary> 160 /// Get a function node's child indices from left to right175 /// Get a function node's child indicest 161 176 /// </summary> 162 /// <typeparam name="T"> </typeparam>163 /// <param name="nodes"> </param>164 /// <param name="i"> </param>177 /// <typeparam name="T">The data type encapsulated by a hash node</typeparam> 178 /// <param name="nodes">An array of hash nodes with up-to-date node sizes</param> 179 /// <param name="i">The index in the array of hash nodes of the node whose children we want to iterate</param> 165 180 /// <returns>An array containing child indices</returns> 166 181 public static int[] IterateChildren<T>(this HashNode<T>[] nodes, int i) where T : class { … … 170 185 var idx = i - 1; 171 186 for (int j = 0; j < arity; ++j) { 172 while (!nodes[idx].Enabled) { --idx; } // skip nodes possibly disabled by simplification173 187 children[j] = idx; 174 188 idx -= 1 + nodes[idx].Size; … … 177 191 } 178 192 179 public static voidUpdateNodeSizes<T>(this HashNode<T>[] nodes) where T : class {193 public static HashNode<T>[] UpdateNodeSizes<T>(this HashNode<T>[] nodes) where T : class { 180 194 for (int i = 0; i < nodes.Length; ++i) { 181 195 var node = nodes[i]; 182 if (node.Is Child) {196 if (node.IsLeaf) { 183 197 node.Size = 0; 184 198 continue; 185 199 } 186 200 node.Size = node.Arity; 187 foreach (int j in nodes.IterateChildren(i)) { 201 202 for (int j = i - 1, k = 0; k < node.Arity; j -= 1 + nodes[j].Size, ++k) { 188 203 node.Size += nodes[j].Size; 189 204 } 190 205 } 191 } 192 193 /// <summary> 194 /// Reduce nodes of the same type according to arithmetic rules 195 /// </summary> 196 /// <typeparam name="T"></typeparam> 197 /// <param name="g">The given grammar (controls symbol arities)</param> 198 /// <param name="nodes">The node array</param> 199 /// <param name="i">Parent index</param> 200 /// <param name="j">Child index</param> 201 private static void Reduce<T>(this HashNode<T>[] nodes, int i, int j) where T : class { 202 var p = nodes[i]; // parent node 203 var c = nodes[j]; // child node 204 205 if (c.IsChild) 206 return; 207 208 foreach (int k in nodes.IterateChildren(j)) { 209 if (nodes[k].IsChild) continue; 210 nodes.Reduce(j, k); 211 } 212 213 // handle commutative symbols (add, mul) 214 if (p.IsCommutative && p.HashValue == c.HashValue) { 215 c.Enabled = false; 216 p.Arity += c.Arity - 1; 217 } 218 } 219 220 public static HashNode<T>[] Reduce<T>(this HashNode<T>[] nodes) where T : class { 221 nodes.UpdateNodeSizes(); 222 int i = nodes.Length - 1; 223 foreach (int c in nodes.IterateChildren(i)) { 224 if (nodes[c].IsChild) continue; 225 nodes.Reduce(i, c); 226 } 206 return nodes; 207 } 208 209 private static HashNode<T>[] Reduce<T>(this HashNode<T>[] nodes) where T : class { 227 210 int count = 0; 228 foreach (var node in nodes) { 229 if (!node.Enabled) { ++count; } 211 for (int i = 0; i < nodes.Length; ++i) { 212 var node = nodes[i]; 213 if (node.IsLeaf || !node.IsCommutative) { 214 continue; 215 } 216 217 var arity = node.Arity; 218 for (int j = i - 1, k = 0; k < arity; j -= 1 + nodes[j].Size, ++k) { 219 if (node.HashValue == nodes[j].HashValue) { 220 nodes[j].Enabled = false; 221 node.Arity += nodes[j].Arity - 1; 222 ++count; 223 } 224 } 230 225 } 231 226 if (count == 0) … … 233 228 234 229 var reduced = new HashNode<T>[nodes.Length - count]; 235 i= 0;230 var idx = 0; 236 231 foreach (var node in nodes) { 237 if (node.Enabled) { reduced[i++] = node; } 238 } 239 reduced.UpdateNodeSizes(); 240 return reduced; 232 if (node.Enabled) { reduced[idx++] = node; } 233 } 234 return reduced.UpdateNodeSizes(); 241 235 } 242 236 } -
stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashUtil.cs
r16218 r17090 20 20 #endregion 21 21 22 22 23 using System; 23 24 using System.Security.Cryptography; … … 28 29 29 30 // A simple hash function from Robert Sedgwicks Algorithms in C book.I've added some simple optimizations to the algorithm in order to speed up its hashing process. 30 public static int RSHash(int[] input) {31 public static ulong RSHash(byte[] input) { 31 32 const int b = 378551; 32 inta = 63689;33 inthash = 0;33 ulong a = 63689; 34 ulong hash = 0; 34 35 35 36 foreach (var v in input) { … … 41 42 42 43 // A bitwise hash function written by Justin Sobel 43 public static int JSHash(int[] input) {44 inthash = 1315423911;44 public static ulong JSHash(byte[] input) { 45 ulong hash = 1315423911; 45 46 for (int i = 0; i < input.Length; ++i) 46 47 hash ^= (hash << 5) + input[i] + (hash >> 2); … … 49 50 50 51 // This hash function comes from Brian Kernighan and Dennis Ritchie's book "The C Programming Language". It is a simple hash function using a strange set of possible seeds which all constitute a pattern of 31....31...31 etc, it seems to be very similar to the DJB hash function. 51 public static int BKDRHash(int[] input) {52 const intseed = 131;53 inthash = 0;52 public static ulong BKDRHash(byte[] input) { 53 ulong seed = 131; 54 ulong hash = 0; 54 55 foreach (var v in input) { 55 56 hash = (hash * seed) + v; … … 59 60 60 61 // This is the algorithm of choice which is used in the open source SDBM project. The hash function seems to have a good over-all distribution for many different data sets. It seems to work well in situations where there is a high variance in the MSBs of the elements in a data set. 61 public static int SDBMHash(int[] input) {62 inthash = 0;62 public static ulong SDBMHash(byte[] input) { 63 ulong hash = 0; 63 64 foreach (var v in input) { 64 65 hash = v + (hash << 6) + (hash << 16) - hash; … … 68 69 69 70 // An algorithm produced by Professor Daniel J. Bernstein and shown first to the world on the usenet newsgroup comp.lang.c. It is one of the most efficient hash functions ever published. 70 public static int DJBHash(int[] input) {71 inthash = 5381;71 public static ulong DJBHash(byte[] input) { 72 ulong hash = 5381; 72 73 foreach (var v in input) { 73 74 hash = (hash << 5) + hash + v; … … 77 78 78 79 // An algorithm proposed by Donald E.Knuth in The Art Of Computer Programming Volume 3, under the topic of sorting and search chapter 6.4. 79 public static int DEKHash(int[] input) {80 int hash =input.Length;80 public static ulong DEKHash(byte[] input) { 81 ulong hash = (ulong)input.Length; 81 82 foreach (var v in input) { 82 83 hash = (hash << 5) ^ (hash >> 27) ^ v; … … 85 86 } 86 87 87 public static int CryptoHash(HashAlgorithm ha, int[] input) { 88 return BitConverter.ToInt32(ha.ComputeHash(input.ToByteArray()), 0); 89 } 90 91 public static byte[] ToByteArray(this int[] input) { 92 const int size = sizeof(int); 93 var bytes = new byte[input.Length * sizeof(int)]; 94 for (int i = 0; i < input.Length; ++i) { 95 Array.Copy(BitConverter.GetBytes(bytes[i]), 0, bytes, i * size, size); 96 } 97 return bytes; 88 public static ulong CryptoHash(HashAlgorithm ha, byte[] input) { 89 return BitConverter.ToUInt64(ha.ComputeHash(input), 0); 98 90 } 99 91 } -
stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs
r16218 r17090 1 using System.Collections.Generic; 1 #region License Information 2 /* HeuristicLab 3 * Copyright (C) 2002-2019 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 * 5 * This file is part of HeuristicLab. 6 * 7 * HeuristicLab is free software: you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation, either version 3 of the License, or 10 * (at your option) any later version. 11 * 12 * HeuristicLab is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>. 19 */ 20 #endregion 21 22 using System; 23 using System.Collections.Generic; 2 24 using System.Linq; 3 25 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; … … 17 39 18 40 private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer(); 19 20 public static int ComputeHash(this ISymbolicExpressionTree tree) { 21 return ComputeHash(tree.Root.GetSubtree(0).GetSubtree(0)); 22 } 23 24 public static Dictionary<ISymbolicExpressionTreeNode, int> ComputeNodeHashes(this ISymbolicExpressionTree tree) { 25 var root = tree.Root.GetSubtree(0).GetSubtree(0); 26 var nodes = root.MakeNodes(); 27 nodes.UpdateNodeSizes(); 28 29 for (int i = 0; i < nodes.Length; ++i) { 30 if (nodes[i].IsChild) 31 continue; 32 nodes[i].CalculatedHashValue = nodes.ComputeHash(i); 33 } 34 return nodes.ToDictionary(x => x.Data, x => x.CalculatedHashValue); 35 } 36 37 public static int ComputeHash(this ISymbolicExpressionTreeNode treeNode) { 38 var hashNodes = treeNode.MakeNodes(); 39 var simplified = hashNodes.Simplify(); 40 return ComputeHash(simplified); 41 } 42 43 public static int ComputeHash(this HashNode<ISymbolicExpressionTreeNode>[] nodes) { 44 int hash = 1315423911; 45 foreach (var node in nodes) 46 hash ^= (hash << 5) + node.CalculatedHashValue + (hash >> 2); 47 return hash; 48 } 49 50 public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node) { 41 private static ISymbolicExpressionTreeNode ActualRoot(this ISymbolicExpressionTree tree) => tree.Root.GetSubtree(0).GetSubtree(0); 42 43 #region tree hashing 44 public static ulong[] Hash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) { 45 return tree.ActualRoot().Hash(simplify, strict); 46 } 47 48 public static ulong[] Hash(this ISymbolicExpressionTreeNode node, bool simplify = false, bool strict = false) { 49 ulong hashFunction(byte[] input) => HashUtil.DJBHash(input); 50 51 var hashNodes = simplify ? node.MakeNodes(strict).Simplify(hashFunction) : node.MakeNodes(strict).Sort(hashFunction); 52 var hashes = new ulong[hashNodes.Length]; 53 for (int i = 0; i < hashes.Length; ++i) { 54 hashes[i] = hashNodes[i].CalculatedHashValue; 55 } 56 return hashes; 57 } 58 59 public static ulong ComputeHash(this ISymbolicExpressionTree tree, bool simplify = false, bool strict = false) { 60 return ComputeHash(tree.ActualRoot(), simplify, strict); 61 } 62 63 public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool simplify = false, bool strict = false) { 64 return treeNode.Hash(simplify, strict).Last(); 65 } 66 67 public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node, bool strict = false) { 51 68 var symbol = node.Symbol; 52 69 var name = symbol.Name; 53 if (symbol is Variable) { 54 var variableTreeNode = (VariableTreeNode)node; 55 name = variableTreeNode.VariableName; 56 } 57 var hash = name.GetHashCode(); 70 if (node is ConstantTreeNode constantNode) { 71 name = strict ? constantNode.Value.ToString() : symbol.Name; 72 } else if (node is VariableTreeNode variableNode) { 73 name = strict ? variableNode.Weight.ToString() + variableNode.VariableName : variableNode.VariableName; 74 } 75 var hash = (ulong)name.GetHashCode(); 58 76 var hashNode = new HashNode<ISymbolicExpressionTreeNode>(comparer) { 59 77 Data = node, … … 79 97 } 80 98 81 public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node) { 82 return node.IterateNodesPostfix().Select(ToHashNode).ToArray(); 83 } 99 public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree, bool strict = false) { 100 return MakeNodes(tree.ActualRoot(), strict); 101 } 102 103 public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node, bool strict = false) { 104 return node.IterateNodesPostfix().Select(x => x.ToHashNode(strict)).ToArray().UpdateNodeSizes(); 105 } 106 #endregion 107 108 #region tree similarity 109 public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false, bool strict = false) { 110 return ComputeSimilarity(t1.ActualRoot(), t2.ActualRoot(), simplify, strict); 111 } 112 113 public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false, bool strict = false) { 114 var lh = t1.Hash(simplify, strict); 115 var rh = t2.Hash(simplify, strict); 116 117 Array.Sort(lh); 118 Array.Sort(rh); 119 120 return ComputeSimilarity(lh, rh); 121 } 122 123 // requires lhs and rhs to be sorted 124 public static int IntersectCount(this ulong[] lh, ulong[] rh) { 125 int count = 0; 126 for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) { 127 var h1 = lh[i]; 128 var h2 = rh[j]; 129 if (h1 == h2) { 130 ++count; 131 ++i; 132 ++j; 133 } else if (h1 < h2) { 134 ++i; 135 } else if (h1 > h2) { 136 ++j; 137 } 138 } 139 return count; 140 } 141 142 public static IEnumerable<ulong> Intersect(this ulong[] lh, ulong[] rh) { 143 for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) { 144 var h1 = lh[i]; 145 var h2 = rh[j]; 146 if (h1 == h2) { 147 yield return h1; 148 ++i; 149 ++j; 150 } else if (h1 < h2) { 151 ++i; 152 } else if (h1 > h2) { 153 ++j; 154 } 155 } 156 } 157 158 // this will only work if lh and rh are sorted 159 public static double ComputeSimilarity(ulong[] lh, ulong[] rh) { 160 return 2d * IntersectCount(lh, rh) / (lh.Length + rh.Length); 161 } 162 163 public static double ComputeAverageSimilarity(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) { 164 var total = trees.Count * (trees.Count - 1) / 2; 165 double avg = 0; 166 var hashes = new ulong[trees.Count][]; 167 // build hash arrays 168 for (int i = 0; i < trees.Count; ++i) { 169 var nodes = trees[i].MakeNodes(strict); 170 hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 171 Array.Sort(hashes[i]); 172 } 173 // compute similarity matrix 174 for (int i = 0; i < trees.Count - 1; ++i) { 175 for (int j = i + 1; j < trees.Count; ++j) { 176 avg += ComputeSimilarity(hashes[i], hashes[j]); 177 } 178 } 179 return avg / total; 180 } 181 182 public static double[,] ComputeSimilarityMatrix(IList<ISymbolicExpressionTree> trees, bool simplify = false, bool strict = false) { 183 var sim = new double[trees.Count, trees.Count]; 184 var hashes = new ulong[trees.Count][]; 185 // build hash arrays 186 for (int i = 0; i < trees.Count; ++i) { 187 var nodes = trees[i].MakeNodes(strict); 188 hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 189 Array.Sort(hashes[i]); 190 } 191 // compute similarity matrix 192 for (int i = 0; i < trees.Count - 1; ++i) { 193 for (int j = i + 1; j < trees.Count; ++j) { 194 sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]); 195 } 196 } 197 return sim; 198 } 199 #endregion 84 200 85 201 #region parse a nodes array back into a tree … … 98 214 var node = nodes[i]; 99 215 100 if (node.Is Child) {216 if (node.IsLeaf) { 101 217 if (node.Data is VariableTreeNode variable) { 102 218 var variableTreeNode = (VariableTreeNode)treeNodes[i]; 103 219 variableTreeNode.VariableName = variable.VariableName; 104 variableTreeNode.Weight = 1;220 variableTreeNode.Weight = variable.Weight; 105 221 } else if (node.Data is ConstantTreeNode @const) { 106 222 var constantTreeNode = (ConstantTreeNode)treeNodes[i]; … … 127 243 #region tree simplification 128 244 // these simplification methods rely on the assumption that child nodes of the current node have already been simplified 129 // (in other words simplification should be applied in a bottom-up fashion) 245 // (in other words simplification should be applied in a bottom-up fashion) 130 246 public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) { 247 ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes); 131 248 var root = tree.Root.GetSubtree(0).GetSubtree(0); 132 249 var nodes = root.MakeNodes(); 133 var simplified = nodes.Simplify( );250 var simplified = nodes.Simplify(hashFunction); 134 251 return simplified.ToTree(); 135 252 } 136 253 137 public static void SimplifyAddition( HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {254 public static void SimplifyAddition(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) { 138 255 // simplify additions of terms by eliminating terms with the same symbol and hash 139 256 var children = nodes.IterateChildren(i); 140 257 258 // we always assume the child nodes are sorted 141 259 var curr = children[0]; 142 260 var node = nodes[i]; … … 144 262 foreach (var j in children.Skip(1)) { 145 263 if (nodes[j] == nodes[curr]) { 146 for (int k = j - nodes[j].Size; k <= j; ++k) { 147 nodes[k].Enabled = false; 148 } 264 nodes.SetEnabled(j, false); 149 265 node.Arity--; 150 266 } else { … … 157 273 } 158 274 159 // simplify multiplications by reducing constants and div terms 160 public static void SimplifyMultiplication( HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {275 // simplify multiplications by reducing constants and div terms 276 public static void SimplifyMultiplication(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) { 161 277 var node = nodes[i]; 162 278 var children = nodes.IterateChildren(i); … … 174 290 var d = children[k]; 175 291 if (nodes[d].Data.Symbol is Constant) { 176 ((ConstantTreeNode)child.Data).Value *= ((ConstantTreeNode)nodes[d].Data).Value;177 292 nodes[d].Enabled = false; 178 293 node.Arity--; … … 207 322 } 208 323 209 public static void SimplifyDivision( HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {324 public static void SimplifyDivision(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) { 210 325 var node = nodes[i]; 211 326 var children = nodes.IterateChildren(i); 212 327 213 if (children.All(x => nodes[x].Data.Symbol is Constant)) { 328 var tmp = nodes; 329 330 if (children.All(x => tmp[x].Data.Symbol is Constant)) { 214 331 var v = ((ConstantTreeNode)nodes[children.First()].Data).Value; 215 332 if (node.Arity == 1) { … … 242 359 } 243 360 244 public static void SimplifyUnaryNode( HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {361 public static void SimplifyUnaryNode(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) { 245 362 // check if the child of the unary node is a constant, then the whole node can be simplified 246 363 var parent = nodes[i]; … … 257 374 } 258 375 259 public static void SimplifyBinaryNode( HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) {376 public static void SimplifyBinaryNode(ref HashNode<ISymbolicExpressionTreeNode>[] nodes, int i) { 260 377 var children = nodes.IterateChildren(i); 261 if (children.All(x => nodes[x].Data.Symbol is Constant)) { 378 var tmp = nodes; 379 if (children.All(x => tmp[x].Data.Symbol is Constant)) { 262 380 foreach (var j in children) { 263 381 nodes[j].Enabled = false; -
stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj
r17072 r17090 128 128 <ItemGroup> 129 129 <Compile Include="Analyzers\SymbolicDataAnalysisBottomUpDiversityAnalyzer.cs" /> 130 <Compile Include="Analyzers\SymbolicDataAnalysisBuildingBlockAnalyzer.cs" /> 130 131 <Compile Include="Analyzers\SymbolicDataAnalysisSingleObjectivePruningAnalyzer.cs" /> 131 132 <Compile Include="Analyzers\SymbolicDataAnalysisSingleObjectiveValidationParetoBestSolutionAnalyzer.cs" /> … … 146 147 <Compile Include="Converters\DerivativeCalculator.cs" /> 147 148 <Compile Include="Converters\TreeToAutoDiffTermConverter.cs" /> 149 <Compile Include="Crossovers\SymbolicDataAnalysisExpressionDiversityPreservingCrossover.cs" /> 148 150 <Compile Include="Formatters\InfixExpressionFormatter.cs" /> 149 151 <Compile Include="Formatters\TSQLExpressionFormatter.cs" /> 150 152 <Compile Include="Formatters\SymbolicDataAnalysisExpressionMathematicaFormatter.cs" /> 151 153 <Compile Include="Formatters\SymbolicDataAnalysisExpressionCSharpFormatter.cs" /> 154 <Compile Include="Hashing\HashExtensions.cs" /> 155 <Compile Include="Hashing\HashUtil.cs" /> 156 <Compile Include="Hashing\SymbolicExpressionTreeHash.cs" /> 152 157 <Compile Include="Importer\InfixExpressionParser.cs" /> 153 158 <Compile Include="Importer\SymbolicExpressionImporter.cs" /> -
stable/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisModel.cs
r17054 r17090 118 118 ConstantTreeNode alphaTreeNode = null; 119 119 ConstantTreeNode betaTreeNode = null; 120 // check if model has been scaled previously by analyzing the structure of the tree120 // check if model has a structure that can be re-used for scaling 121 121 var startNode = SymbolicExpressionTree.Root.GetSubtree(0); 122 if (startNode.GetSubtree(0).Symbol is Addition) { 123 var addNode = startNode.GetSubtree(0); 124 if (addNode.SubtreeCount == 2 && addNode.GetSubtree(0).Symbol is Multiplication && addNode.GetSubtree(1).Symbol is Constant) { 125 alphaTreeNode = addNode.GetSubtree(1) as ConstantTreeNode; 126 var mulNode = addNode.GetSubtree(0); 127 if (mulNode.SubtreeCount == 2 && mulNode.GetSubtree(1).Symbol is Constant) { 128 betaTreeNode = mulNode.GetSubtree(1) as ConstantTreeNode; 129 } 122 var addNode = startNode.GetSubtree(0); 123 if (addNode.Symbol is Addition && addNode.SubtreeCount == 2) { 124 alphaTreeNode = (ConstantTreeNode)addNode.Subtrees.LastOrDefault(n => n is ConstantTreeNode); 125 var mulNode = addNode.Subtrees.FirstOrDefault(n => n.Symbol is Multiplication); 126 if (mulNode != null) { 127 betaTreeNode = (ConstantTreeNode)mulNode.Subtrees.LastOrDefault(n => n is ConstantTreeNode); 130 128 } 131 129 }
Note: See TracChangeset
for help on using the changeset viewer.