- Timestamp:
- 11/19/18 14:34:25 (6 years ago)
- Location:
- branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic
- Files:
-
- 9 edited
- 7 copied
Legend:
- Unmodified
- Added
- Removed
-
branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic
-
branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Converters/DerivativeCalculator.cs
r16240 r16304 21 21 22 22 using System; 23 using System.Collections.Generic; 23 24 using System.Linq; 24 25 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; … … 27 28 public static class DerivativeCalculator { 28 29 public static ISymbolicExpressionTree Derive(ISymbolicExpressionTree tree, string variableName) { 30 if (tree.Root.SubtreeCount != 1) 31 throw new NotImplementedException("Derive is not implemented for symbolic expressions with automatically defined functions (ADF)"); 32 if (tree.Root.GetSubtree(0).SubtreeCount != 1) 33 throw new NotImplementedException("Derive is not implemented for multi-variate symbolic expressions"); 29 34 var mainBranch = tree.Root.GetSubtree(0).GetSubtree(0); 30 35 var root = new ProgramRootSymbol().CreateTreeNode(); 31 36 root.AddSubtree(new StartSymbol().CreateTreeNode()); 32 37 var dTree = TreeSimplifier.GetSimplifiedTree(Derive(mainBranch, variableName)); 33 // 38 //var dTree = Derive(mainBranch, variableName); 34 39 root.GetSubtree(0).AddSubtree(dTree); 35 40 return new SymbolicExpressionTree(root); 36 41 } 37 42 38 private static Constant constantSy = new Constant(); 39 private static Addition addSy = new Addition(); 40 private static Subtraction subSy = new Subtraction(); 41 private static Multiplication mulSy = new Multiplication(); 42 private static Division divSy = new Division(); 43 private static readonly Constant constantSy = new Constant(); 44 private static readonly Addition addSy = new Addition(); 45 private static readonly Subtraction subSy = new Subtraction(); 46 private static readonly Multiplication mulSy = new Multiplication(); 47 private static readonly Division divSy = new Division(); 48 private static readonly Cosine cosSy = new Cosine(); 49 private static readonly Square sqrSy = new Square(); 43 50 44 51 public static ISymbolicExpressionTreeNode Derive(ISymbolicExpressionTreeNode branch, string variableName) { … … 85 92 } 86 93 return fgPrime; 87 } else throw new ArgumentException(); 94 } else 95 // multiplication with only one argument has no effect -> derive the argument 96 return Derive(branch.GetSubtree(0), variableName); 88 97 } 89 98 if (branch.Symbol is Division) { … … 95 104 sqrNode.AddSubtree(g); 96 105 return Div(gPrime, sqrNode); 97 } else if (branch.SubtreeCount == 2) { 106 } else { 107 // for two subtrees: 108 // (f/g)' = (f'g - fg')/g² 109 110 // if there are more than 2 subtrees 111 // div(x,y,z) is interpretered as (x/y)/z 112 // which is the same as x / (y*z) 113 114 // --> make a product of all but the first subtree and differentiate as for the 2-argument case above 98 115 var f = (ISymbolicExpressionTreeNode)branch.GetSubtree(0).Clone(); 99 var g = (ISymbolicExpressionTreeNode)branch.GetSubtree(1).Clone();116 var g = Product(branch.Subtrees.Skip(1).Select(n => (ISymbolicExpressionTreeNode)n.Clone())); 100 117 var fprime = Derive(f, variableName); 101 118 var gprime = Derive(g, variableName); 102 var sqrNode = new Square().CreateTreeNode(); 103 sqrNode.AddSubtree((ISymbolicExpressionTreeNode)branch.GetSubtree(1).Clone()); 104 return Div(Subtract(Product(fprime, g), Product(f, gprime)), sqrNode); 105 } else throw new NotSupportedException(); 119 var gSqr = Square(g); 120 return Div(Subtract(Product(fprime, g), Product(f, gprime)), gSqr); 121 } 106 122 } 107 123 if (branch.Symbol is Logarithm) { … … 113 129 return Product(f, Derive(branch.GetSubtree(0), variableName)); 114 130 } 115 if (branch.Symbol is Square) {131 if (branch.Symbol is Square) { 116 132 var f = (ISymbolicExpressionTreeNode)branch.GetSubtree(0).Clone(); 117 133 return Product(Product(CreateConstant(2.0), f), Derive(f, variableName)); 118 } 119 if (branch.Symbol is SquareRoot) {134 } 135 if (branch.Symbol is SquareRoot) { 120 136 var f = (ISymbolicExpressionTreeNode)branch.Clone(); 121 137 var u = (ISymbolicExpressionTreeNode)branch.GetSubtree(0).Clone(); … … 133 149 sin.AddSubtree(u); 134 150 return Product(CreateConstant(-1.0), Product(sin, Derive(u, variableName))); 151 } 152 if (branch.Symbol is Tangent) { 153 // tan(x)' = 1 / cos²(x) 154 var fxp = Derive(branch.GetSubtree(0), variableName); 155 var u = (ISymbolicExpressionTreeNode)branch.GetSubtree(0).Clone(); 156 return Div(fxp, Square(Cosine(u))); 135 157 } 136 158 throw new NotSupportedException(string.Format("Symbol {0} is not supported.", branch.Symbol)); … … 144 166 return product; 145 167 } 168 private static ISymbolicExpressionTreeNode Product(IEnumerable<ISymbolicExpressionTreeNode> fs) { 169 var product = mulSy.CreateTreeNode(); 170 foreach (var f in fs) product.AddSubtree(f); 171 return product; 172 } 146 173 private static ISymbolicExpressionTreeNode Div(ISymbolicExpressionTreeNode f, ISymbolicExpressionTreeNode g) { 147 174 var div = divSy.CreateTreeNode(); … … 163 190 return sum; 164 191 } 165 192 private static ISymbolicExpressionTreeNode Cosine(ISymbolicExpressionTreeNode f) { 193 var cos = cosSy.CreateTreeNode(); 194 cos.AddSubtree(f); 195 return cos; 196 } 197 private static ISymbolicExpressionTreeNode Square(ISymbolicExpressionTreeNode f) { 198 var sqr = sqrSy.CreateTreeNode(); 199 sqr.AddSubtree(f); 200 return sqr; 201 } 202 166 203 private static ISymbolicExpressionTreeNode CreateConstant(double v) { 167 204 var constNode = (ConstantTreeNode)constantSy.CreateTreeNode(); … … 186 223 !(n.Symbol is Sine) && 187 224 !(n.Symbol is Cosine) && 225 !(n.Symbol is Tangent) && 188 226 !(n.Symbol is StartSymbol) 189 227 select n).Any(); -
branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashExtensions.cs
r16218 r16304 32 32 33 33 public bool Enabled; 34 public intHashValue; // the initial (fixed) hash value for this individual node/data35 public intCalculatedHashValue; // the calculated hash value (taking into account the children hash values)34 public ulong HashValue; // the initial (fixed) hash value for this individual node/data 35 public ulong CalculatedHashValue; // the calculated hash value (taking into account the children hash values) 36 36 37 37 public Action<HashNode<T>[], int> Simplify; 38 38 public IComparer<T> Comparer; 39 39 40 public bool Is Child=> Arity == 0;40 public bool IsLeaf => Arity == 0; 41 41 42 42 public HashNode(IComparer<T> comparer) { … … 67 67 68 68 public override int GetHashCode() { 69 return CalculatedHashValue;69 return (int)CalculatedHashValue; 70 70 } 71 71 … … 79 79 } 80 80 81 public static int ComputeHash<T>(this HashNode<T>[] nodes, int i) where T : class {81 public static ulong ComputeHash<T>(this HashNode<T>[] nodes, int i, Func<byte[], ulong> hashFunction) where T : class { 82 82 var node = nodes[i]; 83 var hashes = new int[node.Arity + 1]; 84 int idx = 0; 85 foreach (int c in nodes.IterateChildren(i)) { 86 hashes[idx++] = nodes[c].CalculatedHashValue; 87 } 88 hashes[idx] = node.HashValue; 89 return HashUtil.JSHash(hashes); 90 } 91 92 public static HashNode<T>[] Simplify<T>(this HashNode<T>[] nodes) where T : class { 93 var reduced = nodes.Reduce(); 94 reduced.Sort(); 83 const int size = sizeof(ulong); 84 var hashes = new ulong[node.Arity + 1]; 85 var bytes = new byte[(node.Arity + 1) * size]; 86 87 for (int j = i - 1, k = 0; k < node.Arity; ++k, j -= 1 + nodes[j].Size) { 88 hashes[k] = nodes[j].CalculatedHashValue; 89 } 90 hashes[node.Arity] = node.HashValue; 91 Buffer.BlockCopy(hashes, 0, bytes, 0, bytes.Length); 92 return hashFunction(bytes); 93 } 94 95 // set the enabled state for the whole subtree rooted at this node 96 public static void SetEnabled<T>(this HashNode<T>[] nodes, int i, bool enabled) where T : class { 97 nodes[i].Enabled = enabled; 98 for (int j = i - nodes[i].Size; j < i; ++j) 99 nodes[j].Enabled = enabled; 100 } 101 102 public static HashNode<T>[] Simplify<T>(this HashNode<T>[] nodes, Func<byte[], ulong> hashFunction) where T : class { 103 var reduced = nodes.UpdateNodeSizes().Reduce().Sort(hashFunction); 95 104 96 105 for (int i = 0; i < reduced.Length; ++i) { 97 106 var node = reduced[i]; 98 if (node.Is Child) {107 if (node.IsLeaf) { 99 108 continue; 100 109 } … … 117 126 } 118 127 } 119 simplified.UpdateNodeSizes();120 simplified.Sort();121 return simplified; 122 }123 124 public static void Sort<T>(this HashNode<T>[] nodes) where T : class { 128 return simplified.UpdateNodeSizes().Reduce().Sort(hashFunction); 129 } 130 131 public static HashNode<T>[] Sort<T>(this HashNode<T>[] nodes, Func<byte[], ulong> hashFunction) where T : class { 132 int sort(int a, int b) => nodes[a].CompareTo(nodes[b]); 133 125 134 for (int i = 0; i < nodes.Length; ++i) { 126 135 var node = nodes[i]; 127 136 128 if (node.Is Child) {137 if (node.IsLeaf) { 129 138 continue; 130 139 } … … 138 147 } else { // i have some non-terminal children 139 148 var sorted = new HashNode<T>[size]; 140 var indices = nodes.IterateChildren(i); 141 Array.Sort(indices, (a, b) => nodes[a].CompareTo(nodes[b])); 149 var indices = new int[node.Arity]; 150 for (int j = i - 1, k = 0; k < node.Arity; j -= 1 + nodes[j].Size, ++k) { 151 indices[k] = j; 152 } 153 Array.Sort(indices, sort); 142 154 143 155 int idx = 0; 144 156 foreach (var j in indices) { 145 157 var child = nodes[j]; 146 if (!child.Is Child) { // must copy complete subtree158 if (!child.IsLeaf) { // must copy complete subtree 147 159 Array.Copy(nodes, j - child.Size, sorted, idx, child.Size); 148 160 idx += child.Size; … … 153 165 } 154 166 } 155 node.CalculatedHashValue = nodes.ComputeHash(i); 156 } 167 node.CalculatedHashValue = nodes.ComputeHash(i, hashFunction); 168 } 169 return nodes; 157 170 } 158 171 159 172 /// <summary> 160 /// Get a function node's child indices from left to right173 /// Get a function node's child indicest 161 174 /// </summary> 162 /// <typeparam name="T"> </typeparam>163 /// <param name="nodes"> </param>164 /// <param name="i"> </param>175 /// <typeparam name="T">The data type encapsulated by a hash node</typeparam> 176 /// <param name="nodes">An array of hash nodes with up-to-date node sizes</param> 177 /// <param name="i">The index in the array of hash nodes of the node whose children we want to iterate</param> 165 178 /// <returns>An array containing child indices</returns> 166 179 public static int[] IterateChildren<T>(this HashNode<T>[] nodes, int i) where T : class { … … 170 183 var idx = i - 1; 171 184 for (int j = 0; j < arity; ++j) { 172 while (!nodes[idx].Enabled) { --idx; } // skip nodes possibly disabled by simplification173 185 children[j] = idx; 174 186 idx -= 1 + nodes[idx].Size; … … 177 189 } 178 190 179 public static voidUpdateNodeSizes<T>(this HashNode<T>[] nodes) where T : class {191 public static HashNode<T>[] UpdateNodeSizes<T>(this HashNode<T>[] nodes) where T : class { 180 192 for (int i = 0; i < nodes.Length; ++i) { 181 193 var node = nodes[i]; 182 if (node.Is Child) {194 if (node.IsLeaf) { 183 195 node.Size = 0; 184 196 continue; 185 197 } 186 198 node.Size = node.Arity; 187 foreach (int j in nodes.IterateChildren(i)) { 199 200 for (int j = i - 1, k = 0; k < node.Arity; j -= 1 + nodes[j].Size, ++k) { 188 201 node.Size += nodes[j].Size; 189 202 } 190 203 } 191 } 192 193 /// <summary> 194 /// Reduce nodes of the same type according to arithmetic rules 195 /// </summary> 196 /// <typeparam name="T"></typeparam> 197 /// <param name="g">The given grammar (controls symbol arities)</param> 198 /// <param name="nodes">The node array</param> 199 /// <param name="i">Parent index</param> 200 /// <param name="j">Child index</param> 201 private static void Reduce<T>(this HashNode<T>[] nodes, int i, int j) where T : class { 202 var p = nodes[i]; // parent node 203 var c = nodes[j]; // child node 204 205 if (c.IsChild) 206 return; 207 208 foreach (int k in nodes.IterateChildren(j)) { 209 if (nodes[k].IsChild) continue; 210 nodes.Reduce(j, k); 211 } 212 213 // handle commutative symbols (add, mul) 214 if (p.IsCommutative && p.HashValue == c.HashValue) { 215 c.Enabled = false; 216 p.Arity += c.Arity - 1; 217 } 218 } 219 220 public static HashNode<T>[] Reduce<T>(this HashNode<T>[] nodes) where T : class { 221 nodes.UpdateNodeSizes(); 222 int i = nodes.Length - 1; 223 foreach (int c in nodes.IterateChildren(i)) { 224 if (nodes[c].IsChild) continue; 225 nodes.Reduce(i, c); 226 } 204 return nodes; 205 } 206 207 private static HashNode<T>[] Reduce<T>(this HashNode<T>[] nodes) where T : class { 227 208 int count = 0; 228 foreach (var node in nodes) { 229 if (!node.Enabled) { ++count; } 209 for (int i = 0; i < nodes.Length; ++i) { 210 var node = nodes[i]; 211 if (node.IsLeaf || !node.IsCommutative) { 212 continue; 213 } 214 215 var arity = node.Arity; 216 for (int j = i - 1, k = 0; k < arity; j -= 1 + nodes[j].Size, ++k) { 217 if (node.HashValue == nodes[j].HashValue) { 218 nodes[j].Enabled = false; 219 node.Arity += nodes[j].Arity - 1; 220 ++count; 221 } 222 } 230 223 } 231 224 if (count == 0) … … 233 226 234 227 var reduced = new HashNode<T>[nodes.Length - count]; 235 i= 0;228 var idx = 0; 236 229 foreach (var node in nodes) { 237 if (node.Enabled) { reduced[i++] = node; } 238 } 239 reduced.UpdateNodeSizes(); 240 return reduced; 230 if (node.Enabled) { reduced[idx++] = node; } 231 } 232 return reduced.UpdateNodeSizes(); 241 233 } 242 234 } -
branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashUtil.cs
r16218 r16304 20 20 #endregion 21 21 22 22 23 using System; 23 24 using System.Security.Cryptography; … … 28 29 29 30 // A simple hash function from Robert Sedgwicks Algorithms in C book.I've added some simple optimizations to the algorithm in order to speed up its hashing process. 30 public static int RSHash(int[] input) {31 public static ulong RSHash(byte[] input) { 31 32 const int b = 378551; 32 inta = 63689;33 inthash = 0;33 ulong a = 63689; 34 ulong hash = 0; 34 35 35 36 foreach (var v in input) { … … 41 42 42 43 // A bitwise hash function written by Justin Sobel 43 public static int JSHash(int[] input) {44 inthash = 1315423911;44 public static ulong JSHash(byte[] input) { 45 ulong hash = 1315423911; 45 46 for (int i = 0; i < input.Length; ++i) 46 47 hash ^= (hash << 5) + input[i] + (hash >> 2); … … 49 50 50 51 // This hash function comes from Brian Kernighan and Dennis Ritchie's book "The C Programming Language". It is a simple hash function using a strange set of possible seeds which all constitute a pattern of 31....31...31 etc, it seems to be very similar to the DJB hash function. 51 public static int BKDRHash(int[] input) {52 const intseed = 131;53 inthash = 0;52 public static ulong BKDRHash(byte[] input) { 53 ulong seed = 131; 54 ulong hash = 0; 54 55 foreach (var v in input) { 55 56 hash = (hash * seed) + v; … … 59 60 60 61 // This is the algorithm of choice which is used in the open source SDBM project. The hash function seems to have a good over-all distribution for many different data sets. It seems to work well in situations where there is a high variance in the MSBs of the elements in a data set. 61 public static int SDBMHash(int[] input) {62 inthash = 0;62 public static ulong SDBMHash(byte[] input) { 63 ulong hash = 0; 63 64 foreach (var v in input) { 64 65 hash = v + (hash << 6) + (hash << 16) - hash; … … 68 69 69 70 // An algorithm produced by Professor Daniel J. Bernstein and shown first to the world on the usenet newsgroup comp.lang.c. It is one of the most efficient hash functions ever published. 70 public static int DJBHash(int[] input) {71 inthash = 5381;71 public static ulong DJBHash(byte[] input) { 72 ulong hash = 5381; 72 73 foreach (var v in input) { 73 74 hash = (hash << 5) + hash + v; … … 77 78 78 79 // An algorithm proposed by Donald E.Knuth in The Art Of Computer Programming Volume 3, under the topic of sorting and search chapter 6.4. 79 public static int DEKHash(int[] input) {80 int hash =input.Length;80 public static ulong DEKHash(byte[] input) { 81 ulong hash = (ulong)input.Length; 81 82 foreach (var v in input) { 82 83 hash = (hash << 5) ^ (hash >> 27) ^ v; … … 85 86 } 86 87 87 public static int CryptoHash(HashAlgorithm ha, int[] input) { 88 return BitConverter.ToInt32(ha.ComputeHash(input.ToByteArray()), 0); 89 } 90 91 public static byte[] ToByteArray(this int[] input) { 92 const int size = sizeof(int); 93 var bytes = new byte[input.Length * sizeof(int)]; 94 for (int i = 0; i < input.Length; ++i) { 95 Array.Copy(BitConverter.GetBytes(bytes[i]), 0, bytes, i * size, size); 96 } 97 return bytes; 88 public static ulong CryptoHash(HashAlgorithm ha, byte[] input) { 89 return BitConverter.ToUInt64(ha.ComputeHash(input), 0); 98 90 } 99 91 } -
branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/SymbolicExpressionTreeHash.cs
r16218 r16304 1 using System.Collections.Generic; 1 #region License Information 2 /* HeuristicLab 3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL) 4 * 5 * This file is part of HeuristicLab. 6 * 7 * HeuristicLab is free software: you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation, either version 3 of the License, or 10 * (at your option) any later version. 11 * 12 * HeuristicLab is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>. 19 */ 20 #endregion 21 22 using System; 2 23 using System.Linq; 3 24 using HeuristicLab.Encodings.SymbolicExpressionTreeEncoding; … … 18 39 private static readonly ISymbolicExpressionTreeNodeComparer comparer = new SymbolicExpressionTreeNodeComparer(); 19 40 20 public static intComputeHash(this ISymbolicExpressionTree tree) {41 public static ulong ComputeHash(this ISymbolicExpressionTree tree) { 21 42 return ComputeHash(tree.Root.GetSubtree(0).GetSubtree(0)); 22 43 } 23 44 24 public static Dictionary<ISymbolicExpressionTreeNode, int> ComputeNodeHashes(this ISymbolicExpressionTree tree) { 25 var root = tree.Root.GetSubtree(0).GetSubtree(0); 26 var nodes = root.MakeNodes(); 27 nodes.UpdateNodeSizes(); 28 29 for (int i = 0; i < nodes.Length; ++i) { 30 if (nodes[i].IsChild) 31 continue; 32 nodes[i].CalculatedHashValue = nodes.ComputeHash(i); 33 } 34 return nodes.ToDictionary(x => x.Data, x => x.CalculatedHashValue); 35 } 36 37 public static int ComputeHash(this ISymbolicExpressionTreeNode treeNode) { 38 var hashNodes = treeNode.MakeNodes(); 39 var simplified = hashNodes.Simplify(); 40 return ComputeHash(simplified); 41 } 42 43 public static int ComputeHash(this HashNode<ISymbolicExpressionTreeNode>[] nodes) { 44 int hash = 1315423911; 45 foreach (var node in nodes) 46 hash ^= (hash << 5) + node.CalculatedHashValue + (hash >> 2); 47 return hash; 48 } 49 50 public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node) { 45 public static double ComputeSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool simplify = false) { 46 return ComputeSimilarity(t1.Root.GetSubtree(0).GetSubtree(0), t2.Root.GetSubtree(0).GetSubtree(0), simplify); 47 } 48 49 public static double ComputeSimilarity(ISymbolicExpressionTreeNode t1, ISymbolicExpressionTreeNode t2, bool simplify = false) { 50 HashNode<ISymbolicExpressionTreeNode>[] lhs; 51 HashNode<ISymbolicExpressionTreeNode>[] rhs; 52 53 ulong hashFunction(byte[] input) => HashUtil.DJBHash(input); 54 55 if (simplify) { 56 lhs = t1.MakeNodes().Simplify(hashFunction); 57 rhs = t2.MakeNodes().Simplify(hashFunction); 58 } else { 59 lhs = t1.MakeNodes().Sort(hashFunction); // sort calculates hash values 60 rhs = t2.MakeNodes().Sort(hashFunction); 61 } 62 63 var lh = lhs.Select(x => x.CalculatedHashValue).ToArray(); 64 var rh = rhs.Select(x => x.CalculatedHashValue).ToArray(); 65 66 Array.Sort(lh); 67 Array.Sort(rh); 68 69 return ComputeSimilarity(lh, rh); 70 } 71 72 // this will only work if lh and rh are sorted 73 private static double ComputeSimilarity(ulong[] lh, ulong[] rh) { 74 double count = 0; 75 for (int i = 0, j = 0; i < lh.Length && j < rh.Length;) { 76 var h1 = lh[i]; 77 var h2 = rh[j]; 78 if (h1 == h2) { 79 ++count; 80 ++i; 81 ++j; 82 } else if (h1 < h2) { 83 ++i; 84 } else if (h1 > h2) { 85 ++j; 86 } 87 } 88 89 return 2d * count / (lh.Length + rh.Length); 90 } 91 92 public static double ComputeAverageSimilarity(ISymbolicExpressionTree[] trees, bool simplify = false, bool strict = false) { 93 var total = (double)trees.Length * (trees.Length - 1) / 2; 94 double avg = 0; 95 var hashes = new ulong[trees.Length][]; 96 // build hash arrays 97 for (int i = 0; i < trees.Length; ++i) { 98 var nodes = trees[i].MakeNodes(strict); 99 hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 100 Array.Sort(hashes[i]); 101 } 102 // compute similarity matrix 103 for (int i = 0; i < trees.Length - 1; ++i) { 104 for (int j = i + 1; j < trees.Length; ++j) { 105 avg += ComputeSimilarity(hashes[i], hashes[j]); 106 } 107 } 108 return avg / total; 109 } 110 111 public static double[,] ComputeSimilarityMatrix(ISymbolicExpressionTree[] trees, bool simplify = false, bool strict = false) { 112 var sim = new double[trees.Length, trees.Length]; 113 var hashes = new ulong[trees.Length][]; 114 // build hash arrays 115 for (int i = 0; i < trees.Length; ++i) { 116 var nodes = trees[i].MakeNodes(strict); 117 hashes[i] = (simplify ? nodes.Simplify(HashUtil.DJBHash) : nodes.Sort(HashUtil.DJBHash)).Select(x => x.CalculatedHashValue).ToArray(); 118 Array.Sort(hashes[i]); 119 } 120 // compute similarity matrix 121 for (int i = 0; i < trees.Length - 1; ++i) { 122 for (int j = i + 1; j < trees.Length; ++j) { 123 sim[i, j] = sim[j, i] = ComputeSimilarity(hashes[i], hashes[j]); 124 } 125 } 126 return sim; 127 } 128 129 public static ulong ComputeHash(this ISymbolicExpressionTreeNode treeNode, bool strict = false) { 130 ulong hashFunction(byte[] input) => HashUtil.JSHash(input); 131 var hashNodes = treeNode.MakeNodes(strict); 132 var simplified = hashNodes.Simplify(hashFunction); 133 return simplified.Last().CalculatedHashValue; 134 } 135 136 public static HashNode<ISymbolicExpressionTreeNode> ToHashNode(this ISymbolicExpressionTreeNode node, bool strict = false) { 51 137 var symbol = node.Symbol; 52 138 var name = symbol.Name; 53 if (symbol is Variable) { 54 var variableTreeNode = (VariableTreeNode)node; 55 name = variableTreeNode.VariableName; 56 } 57 var hash = name.GetHashCode(); 139 if (node is ConstantTreeNode constantNode) { 140 name = strict ? constantNode.Value.ToString() : symbol.Name; 141 } else if (node is VariableTreeNode variableNode) { 142 name = strict ? variableNode.Weight.ToString() + variableNode.VariableName : variableNode.VariableName; 143 } 144 var hash = (ulong)name.GetHashCode(); 58 145 var hashNode = new HashNode<ISymbolicExpressionTreeNode>(comparer) { 59 146 Data = node, … … 79 166 } 80 167 81 public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node) { 82 return node.IterateNodesPostfix().Select(ToHashNode).ToArray(); 168 public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTree tree, bool strict = false) { 169 return MakeNodes(tree.Root.GetSubtree(0).GetSubtree(0), strict); 170 } 171 172 public static HashNode<ISymbolicExpressionTreeNode>[] MakeNodes(this ISymbolicExpressionTreeNode node, bool strict = false) { 173 return node.IterateNodesPostfix().Select(x => x.ToHashNode(strict)).ToArray().UpdateNodeSizes(); 83 174 } 84 175 … … 98 189 var node = nodes[i]; 99 190 100 if (node.Is Child) {191 if (node.IsLeaf) { 101 192 if (node.Data is VariableTreeNode variable) { 102 193 var variableTreeNode = (VariableTreeNode)treeNodes[i]; 103 194 variableTreeNode.VariableName = variable.VariableName; 104 variableTreeNode.Weight = 1;195 variableTreeNode.Weight = variable.Weight; 105 196 } else if (node.Data is ConstantTreeNode @const) { 106 197 var constantTreeNode = (ConstantTreeNode)treeNodes[i]; … … 129 220 // (in other words simplification should be applied in a bottom-up fashion) 130 221 public static ISymbolicExpressionTree Simplify(ISymbolicExpressionTree tree) { 222 ulong hashFunction(byte[] bytes) => HashUtil.JSHash(bytes); 131 223 var root = tree.Root.GetSubtree(0).GetSubtree(0); 132 224 var nodes = root.MakeNodes(); 133 var simplified = nodes.Simplify( );225 var simplified = nodes.Simplify(hashFunction); 134 226 return simplified.ToTree(); 135 227 } … … 174 266 var d = children[k]; 175 267 if (nodes[d].Data.Symbol is Constant) { 176 ((ConstantTreeNode)child.Data).Value *= ((ConstantTreeNode)nodes[d].Data).Value;177 268 nodes[d].Enabled = false; 178 269 node.Arity--; -
branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/HeuristicLab.Problems.DataAnalysis.Symbolic-3.4.csproj
r16240 r16304 107 107 <Private>False</Private> 108 108 </Reference> 109 <Reference Include="HeuristicLab.Problems.DataAnalysis.Symbolic.NativeInterpreter-0.1, Version=0.0.0.1, Culture=neutral, PublicKeyToken=ba48961d6f65dcec, processorArchitecture=MSIL"> 110 <SpecificVersion>False</SpecificVersion> 111 <HintPath>..\..\bin\HeuristicLab.Problems.DataAnalysis.Symbolic.NativeInterpreter-0.1.dll</HintPath> 112 <Private>False</Private> 113 </Reference> 109 114 <Reference Include="System" /> 110 115 <Reference Include="System.Core"> … … 123 128 <ItemGroup> 124 129 <Compile Include="Analyzers\SymbolicDataAnalysisBottomUpDiversityAnalyzer.cs" /> 130 <Compile Include="Analyzers\SymbolicDataAnalysisBuildingBlockAnalyzer.cs" /> 125 131 <Compile Include="Analyzers\SymbolicDataAnalysisSingleObjectivePruningAnalyzer.cs" /> 126 132 <Compile Include="Analyzers\SymbolicDataAnalysisSingleObjectiveValidationParetoBestSolutionAnalyzer.cs" /> … … 141 147 <Compile Include="Converters\DerivativeCalculator.cs" /> 142 148 <Compile Include="Converters\TreeToAutoDiffTermConverter.cs" /> 149 <Compile Include="Crossovers\SymbolicDataAnalysisExpressionDiversityPreservingCrossover.cs" /> 143 150 <Compile Include="Formatters\InfixExpressionFormatter.cs" /> 144 151 <Compile Include="Formatters\TSQLExpressionFormatter.cs" /> … … 154 161 <Compile Include="Interfaces\IVariableTreeNode.cs" /> 155 162 <Compile Include="Interfaces\IVariableSymbol.cs" /> 163 <Compile Include="Interpreter\BatchInstruction.cs" /> 164 <Compile Include="Interpreter\BatchOperations.cs" /> 156 165 <Compile Include="Interpreter\SymbolicDataAnalysisExpressionCompiledTreeInterpreter.cs" /> 166 <Compile Include="Interpreter\SymbolicDataAnalysisExpressionTreeBatchInterpreter.cs" /> 167 <Compile Include="Interpreter\SymbolicDataAnalysisExpressionTreeNativeInterpreter.cs" /> 157 168 <Compile Include="SymbolicDataAnalysisExpressionTreeSimplificationOperator.cs" /> 158 169 <Compile Include="SymbolicDataAnalysisModelComplexityCalculator.cs" /> -
branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Plugin.cs.frame
r15589 r16304 37 37 [PluginDependency("HeuristicLab.Data", "3.3")] 38 38 [PluginDependency("HeuristicLab.Encodings.SymbolicExpressionTreeEncoding", "3.4")] 39 [PluginDependency("HeuristicLab.NativeInterpreter", "0.1")] 39 40 [PluginDependency("HeuristicLab.Operators", "3.3")] 40 41 [PluginDependency("HeuristicLab.Optimization", "3.3")] -
branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/SymbolicDataAnalysisModel.cs
r15583 r16304 34 34 /// </summary> 35 35 [StorableClass] 36 public abstract class SymbolicDataAnalysisModel : NamedItem, ISymbolicDataAnalysisModel {36 public abstract class SymbolicDataAnalysisModel : DataAnalysisModel, ISymbolicDataAnalysisModel { 37 37 public static new Image StaticItemImage { 38 38 get { return HeuristicLab.Common.Resources.VSImageLibrary.Function; } … … 59 59 } 60 60 61 public IEnumerable<string> VariablesUsedForPrediction {61 public override IEnumerable<string> VariablesUsedForPrediction { 62 62 get { 63 63 var variables = … … 118 118 ConstantTreeNode alphaTreeNode = null; 119 119 ConstantTreeNode betaTreeNode = null; 120 // check if model has been scaled previously by analyzing the structure of the tree120 // check if model has a structure that can be re-used for scaling 121 121 var startNode = SymbolicExpressionTree.Root.GetSubtree(0); 122 if (startNode.GetSubtree(0).Symbol is Addition) { 123 var addNode = startNode.GetSubtree(0); 124 if (addNode.SubtreeCount == 2 && addNode.GetSubtree(0).Symbol is Multiplication && addNode.GetSubtree(1).Symbol is Constant) { 125 alphaTreeNode = addNode.GetSubtree(1) as ConstantTreeNode; 126 var mulNode = addNode.GetSubtree(0); 127 if (mulNode.SubtreeCount == 2 && mulNode.GetSubtree(1).Symbol is Constant) { 128 betaTreeNode = mulNode.GetSubtree(1) as ConstantTreeNode; 129 } 122 var addNode = startNode.GetSubtree(0); 123 if (addNode.Symbol is Addition && addNode.SubtreeCount == 2) { 124 alphaTreeNode = (ConstantTreeNode)addNode.Subtrees.LastOrDefault(n => n is ConstantTreeNode); 125 var mulNode = addNode.Subtrees.FirstOrDefault(n => n.Symbol is Multiplication); 126 if (mulNode != null) { 127 betaTreeNode = (ConstantTreeNode)mulNode.Subtrees.LastOrDefault(n => n is ConstantTreeNode); 130 128 } 131 129 } -
branches/2915-AbsoluteSymbol/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/TreeMatching/SymbolicExpressionTreeBottomUpSimilarityCalculator.cs
r15583 r16304 22 22 using System; 23 23 using System.Collections.Generic; 24 using System.Diagnostics;25 24 using System.Globalization; 26 25 using System.Linq; … … 31 30 using HeuristicLab.Persistence.Default.CompositeSerializers.Storable; 32 31 32 using NodeMap = System.Collections.Generic.Dictionary<HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.ISymbolicExpressionTreeNode, HeuristicLab.Encodings.SymbolicExpressionTreeEncoding.ISymbolicExpressionTreeNode>; 33 33 34 namespace HeuristicLab.Problems.DataAnalysis.Symbolic { 34 35 [StorableClass] … … 40 41 protected override bool IsCommutative { get { return true; } } 41 42 43 public bool MatchConstantValues { get; set; } 44 public bool MatchVariableWeights { get; set; } 45 42 46 [StorableConstructor] 43 47 protected SymbolicExpressionTreeBottomUpSimilarityCalculator(bool deserializing) … … 53 57 } 54 58 59 #region static methods 60 private static ISymbolicExpressionTreeNode ActualRoot(ISymbolicExpressionTree tree) { 61 return tree.Root.GetSubtree(0).GetSubtree(0); 62 } 63 64 public static double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool strict = false) { 65 return CalculateSimilarity(ActualRoot(t1), ActualRoot(t2), strict); 66 } 67 68 public static double CalculateSimilarity(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2, bool strict = false) { 69 var calculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator { MatchConstantValues = strict, MatchVariableWeights = strict }; 70 return CalculateSimilarity(n1, n2, strict); 71 } 72 73 public static Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, bool strict = false) { 74 return ComputeBottomUpMapping(ActualRoot(t1), ActualRoot(t2), strict); 75 } 76 77 public static Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2, bool strict = false) { 78 var calculator = new SymbolicExpressionTreeBottomUpSimilarityCalculator { MatchConstantValues = strict, MatchVariableWeights = strict }; 79 return calculator.ComputeBottomUpMapping(n1, n2); 80 } 81 #endregion 82 55 83 public double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) { 56 if (t1 == t2) 84 return CalculateSimilarity(t1, t2, out Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> map); 85 } 86 87 public double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2, out NodeMap map) { 88 if (t1 == t2) { 89 map = null; 57 90 return 1; 58 59 var map = ComputeBottomUpMapping(t1.Root, t2.Root);60 return 2.0 * map.Count / (t1.Length + t2.Length );91 } 92 map = ComputeBottomUpMapping(t1, t2); 93 return 2.0 * map.Count / (t1.Length + t2.Length - 4); // -4 for skipping root and start symbols in the two trees 61 94 } 62 95 … … 78 111 } 79 112 113 public Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) { 114 return ComputeBottomUpMapping(t1.Root.GetSubtree(0).GetSubtree(0), t2.Root.GetSubtree(0).GetSubtree(0)); 115 } 116 80 117 public Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode> ComputeBottomUpMapping(ISymbolicExpressionTreeNode n1, ISymbolicExpressionTreeNode n2) { 81 var comparer = new SymbolicExpressionTreeNodeComparer(); // use a node comparer because it's faster than calling node.ToString() (strings are expensive) and comparing strings82 118 var compactedGraph = Compact(n1, n2); 83 119 84 var forwardMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t1 => nodes of t2 85 var reverseMap = new Dictionary<ISymbolicExpressionTreeNode, ISymbolicExpressionTreeNode>(); // nodes of t2 => nodes of t1 86 87 // visit nodes in order of decreasing height to ensure correct mapping 88 var nodes1 = n1.IterateNodesPrefix().OrderByDescending(x => x.GetDepth()).ToList(); 89 var nodes2 = n2.IterateNodesPrefix().ToList(); 90 for (int i = 0; i < nodes1.Count; ++i) { 91 var v = nodes1[i]; 92 if (forwardMap.ContainsKey(v)) 120 IEnumerable<ISymbolicExpressionTreeNode> Subtrees(ISymbolicExpressionTreeNode node, bool commutative) { 121 var subtrees = node.IterateNodesPrefix(); 122 return commutative ? subtrees.OrderBy(x => compactedGraph[x].Hash) : subtrees; 123 } 124 125 var nodes1 = n1.IterateNodesPostfix().OrderByDescending(x => x.GetLength()); // by descending length so that largest subtrees are mapped first 126 var nodes2 = (List<ISymbolicExpressionTreeNode>)n2.IterateNodesPostfix(); 127 128 var forward = new NodeMap(); 129 var reverse = new NodeMap(); 130 131 foreach (ISymbolicExpressionTreeNode v in nodes1) { 132 if (forward.ContainsKey(v)) 93 133 continue; 134 94 135 var kv = compactedGraph[v]; 95 ISymbolicExpressionTreeNode w = null;96 for (int j = 0; j < nodes2.Count; ++j) { 97 var t = nodes2[j];98 if ( reverseMap.ContainsKey(t) || compactedGraph[t] != kv)136 var commutative = v.SubtreeCount > 1 && commutativeSymbols.Contains(kv.Label); 137 138 foreach (ISymbolicExpressionTreeNode w in nodes2) { 139 if (w.GetLength() != kv.Length || w.GetDepth() != kv.Depth || reverse.ContainsKey(w) || compactedGraph[w] != kv) 99 140 continue; 100 w = t; 141 142 // map one whole subtree to the other 143 foreach (var t in Subtrees(v, commutative).Zip(Subtrees(w, commutative), Tuple.Create)) { 144 forward[t.Item1] = t.Item2; 145 reverse[t.Item2] = t.Item1; 146 } 147 101 148 break; 102 149 } 103 if (w == null) continue; 104 105 // at this point we know that v and w are isomorphic, however, the mapping cannot be done directly 106 // (as in the paper) because the trees are unordered (subtree order might differ). the solution is 107 // to sort subtrees from under commutative labels (this will work because the subtrees are isomorphic!) 108 // while iterating over the two subtrees 109 var vv = IterateBreadthOrdered(v, comparer).ToList(); 110 var ww = IterateBreadthOrdered(w, comparer).ToList(); 111 int len = Math.Min(vv.Count, ww.Count); 112 for (int j = 0; j < len; ++j) { 113 var s = vv[j]; 114 var t = ww[j]; 115 Debug.Assert(!reverseMap.ContainsKey(t)); 116 117 forwardMap[s] = t; 118 reverseMap[t] = s; 119 } 120 } 121 122 return forwardMap; 150 } 151 152 return forward; 123 153 } 124 154 … … 132 162 var nodeMap = new Dictionary<ISymbolicExpressionTreeNode, GraphNode>(); // K 133 163 var labelMap = new Dictionary<string, GraphNode>(); // L 134 var childrenCount = new Dictionary<ISymbolicExpressionTreeNode, int>(); // Children135 164 136 165 var nodes = n1.IterateNodesPostfix().Concat(n2.IterateNodesPostfix()); // the disjoint union F 137 var list = new List<GraphNode>(); 138 var queue = new Queue<ISymbolicExpressionTreeNode>(); 139 140 foreach (var n in nodes) { 141 if (n.SubtreeCount == 0) { 142 var label = GetLabel(n); 166 var graph = new List<GraphNode>(); 167 168 IEnumerable<GraphNode> Subtrees(GraphNode g, bool commutative) { 169 var subtrees = g.SymbolicExpressionTreeNode.Subtrees.Select(x => nodeMap[x]); 170 return commutative ? subtrees.OrderBy(x => x.Hash) : subtrees; 171 } 172 173 foreach (var node in nodes) { 174 var label = GetLabel(node); 175 176 if (node.SubtreeCount == 0) { 143 177 if (!labelMap.ContainsKey(label)) { 144 var z = new GraphNode { SymbolicExpressionTreeNode = n, Label = label }; 145 labelMap[z.Label] = z; 146 } 147 nodeMap[n] = labelMap[label]; 148 queue.Enqueue(n); 178 labelMap[label] = new GraphNode(node, label); 179 } 180 nodeMap[node] = labelMap[label]; 149 181 } else { 150 childrenCount[n] = n.SubtreeCount; 151 } 152 } 153 while (queue.Any()) { 154 var n = queue.Dequeue(); 155 if (n.SubtreeCount > 0) { 182 var v = new GraphNode(node, label); 156 183 bool found = false; 157 var label = n.Symbol.Name; 158 var depth = n.GetDepth(); 159 160 bool sort = n.SubtreeCount > 1 && commutativeSymbols.Contains(label); 161 var nSubtrees = n.Subtrees.Select(x => nodeMap[x]).ToList(); 162 if (sort) nSubtrees.Sort((a, b) => string.CompareOrdinal(a.Label, b.Label)); 163 164 for (int i = list.Count - 1; i >= 0; --i) { 165 var w = list[i]; 166 if (!(n.SubtreeCount == w.SubtreeCount && label == w.Label && depth == w.Depth)) 184 var commutative = node.SubtreeCount > 1 && commutativeSymbols.Contains(label); 185 186 var vv = Subtrees(v, commutative); 187 188 foreach (var w in graph) { 189 if (v.Depth != w.Depth || v.SubtreeCount != w.SubtreeCount || v.Length != w.Length || v.Label != w.Label) { 167 190 continue; 168 169 // sort V and W when the symbol is commutative because we are dealing with unordered trees 170 var m = w.SymbolicExpressionTreeNode; 171 var mSubtrees = m.Subtrees.Select(x => nodeMap[x]).ToList(); 172 if (sort) mSubtrees.Sort((a, b) => string.CompareOrdinal(a.Label, b.Label)); 173 174 found = nSubtrees.SequenceEqual(mSubtrees); 191 } 192 193 var ww = Subtrees(w, commutative); 194 found = vv.SequenceEqual(ww); 195 175 196 if (found) { 176 nodeMap[n ] = w;197 nodeMap[node] = w; 177 198 break; 178 199 } 179 200 } 180 181 201 if (!found) { 182 var w = new GraphNode { SymbolicExpressionTreeNode = n, Label = label, Depth = depth }; 183 list.Add(w); 184 nodeMap[n] = w; 202 nodeMap[node] = v; 203 graph.Add(v); 185 204 } 186 205 } 187 188 if (n == n1 || n == n2) 189 continue; 190 191 var p = n.Parent; 192 if (p == null) 193 continue; 194 195 childrenCount[p]--; 196 197 if (childrenCount[p] == 0) 198 queue.Enqueue(p); 199 } 200 206 } 201 207 return nodeMap; 202 208 } 203 209 204 private IEnumerable<ISymbolicExpressionTreeNode> IterateBreadthOrdered(ISymbolicExpressionTreeNode node, ISymbolicExpressionTreeNodeComparer comparer) { 205 var list = new List<ISymbolicExpressionTreeNode> { node }; 206 int i = 0; 207 while (i < list.Count) { 208 var n = list[i]; 209 if (n.SubtreeCount > 0) { 210 var subtrees = commutativeSymbols.Contains(node.Symbol.Name) ? n.Subtrees.OrderBy(x => x, comparer) : n.Subtrees; 211 list.AddRange(subtrees); 212 } 213 i++; 214 } 215 return list; 216 } 217 218 private static string GetLabel(ISymbolicExpressionTreeNode node) { 210 private string GetLabel(ISymbolicExpressionTreeNode node) { 219 211 if (node.SubtreeCount > 0) 220 212 return node.Symbol.Name; 221 213 222 var constant = node as ConstantTreeNode; 223 if (constant != null) 224 return constant.Value.ToString(CultureInfo.InvariantCulture); 225 226 var variable = node as VariableTreeNode; 227 if (variable != null) 228 return variable.Weight + variable.VariableName; 214 if (node is ConstantTreeNode constant) 215 return MatchConstantValues ? constant.Value.ToString(CultureInfo.InvariantCulture) : constant.Symbol.Name; 216 217 if (node is VariableTreeNode variable) 218 return MatchVariableWeights ? variable.Weight + variable.VariableName : variable.VariableName; 229 219 230 220 return node.ToString(); … … 232 222 233 223 private class GraphNode { 234 public ISymbolicExpressionTreeNode SymbolicExpressionTreeNode; 235 public string Label; 236 public int Depth; 224 private GraphNode() { } 225 226 public GraphNode(ISymbolicExpressionTreeNode node, string label) { 227 SymbolicExpressionTreeNode = node; 228 Label = label; 229 Hash = GetHashCode(); 230 Depth = node.GetDepth(); 231 Length = node.GetLength(); 232 } 233 234 public int Hash { get; } 235 public ISymbolicExpressionTreeNode SymbolicExpressionTreeNode { get; } 236 public string Label { get; } 237 public int Depth { get; } 237 238 public int SubtreeCount { get { return SymbolicExpressionTreeNode.SubtreeCount; } } 239 public int Length { get; } 238 240 } 239 241 }
Note: See TracChangeset
for help on using the changeset viewer.