source: trunk/HeuristicLab.Problems.DataAnalysis.Symbolic/3.4/Hashing/HashExtensions.cs @ 16218

Last change on this file since 16218 was 16218, checked in by bburlacu, 2 years ago

#2950: Initial commit of hashing functionality as well as simplification rules for symbolic expression trees. As this is still in development the public api is not yet established (all methods public for now).

File size: 7.8 KB
Line 
1#region License Information
2/* HeuristicLab
3 * Copyright (C) 2002-2018 Heuristic and Evolutionary Algorithms Laboratory (HEAL)
4 *
5 * This file is part of HeuristicLab.
6 *
7 * HeuristicLab is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * HeuristicLab is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with HeuristicLab. If not, see <http://www.gnu.org/licenses/>.
19 */
20#endregion
21
22using System;
23using System.Collections.Generic;
24
25namespace HeuristicLab.Problems.DataAnalysis.Symbolic {
26  public static class SymbolicExpressionHashExtensions {
27    public sealed class HashNode<T> : IComparable<HashNode<T>>, IEquatable<HashNode<T>> where T : class {
28      public T Data;
29      public int Arity;
30      public int Size;
31      public bool IsCommutative;
32
33      public bool Enabled;
34      public int HashValue;           // the initial (fixed) hash value for this individual node/data
35      public int CalculatedHashValue; // the calculated hash value (taking into account the children hash values)
36
37      public Action<HashNode<T>[], int> Simplify;
38      public IComparer<T> Comparer;
39
40      public bool IsChild => Arity == 0;
41
42      public HashNode(IComparer<T> comparer) {
43        Comparer = comparer;
44      }
45
46      private HashNode() { }
47
48      public int CompareTo(HashNode<T> other) {
49        var res = Comparer.Compare(Data, other.Data);
50        return res == 0 ? CalculatedHashValue.CompareTo(other.CalculatedHashValue) : res;
51      }
52
53      public override string ToString() {
54        return $"{Data} {Arity} {Size} {CalculatedHashValue} {Enabled}";
55      }
56
57      public bool Equals(HashNode<T> other) {
58        return CalculatedHashValue.Equals(other.CalculatedHashValue);
59      }
60
61      public override bool Equals(object obj) {
62        var other = obj as HashNode<T>;
63        if (other != null)
64          return Equals(other);
65        return base.Equals(obj);
66      }
67
68      public override int GetHashCode() {
69        return CalculatedHashValue;
70      }
71
72      public static bool operator ==(HashNode<T> a, HashNode<T> b) {
73        return a.Equals(b);
74      }
75
76      public static bool operator !=(HashNode<T> a, HashNode<T> b) {
77        return !a.Equals(b);
78      }
79    }
80
81    public static int ComputeHash<T>(this HashNode<T>[] nodes, int i) where T : class {
82      var node = nodes[i];
83      var hashes = new int[node.Arity + 1];
84      int idx = 0;
85      foreach (int c in nodes.IterateChildren(i)) {
86        hashes[idx++] = nodes[c].CalculatedHashValue;
87      }
88      hashes[idx] = node.HashValue;
89      return HashUtil.JSHash(hashes);
90    }
91
92    public static HashNode<T>[] Simplify<T>(this HashNode<T>[] nodes) where T : class {
93      var reduced = nodes.Reduce();
94      reduced.Sort();
95
96      for (int i = 0; i < reduced.Length; ++i) {
97        var node = reduced[i];
98        if (node.IsChild) {
99          continue;
100        }
101        node.Simplify?.Invoke(reduced, i);
102      }
103      // detect if anything was simplified
104      var count = 0;
105      foreach (var node in reduced) {
106        if (!node.Enabled) { ++count; }
107      }
108      if (count == 0) {
109        return reduced;
110      }
111
112      var simplified = new HashNode<T>[reduced.Length - count];
113      int j = 0;
114      foreach (var node in reduced) {
115        if (node.Enabled) {
116          simplified[j++] = node;
117        }
118      }
119      simplified.UpdateNodeSizes();
120      simplified.Sort();
121      return simplified;
122    }
123
124    public static void Sort<T>(this HashNode<T>[] nodes) where T : class {
125      for (int i = 0; i < nodes.Length; ++i) {
126        var node = nodes[i];
127
128        if (node.IsChild) {
129          continue;
130        }
131
132        if (node.IsCommutative) { // only sort when the argument order does not matter
133          var arity = node.Arity;
134          var size = node.Size;
135
136          if (arity == size) { // all child nodes are terminals
137            Array.Sort(nodes, i - size, size);
138          } else { // i have some non-terminal children
139            var sorted = new HashNode<T>[size];
140            var indices = nodes.IterateChildren(i);
141            Array.Sort(indices, (a, b) => nodes[a].CompareTo(nodes[b]));
142
143            int idx = 0;
144            foreach (var j in indices) {
145              var child = nodes[j];
146              if (!child.IsChild) { // must copy complete subtree
147                Array.Copy(nodes, j - child.Size, sorted, idx, child.Size);
148                idx += child.Size;
149              }
150              sorted[idx++] = nodes[j];
151            }
152            Array.Copy(sorted, 0, nodes, i - size, size);
153          }
154        }
155        node.CalculatedHashValue = nodes.ComputeHash(i);
156      }
157    }
158
159    /// <summary>
160    /// Get a function node's child indices from left to right
161    /// </summary>
162    /// <typeparam name="T"></typeparam>
163    /// <param name="nodes"></param>
164    /// <param name="i"></param>
165    /// <returns>An array containing child indices</returns>
166    public static int[] IterateChildren<T>(this HashNode<T>[] nodes, int i) where T : class {
167      var node = nodes[i];
168      var arity = node.Arity;
169      var children = new int[arity];
170      var idx = i - 1;
171      for (int j = 0; j < arity; ++j) {
172        while (!nodes[idx].Enabled) { --idx; } // skip nodes possibly disabled by simplification
173        children[j] = idx;
174        idx -= 1 + nodes[idx].Size;
175      }
176      return children;
177    }
178
179    public static void UpdateNodeSizes<T>(this HashNode<T>[] nodes) where T : class {
180      for (int i = 0; i < nodes.Length; ++i) {
181        var node = nodes[i];
182        if (node.IsChild) {
183          node.Size = 0;
184          continue;
185        }
186        node.Size = node.Arity;
187        foreach (int j in nodes.IterateChildren(i)) {
188          node.Size += nodes[j].Size;
189        }
190      }
191    }
192
193    /// <summary>
194    /// Reduce nodes of the same type according to arithmetic rules
195    /// </summary>
196    /// <typeparam name="T"></typeparam>
197    /// <param name="g">The given grammar (controls symbol arities)</param>
198    /// <param name="nodes">The node array</param>
199    /// <param name="i">Parent index</param>
200    /// <param name="j">Child index</param>
201    private static void Reduce<T>(this HashNode<T>[] nodes, int i, int j) where T : class {
202      var p = nodes[i]; // parent node
203      var c = nodes[j]; // child node
204
205      if (c.IsChild)
206        return;
207
208      foreach (int k in nodes.IterateChildren(j)) {
209        if (nodes[k].IsChild) continue;
210        nodes.Reduce(j, k);
211      }
212
213      // handle commutative symbols (add, mul)
214      if (p.IsCommutative && p.HashValue == c.HashValue) {
215        c.Enabled = false;
216        p.Arity += c.Arity - 1;
217      }
218    }
219
220    public static HashNode<T>[] Reduce<T>(this HashNode<T>[] nodes) where T : class {
221      nodes.UpdateNodeSizes();
222      int i = nodes.Length - 1;
223      foreach (int c in nodes.IterateChildren(i)) {
224        if (nodes[c].IsChild) continue;
225        nodes.Reduce(i, c);
226      }
227      int count = 0;
228      foreach (var node in nodes) {
229        if (!node.Enabled) { ++count; }
230      }
231      if (count == 0)
232        return nodes;
233
234      var reduced = new HashNode<T>[nodes.Length - count];
235      i = 0;
236      foreach (var node in nodes) {
237        if (node.Enabled) { reduced[i++] = node; }
238      }
239      reduced.UpdateNodeSizes();
240      return reduced;
241    }
242  }
243}
Note: See TracBrowser for help on using the repository browser.