
source: branches/2389-EpsLexicase/HeuristicLab.ExtLibs/HeuristicLab.Cecil/0.9.5/Mono.Cecil-0.9.5/Symbols/Mono.Cecil.Pdb/Microsoft.Cci.Pdb/IntHashTable.cs

Last change on this file was 11700, checked in by jkarder, 10 years ago

#2077: created branch and added first version

File size: 23.3 KB
//-----------------------------------------------------------------------------
//
// Copyright (C) Microsoft Corporation.  All Rights Reserved.
//
//-----------------------------------------------------------------------------
using System;
using System.Collections;

namespace Microsoft.Cci.Pdb {
  // The IntHashTable class represents a dictionary of associated keys and
  // values with constant lookup time.
  //
  // Objects used as keys in a hashtable must implement the GetHashCode
  // and Equals methods (or they can rely on the default implementations
  // inherited from Object if key equality is simply reference
  // equality). Furthermore, the GetHashCode and Equals methods of
  // a key object must produce the same results given the same parameters
  // for the entire time the key is present in the hashtable. In practical
  // terms, this means that key objects should be immutable, at least for
  // the time they are used as keys in a hashtable.
  //
  // When entries are added to a hashtable, they are placed into
  // buckets based on the hashcode of their keys. Subsequent lookups of
  // keys will use the hashcode of the keys to only search a particular
  // bucket, thus substantially reducing the number of key comparisons
  // required to find an entry. A hashtable's maximum load factor, which
  // can be specified when the hashtable is instantiated, determines the
  // maximum ratio of hashtable entries to hashtable buckets. Smaller load
  // factors cause faster average lookup times at the cost of increased
  // memory consumption. The default maximum load factor of 1.0 generally
  // provides the best balance between speed and size. As entries are added
  // to a hashtable, the hashtable's actual load factor increases, and when
  // the actual load factor reaches the maximum load factor value, the
  // number of buckets in the hashtable is automatically increased by
  // approximately a factor of two (to be precise, the number of hashtable
  // buckets is increased to the smallest prime number that is larger than
  // twice the current number of hashtable buckets).
  //
  // Each object provides its own hash function, accessed by calling
  // GetHashCode().  However, one can supply an object implementing
  // IHashCodeProvider to a constructor on the IntHashTable.  That hash
  // function would then be used for all objects in the table.
  //
  // This IntHashTable is implemented to support multiple concurrent readers
  // and one concurrent writer without using any synchronization primitives.
  // All read methods essentially must protect themselves from a resize
  // occurring while they are running.  This was done by enforcing an
  // ordering on inserts & removes, as well as removing some member variables
  // and special casing the expand code to work in a temporary array instead
  // of the live bucket array.  All inserts must set a bucket's value and
  // key before setting the hash code & collision field.
  //
  // By Brian Grunkemeyer, algorithm by Patrick Dussud.
  // Version 1.30 2/20/2000
  //| <include path='docs/doc[@for="IntHashTable"]/*' />
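  // A minimal usage sketch (illustrative only; the variable name and the sample keys
  // and values below are invented for this example, not taken from the original file):
  //
  //   IntHashTable table = new IntHashTable();
  //   table.Add(42, "forty-two");        // Add throws ArgumentException for a duplicate key
  //   table[7] = "seven";                // the indexer setter silently overwrites
  //   object v = table[42];              // "forty-two"; null if the key is absent
  //   bool found = table.Contains(7);    // true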
  internal class IntHashTable : IEnumerable {
    /*
      Implementation Notes:

      This IntHashTable uses double hashing.  There are hashsize buckets in
      the table, and each bucket can contain 0 or 1 element.  We use a bit to
      mark whether there's been a collision when we inserted multiple
      elements (i.e., an inserted item was hashed at least a second time and
      we probed this bucket, but it was already in use).  Using the
      collision bit, we can terminate lookups & removes for elements that
      aren't in the hash table more quickly.  We steal the most
      significant bit from the hash code to store the collision bit.

      Our hash function is of the following form:

      h(key, n) = h1(key) + n*h2(key)

      where n is the number of times we've hit a collided bucket and
      rehashed (on this particular lookup).  Here are our hash functions:

      h1(key) = GetHash(key);  // default implementation calls key.GetHashCode();
      h2(key) = 1 + (((h1(key) >> 5) + 1) % (hashsize - 1));

      The h1 can return any number.  h2 must return a number between 1 and
      hashsize - 1 that is relatively prime to hashsize (not a problem if
      hashsize is prime).  (Knuth's Art of Computer Programming, Vol. 3,
      p. 528-9)

      If this is true, then we are guaranteed to visit every bucket in
      exactly hashsize probes, since the least common multiple of hashsize
      and h2(key) will be hashsize * h2(key).  (This is the first number
      where adding h2 to h1 mod hashsize will be 0 and we will search the
      same bucket twice).

      We previously used a different h2(key, n) that was not constant.
      That is a horrifically bad idea, unless you can prove that series
      will never produce any identical numbers that overlap when you mod
      them by hashsize, for all subranges from i to i+hashsize, for all i.
      It's not worth investigating, since there was no clear benefit from
      using that hash function, and it was broken.

      For efficiency reasons, we've implemented this by storing h1 and h2
      in a temporary, and setting a variable called seed equal to h1.  We
      do a probe, and if we collided, we simply add h2 to seed each time
      through the loop.

      A good test for h2() is to subclass IntHashTable, provide your own
      implementation of GetHash() that returns a constant, then add many
      items to the hash table.  Make sure Count equals the number of items
      you inserted.

      -- Brian Grunkemeyer, 10/28/1999
    */
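    // To make the double-hashing scheme above concrete, here is an illustrative probe
    // sequence (the concrete numbers are chosen for this example, not taken from the
    // original notes): suppose hashsize = 11 and h1(key) = 25.  Then
    // h2 = 1 + (((25 >> 5) + 1) % 10) = 2, and the probed buckets are
    // 25 % 11 = 3, then 5, 7, 9, 0, 2, 4, 6, 8, 10, 1 -- every bucket exactly once in
    // 11 probes, because 2 and 11 are relatively prime.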

    // A typical resize algorithm would pick the smallest prime number in this array
    // that is larger than twice the previous capacity.
    // Suppose our Hashtable currently has capacity x and enough elements are added
    // such that a resize needs to occur. Resizing first computes 2x then finds the
    // first prime in the table greater than 2x, i.e. if primes are ordered
    // p_1, p_2, …, p_i, …, it finds p_n such that p_{n-1} < 2x < p_n.
    // Doubling is important for preserving the asymptotic complexity of the
    // hashtable operations such as add.  Having a prime guarantees that double
    // hashing does not lead to infinite loops.  I.e., your hash function will be
    // h1(key) + i*h2(key), 0 <= i < size.  h2 and the size must be relatively prime.
    private static readonly int[] primes = {
            3, 7, 11, 17, 23, 29, 37, 47, 59, 71, 89, 107, 131, 163, 197, 239, 293, 353, 431, 521, 631, 761, 919,
            1103, 1327, 1597, 1931, 2333, 2801, 3371, 4049, 4861, 5839, 7013, 8419, 10103, 12143, 14591,
            17519, 21023, 25229, 30293, 36353, 43627, 52361, 62851, 75431, 90523, 108631, 130363, 156437,
            187751, 225307, 270371, 324449, 389357, 467237, 560689, 672827, 807403, 968897, 1162687, 1395263,
            1674319, 2009191, 2411033, 2893249, 3471899, 4166287, 4999559, 5999471, 7199369};

    private int GetPrime(int minSize) {
      if (minSize < 0) {
        throw new ArgumentException("Arg_HTCapacityOverflow");
      }
      for (int i = 0; i < primes.Length; i++) {
        int size = primes[i];
        if (size >= minSize) {
          return size;
        }
      }
      throw new ArgumentException("Arg_HTCapacityOverflow");
    }
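    // For example (illustrative numbers): with 89 buckets, expand() below requests
    // GetPrime(1 + 89 * 2) = GetPrime(179).  The smallest entry in the primes table
    // that is >= 179 is 197, so the table grows from 89 to 197 buckets -- a little
    // more than the "smallest prime larger than twice the current size" described
    // above, because only the precomputed primes are considered.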

    // Deleted entries have their key set to buckets

    // The hash table data.
    // This cannot be serialised
    private struct bucket {
      internal int key;
      internal int hash_coll;   // Store hash code; sign bit means there was a collision.
      internal Object val;
    }

    private bucket[] buckets;

    // The total number of entries in the hash table.
    private int count;

    // The total number of collision bits set in the hashtable
    private int occupancy;

    private int loadsize;
    private int loadFactorPerc;    // 100 = 1.0

    private int version;

    // Constructs a new hashtable. The hashtable is created with an initial
    // capacity of zero and a load factor of 1.0.
    //| <include path='docs/doc[@for="IntHashTable.IntHashTable"]/*' />
    internal IntHashTable()
      : this(0, 100) {
    }

    // Constructs a new hashtable with the given initial capacity and a load
    // factor of 1.0. The capacity argument serves as an indication of
    // the number of entries the hashtable will contain. When this number (or
    // an approximation) is known, specifying it in the constructor can
    // eliminate a number of resizing operations that would otherwise be
    // performed when elements are added to the hashtable.
    //
    //| <include path='docs/doc[@for="IntHashTable.IntHashTable1"]/*' />
    internal IntHashTable(int capacity)
      : this(capacity, 100) {
    }

    // Constructs a new hashtable with the given initial capacity and load
    // factor. The capacity argument serves as an indication of the
    // number of entries the hashtable will contain. When this number (or an
    // approximation) is known, specifying it in the constructor can eliminate
    // a number of resizing operations that would otherwise be performed when
    // elements are added to the hashtable. The loadFactorPerc argument
    // indicates the maximum ratio of hashtable entries to hashtable buckets.
    // Smaller load factors cause faster average lookup times at the cost of
    // increased memory consumption. A load factor of 1.0 generally provides
    // the best balance between speed and size.
    //
    //| <include path='docs/doc[@for="IntHashTable.IntHashTable3"]/*' />
    internal IntHashTable(int capacity, int loadFactorPerc) {
      if (capacity < 0)
        throw new ArgumentOutOfRangeException("capacity", "ArgumentOutOfRange_NeedNonNegNum");
      if (!(loadFactorPerc >= 10 && loadFactorPerc <= 100))
        throw new ArgumentOutOfRangeException("loadFactorPerc", String.Format("ArgumentOutOfRange_IntHashTableLoadFactor", 10, 100));

      // Based on perf work, .72 is the optimal load factor for this table.
      this.loadFactorPerc = (loadFactorPerc * 72) / 100;

      // loadFactorPerc is stored as a percentage, so the capacity hint must be scaled
      // by 100 before dividing (the long cast guards against overflow for large capacities).
      int hashsize = GetPrime((int)(((long)capacity * 100) / this.loadFactorPerc));
      buckets = new bucket[hashsize];

      loadsize = (int)(this.loadFactorPerc * hashsize) / 100;
      if (loadsize >= hashsize)
        loadsize = hashsize-1;
    }
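    // Illustrative sizing (example numbers, assuming the percentage scaling above):
    // new IntHashTable(100, 100) stores loadFactorPerc = 72, picks
    // hashsize = GetPrime(100 * 100 / 72) = GetPrime(138) = 163 buckets, and sets
    // loadsize = (72 * 163) / 100 = 117, so the 118th Add triggers the first expand().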

    // Computes the hash function:  H(key, i) = h1(key) + i*h2(key, hashSize).
    // The out parameter seed is h1(key), while the out parameter
    // incr is h2(key, hashSize).  Callers of this function should
    // add incr each time through a loop.
    private uint InitHash(int key, int hashsize, out uint seed, out uint incr) {
      // Hashcode must be positive.  Also, we must not use the sign bit, since
      // that is used for the collision bit.
      uint hashcode = (uint)key & 0x7FFFFFFF;
      seed = (uint)hashcode;
      // Restriction: incr MUST be between 1 and hashsize - 1, inclusive for
      // the modular arithmetic to work correctly.  This guarantees you'll
      // visit every bucket in the table exactly once within hashsize
      // iterations.  Violate this and it'll cause obscure bugs forever.
      // If you change this calculation for h2(key), update putEntry too!
      incr = (uint)(1 + (((seed >> 5) + 1) % ((uint)hashsize - 1)));
      return hashcode;
    }
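    // Worked example (values chosen for illustration): for key = 1234567 and
    // hashsize = 163, seed = 1234567 and incr = 1 + (((1234567 >> 5) + 1) % 162) = 26,
    // so probing starts at bucket 1234567 % 163 = 5 and advances by 26 (mod 163)
    // after every collision.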

    // Adds an entry with the given key and value to this hashtable. An
    // ArgumentException is thrown if the key is null or if the key is already
    // present in the hashtable.
    //
    //| <include path='docs/doc[@for="IntHashTable.Add"]/*' />
    internal void Add(int key, Object value) {
      Insert(key, value, true);
    }

    // Removes all entries from this hashtable.
    //| <include path='docs/doc[@for="IntHashTable.Clear"]/*' />
    internal void Clear() {
      if (count == 0)
        return;

      for (int i = 0; i < buckets.Length; i++) {
        buckets[i].hash_coll = 0;
        buckets[i].key = -1;
        buckets[i].val = null;
      }

      count = 0;
      occupancy = 0;
    }

    // Checks if this hashtable contains an entry with the given key.  This is
    // an O(1) operation.
    //
    //| <include path='docs/doc[@for="IntHashTable.Contains"]/*' />
    internal bool Contains(int key) {
      if (key < 0) {
        throw new ArgumentException("Argument_KeyLessThanZero");
      }

      uint seed;
      uint incr;
      // Take a snapshot of buckets, in case another thread resizes table
      bucket[] lbuckets = buckets;
      uint hashcode = InitHash(key, lbuckets.Length, out seed, out incr);
      int ntry = 0;

      bucket b;
      do {
        int bucketNumber = (int)(seed % (uint)lbuckets.Length);
        b = lbuckets[bucketNumber];
        if (b.val == null) {
          return false;
        }
        if (((b.hash_coll & 0x7FFFFFFF) == hashcode) && b.key == key) {
          return true;
        }
        seed += incr;
      } while (b.hash_coll < 0 && ++ntry < lbuckets.Length);
      return false;
    }

    // Returns the value associated with the given key. If an entry with the
    // given key is not found, the returned value is null.
    //
    //| <include path='docs/doc[@for="IntHashTable.this"]/*' />
    internal Object this[int key] {
      get {
        if (key < 0) {
          throw new ArgumentException("Argument_KeyLessThanZero");
        }
        uint seed;
        uint incr;
        // Take a snapshot of buckets, in case another thread does a resize
        bucket[] lbuckets = buckets;
        uint hashcode = InitHash(key, lbuckets.Length, out seed, out incr);
        int ntry = 0;

        bucket b;
        do {
          int bucketNumber = (int)(seed % (uint)lbuckets.Length);
          b = lbuckets[bucketNumber];
          if (b.val == null) {
            return null;
          }
          if (((b.hash_coll & 0x7FFFFFFF) == hashcode) && key == b.key) {
            return b.val;
          }
          seed += incr;
        } while (b.hash_coll < 0 && ++ntry < lbuckets.Length);
        return null;
      }
      set {
        Insert(key, value, false);
      }
    }

    // Increases the bucket count of this hashtable. This method is called from
    // the Insert method when the actual load factor of the hashtable reaches
    // the upper limit specified when the hashtable was constructed. The number
    // of buckets in the hashtable is increased to the smallest prime number
    // that is larger than twice the current number of buckets, and the entries
    // in the hashtable are redistributed into the new buckets using the cached
    // hashcodes.
    private void expand() {
      rehash(GetPrime(1+buckets.Length*2));
    }

    // We occasionally need to rehash the table to clean up the collision bits.
    private void rehash() {
      rehash(buckets.Length);
    }

    private void rehash(int newsize) {

      // reset occupancy
      occupancy=0;

      // Don't replace any internal state until we've finished adding to the
      // new bucket[].  This serves two purposes:
      //   1) Allow concurrent readers to see valid hashtable contents
      //      at all times
      //   2) Protect against an OutOfMemoryException while allocating this
      //      new bucket[].
      bucket[] newBuckets = new bucket[newsize];

      // rehash table into new buckets
      int nb;
      for (nb = 0; nb < buckets.Length; nb++) {
        bucket oldb = buckets[nb];
        if (oldb.val != null) {
          putEntry(newBuckets, oldb.key, oldb.val, oldb.hash_coll & 0x7FFFFFFF);
        }
      }

      // New bucket[] is good to go - replace buckets and other internal state.
      version++;
      buckets = newBuckets;
      loadsize = (int)(loadFactorPerc * newsize) / 100;

      if (loadsize >= newsize) {
        loadsize = newsize-1;
      }

      return;
    }

    // Returns an enumerator for this hashtable.
    // If modifications are made to the hashtable while an enumeration is
    // in progress, the MoveNext and Current methods of the
    // enumerator will throw an exception.
    //
    //| <include path='docs/doc[@for="IntHashTable.IEnumerable.GetEnumerator"]/*' />
    IEnumerator IEnumerable.GetEnumerator() {
      return new IntHashTableEnumerator(this);
    }
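    // Illustrative enumeration (the variable names are invented for this example):
    // GetEnumerator is an explicit IEnumerable implementation and Current returns the
    // stored value, so a foreach yields values only:
    //
    //   foreach (object val in table) {
    //     // keys are not exposed here; they are available through the internal Key
    //     // property of the private IntHashTableEnumerator class below
    //   }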

    // Inserts an entry into this hashtable. This method is called from the Set
    // and Add methods. If the add parameter is true and the given key already
    // exists in the hashtable, an exception is thrown.
    private void Insert(int key, Object nvalue, bool add) {
      if (key < 0) {
        throw new ArgumentException("Argument_KeyLessThanZero");
      }
      if (nvalue == null) {
        throw new ArgumentNullException("nvalue", "ArgumentNull_Value");
      }
      if (count >= loadsize) {
        expand();
      } else if (occupancy > loadsize && count > 100) {
        rehash();
      }

      uint seed;
      uint incr;
      // Assume we only have one thread writing concurrently.  Modify
      // buckets to contain new data, as long as we insert in the right order.
      uint hashcode = InitHash(key, buckets.Length, out seed, out incr);
      int ntry = 0;
      int emptySlotNumber = -1; // We use the empty slot number to cache the first empty slot. We chose to reuse slots
      // created by Remove that have the collision bit set over using up new slots.

      do {
        int bucketNumber = (int)(seed % (uint)buckets.Length);

        // Set emptySlot number to current bucket if it is the first available bucket that we have seen
        // that once contained an entry and also has had a collision.
        // We need to search this entire collision chain because we have to ensure that there are no
        // duplicate entries in the table.

        // Insert the key/value pair into this bucket if this bucket is empty and has never contained an entry
        // OR
        // This bucket once contained an entry but there has never been a collision
        if (buckets[bucketNumber].val == null) {
          // If we have found an available bucket that has never had a collision, but we've seen an available
          // bucket in the past that has the collision bit set, use the previous bucket instead
          if (emptySlotNumber != -1) { // Reuse slot
            bucketNumber = emptySlotNumber;
          }

          // We pretty much have to insert in this order.  Don't set hash
          // code until the value & key are set appropriately.
          buckets[bucketNumber].val = nvalue;
          buckets[bucketNumber].key = key;
          buckets[bucketNumber].hash_coll |= (int)hashcode;
          count++;
          version++;
          return;
        }

        // The current bucket is in use
        // OR
        // it is available, but has had the collision bit set and we have already found an available bucket
        if (((buckets[bucketNumber].hash_coll & 0x7FFFFFFF) == hashcode) &&
                    key == buckets[bucketNumber].key) {
          if (add) {
            throw new ArgumentException("Argument_AddingDuplicate__" + buckets[bucketNumber].key);
          }
          buckets[bucketNumber].val = nvalue;
          version++;
          return;
        }

        // The current bucket is full, and we have therefore collided.  We need to set the collision bit
        // UNLESS
        // we have remembered an available slot previously.
        if (emptySlotNumber == -1) { // We don't need to set the collision bit if we already have an empty slot
          if (buckets[bucketNumber].hash_coll >= 0) {
            buckets[bucketNumber].hash_coll |= unchecked((int)0x80000000);
            occupancy++;
          }
        }
        seed += incr;
      } while (++ntry < buckets.Length);

      // This code is here if and only if there were no buckets without a collision bit set in the entire table
      if (emptySlotNumber != -1) {
        // We pretty much have to insert in this order.  Don't set hash
        // code until the value & key are set appropriately.
        buckets[emptySlotNumber].val = nvalue;
        buckets[emptySlotNumber].key  = key;
        buckets[emptySlotNumber].hash_coll |= (int)hashcode;
        count++;
        version++;
        return;

      }

      // If you see this assert, make sure load factor & count are reasonable.
      // Then verify that our double hash function (h2, described at top of file)
      // meets the requirements described above. You should never see this assert.
      throw new InvalidOperationException("InvalidOperation_HashInsertFailed");
    }

    private void putEntry(bucket[] newBuckets, int key, Object nvalue, int hashcode) {
      uint seed = (uint)hashcode;
      uint incr = (uint)(1 + (((seed >> 5) + 1) % ((uint)newBuckets.Length - 1)));

      do {
        int bucketNumber = (int)(seed % (uint)newBuckets.Length);

        if ((newBuckets[bucketNumber].val == null)) {
          newBuckets[bucketNumber].val = nvalue;
          newBuckets[bucketNumber].key = key;
          newBuckets[bucketNumber].hash_coll |= hashcode;
          return;
        }

        if (newBuckets[bucketNumber].hash_coll >= 0) {
          newBuckets[bucketNumber].hash_coll |= unchecked((int)0x80000000);
          occupancy++;
        }
        seed += incr;
      } while (true);
    }

    // Returns the number of associations in this hashtable.
    //
    //| <include path='docs/doc[@for="IntHashTable.Count"]/*' />
    internal int Count {
      get { return count; }
    }

    // Implements an enumerator for a hashtable. The enumerator uses the
    // internal version number of the hashtable to ensure that no modifications
    // are made to the hashtable while an enumeration is in progress.
    private class IntHashTableEnumerator : IEnumerator {
      private IntHashTable hashtable;
      private int bucket;
      private int version;
      private bool current;
      private int currentKey;
      private Object currentValue;

      internal IntHashTableEnumerator(IntHashTable hashtable) {
        this.hashtable = hashtable;
        bucket = hashtable.buckets.Length;
        version = hashtable.version;
        current = false;
      }

      public bool MoveNext() {
        if (version != hashtable.version)
          throw new InvalidOperationException("InvalidOperation_EnumFailedVersion");
        while (bucket > 0) {
          bucket--;
          Object val = hashtable.buckets[bucket].val;
          if (val != null) {
            currentKey = hashtable.buckets[bucket].key;
            currentValue = val;
            current = true;
            return true;
          }
        }
        current = false;
        return false;
      }

      internal int Key {
        get {
          if (current == false)
            throw new InvalidOperationException("InvalidOperation_EnumOpCantHappen");
          return currentKey;
        }
      }

      public Object Current {
        get {
          if (current == false)
            throw new InvalidOperationException("InvalidOperation_EnumOpCantHappen");
          return currentValue;
        }
      }

      public Object Value {
        get {
          if (version != hashtable.version)
            throw new InvalidOperationException("InvalidOperation_EnumFailedVersion");
          if (current == false)
            throw new InvalidOperationException("InvalidOperation_EnumOpCantHappen");
          return currentValue;
        }
      }

      public void Reset() {
        if (version != hashtable.version) throw new InvalidOperationException("InvalidOperation_EnumFailedVersion");
        current = false;
        bucket = hashtable.buckets.Length;
        currentKey = -1;
        currentValue = null;
      }
    }
  }
}