第5章 散列

一个好的散列函数

 1     public static int hash(String key, int tableSize)
 2     {
 3         int hashVal = 0;
 4         
 5         for (int i = 0; i < key.length(); i++)
 6             hashVal = 37 * hashVal + key.charAt(i);
 7         
 8         hashVal %= tableSize;
 9         if (hashVal < 0)
10             hashVal += tableSize;
11         
12         return hashVal;
13     }

 

如果一个元素被插入时与一个已经插入的元素散列到相同的值,那么就产生一个冲突,这个冲突需要消除。解决这种冲突的方法有几种,其中最简单的两种是:分离链接法和开放定址法。

 

分离链接法

  1 import java.util.LinkedList;
  2 import java.util.List;
  3 
  4 public class SeparateChainingHashTable<AnyType>
  5 {
  6     public SeparateChainingHashTable()
  7     { this(DEFAULT_TABLE_SIZE); }
  8     public SeparateChainingHashTable(int size)
  9     {
 10         theLists = new LinkedList[nextPrime(size)];
 11         for (int i = 0; i < theLists.length; i++)
 12             theLists[i] = new LinkedList<>();
 13     }
 14 
 15     public boolean contains(AnyType x)
 16     {
 17         List<AnyType>whichList = theLists[myhash(x)];
 18         return whichList.contains(x);
 19     }
 20 
 21     public void insert(AnyType x)
 22     {
 23         List<AnyType>whichList = theLists[myhash(x)];
 24         if (!whichList.contains(x))
 25         {
 26             whichList.add(x);
 27 
 28             if (++currentSize > theLists.length)
 29                 rehash();
 30         }
 31     }
 32 
 33     public void remove(AnyType x)
 34     {
 35         List<AnyType>whichList = theLists[myhash(x)];
 36         if (whichList.contains(x))
 37         {
 38             whichList.remove(x);
 39             currentSize--;
 40         }
 41     }
 42 
 43     public void makeEmpty()
 44     {
 45         for (int i = 0; i < theLists.length; i++)
 46             theLists[i].clear();
 47 
 48         currentSize = 0;
 49     }
 50 
 51     private int currentSize;
 52     private List<AnyType>[] theLists;
 53     private static final int DEFAULT_TABLE_SIZE = 101;
 54 
 55     private void rehash()
 56     {
 57         List<AnyType>[] oldLists = theLists;
 58 
 59         theLists = new List[nextPrime( 2 * theLists.length)];
 60         for (int i = 0; i < theLists.length; i++)
 61             theLists[i] = new LinkedList<>();
 62 
 63         currentSize = 0;
 64         for (int i = 0; i < oldLists.length; i++)
 65             for (AnyType item : oldLists[i])
 66                 insert(item);
 67     }
 68 
 69     private int myhash(AnyType x)
 70     {
 71         int hashVal = x.hashCode();
 72 
 73         hashVal %= theLists.length;
 74         if (hashVal < 0)
 75             hashVal += theLists.length;
 76 
 77         return hashVal;
 78     }
 79 
 80     private static boolean isPrime(int n)
 81     {
 82         if (n == 2 || n == 3)
 83             return true;
 84         if (n == 1 || (n&1) == 0)
 85             return false;
 86         for (int i = 3; i * i < n; i++)
 87             if (n % i == 0)
 88                 return false;
 89 
 90         return true;
 91     }
 92 
 93     private static int nextPrime(int n)
 94     {
 95         if (n % 2 == 0)
 96             n++;
 97         for (; !isPrime(n); n += 2)
 98             ;
 99         return n;
100     }
101 }

 

5.4 不用链表的散列表

5.4.1 线性探测法

只要表足够大,总能够找到一个自由单元,但是如此花费的时间是相当多的。更糟的是,即使表相对较空,这样占据的单元也会开始形成一些区块,其结果称为一次聚集,就是说,散列到区块中的任何关键字都需要多次试选单元才能够解决冲突,然后该关键字被添加到相应的区块中。

 

5.4.2 平方探测法

平方探测是消除线性探测中一次聚集问题的冲突解决方法。虽然平方探测排除了一次聚集,但是散列到同一位置上的那些元素将探测相同的备选单元。这叫作二次聚集。

  1 public class QuadraticProbingHashTable<AnyType> {
  2     public QuadraticProbingHashTable() {
  3         this(DEFAULT_TABLE_SIZE);
  4     }
  5 
  6     public QuadraticProbingHashTable(int size) {
  7         allocateArray(size);
  8         makeEmpty();
  9     }
 10 
 11     public void makeEmpty() {
 12         currentSize = 0;
 13         for (int i = 0; i < array.length; i++)
 14             array[i] = null;
 15     }
 16 
 17     public boolean contains(AnyType x) {
 18         int currentPos = findPos(x);
 19         return isActive(currentPos);
 20     }
 21 
 22     public void insert(AnyType x) {
 23         int currentPos = findPos(x);
 24         if (isActive(currentPos))
 25             return;
 26 
 27         array[currentPos] = new HashEntry<>(x, true);
 28         if (currentPos > array.length / 2)
 29             rehash();
 30     }
 31 
 32     public void remove(AnyType x)
 33     {
 34         int currentPos = findPos(x);
 35         if (isActive(currentPos))
 36             array[currentPos].isActive = false;
 37     }
 38     
 39     private static class HashEntry<AnyType>
 40     {
 41         public AnyType element;
 42         public boolean isActive;
 43 
 44         public HashEntry(AnyType e)
 45         { this(e, true); }
 46 
 47         public HashEntry(AnyType e, boolean i)
 48         {
 49             element = e;
 50             isActive = i;
 51         }
 52     }
 53 
 54     private static final int DEFAULT_TABLE_SIZE = 101;
 55 
 56     private HashEntry<AnyType>[] array;
 57     private int currentSize;
 58 
 59     private void allocateArray(int arraySize)
 60     { array = new HashEntry[nextPrime(arraySize)]; }
 61     private int findPos(AnyType x)
 62     {
 63         int offest = 1;
 64         int currentPos = myhash(x);
 65 
 66         while (array[currentPos] != null && !array[currentPos].element.equals(x))
 67         {
 68             currentPos += offest;
 69             offest += 2;
 70             if (currentPos >= array.length)
 71                 currentPos -= array.length;
 72         }
 73         return currentPos;
 74     }
 75     private boolean isActive(int currentPos)
 76     { return array[currentPos] != null && array[currentPos].isActive; }
 77     private void rehash()
 78     {
 79         HashEntry<AnyType>[] oldArray = array;
 80         allocateArray(nextPrime(2 * array.length));
 81         currentSize = 0;
 82 
 83         for (int i = 0; i < oldArray.length; i++)
 84             if (oldArray[i] != null && oldArray[i].isActive)
 85                 insert(oldArray[i].element);
 86     }
 87 
 88     private int myhash(AnyType x)
 89     {
 90         int hashVal = x.hashCode();
 91 
 92         hashVal %= array.length;
 93         if (hashVal < 0)
 94             hashVal += array.length;
 95 
 96         return hashVal;
 97     }
 98 
 99     private static int nextPrime(int n)
100     {
101         if ((n&1) == 0)
102             n++;
103 
104         for (; !isPrime(n); n += 2)
105             ;
106 
107         return n;
108     }
109 
110     private static boolean isPrime(int n)
111     {
112         if (n == 2 || n == 3)
113             return true;
114         if (n == 1 || (n&1) == 0)
115             return false;
116         for (int i = 3; i * i <= n; i++)
117             if (n % i == 0)
118                 return false;
119 
120         return true;
121     }
122 }

 

 

5.4.3 双散列 最后一个冲突解决方法

 

5.5 再散列 对于使用平方探测的开放定址散列法,如果散列表填得太满,那么操作的运行时间将开始消耗过长,且插入操作可能失败。这可能发生在有太多的移除和插入混合的场合。此时,一个解决方法是建立另外一个大约两倍大的表(而且使用一个相关的新散列函数),扫描整个原始散列表,计算每个(未删除)元素的新散列值并将其插入到新表中。

 

5.6 标准库中的散列表

HashSet 和 HashMap 通常是用分离链接散列实现的。

 

5.7.2 布谷鸟散列

  1 import java.util.Random;
  2 
  3 public class CuckooHashTable<AnyType>
  4 {
  5     public CuckooHashTable(HashFamily<? super AnyType>hf) { this(hf, DEFAULLT_TABLE_SIZE); }
  6 
  7     public CuckooHashTable(HashFamily<? super AnyType>hf, int size)
  8     {
  9         allocateArray(nextPrime(size));
 10         doClear();
 11         hashFunctions = hf;
 12         numHashFunctions = hf.getNumberOfFunctions();
 13     }
 14 
 15     private Random r = new Random();
 16 
 17     private static final double MAX_LOAD = 0.4;
 18     private static final int ALLOWED_REHASHES = 1;
 19 
 20     private int rehashes = 0;
 21 
 22    private boolean insertHelper1(AnyType x)
 23    {
 24        final int COUNT_LINIT = 100;
 25        
 26        while (true)
 27        {
 28            int lastPos = 1;
 29            int pos;
 30            for (int count = 0; count < COUNT_LINIT; count++)
 31            {
 32                for (int i = 0; i < numHashFunctions; i++)
 33                {
 34                    pos = myhash(x, i);
 35                    if (array[pos] == null)
 36                    {
 37                        array[pos] = x;
 38                        currentSize++;
 39                        return true;
 40                    }
 41                }
 42                int i = 0;
 43                do 
 44                {
 45                    pos = myhash(x, r.nextInt(numHashFunctions));
 46                }while (pos == lastPos && i++ < 5);
 47                
 48                AnyType tmp = array[lastPos = pos];
 49                array[pos] = x;
 50                x = tmp;
 51            }
 52            if (++rehashes > ALLOWED_REHASHES)
 53            {
 54                expand();
 55                rehashes = 0;
 56            }
 57            else
 58                rehash();
 59        }
 60    }
 61     
 62 
 63     public boolean insert(AnyType x)
 64     {
 65         if (contains(x))
 66             return false;
 67         if (currentSize >= array.length / MAX_LOAD)
 68             expand();
 69         return insertHelper1(x);
 70     }
 71 
 72     private int myhash(AnyType x, int which)
 73     {
 74         int hashVal = hashFunctions.hash(x, which);
 75 
 76         hashVal %= array.length;
 77         if (hashVal < 0)
 78             hashVal += array.length;
 79 
 80         return hashVal;
 81     }
 82 
 83     private void expand(){ rehash((int)(array.length / MAX_LOAD));}
 84 
 85     private void rehash()
 86     {
 87         hashFunctions.generateNewFunctions();
 88         rehash(array.length);
 89     }
 90 
 91     private void rehash(int newLength)
 92     {
 93         AnyType[] oldArray = array;
 94         allocateArray(nextPrime(newLength));
 95 
 96         currentSize = 0;
 97 
 98         for (AnyType str : oldArray)
 99             if (str != null)
100                 insert(str);
101     }
102 
103     public int size(){ return currentSize; }
104 
105     public int capacity(){ return array.length; }
106 
107     private int findPos(AnyType x)
108     {
109         for (int i = 0; i < numHashFunctions; i++)
110         {
111             int pos = myhash(x, i);
112             if (array[pos] != null && array[pos].equals(x))
113                 return pos;
114         }
115         return -1;
116     }
117 
118     public boolean remove(AnyType x)
119     {
120         int pos = findPos(x);
121         if (pos != -1)
122         {
123             array[pos] = null;
124             currentSize--;
125         }
126         return pos != -1;
127     }
128 
129     public boolean contains(AnyType x){ return findPos(x) != -1;}
130 
131     public void makeEmpty(){ doClear(); }
132 
133     private void doClear()
134     {
135         currentSize = 0;
136         for (int i = 0; i < array.length; i++)
137             array[i] = null;
138     }
139 
140     private static final int DEFAULLT_TABLE_SIZE = 101;
141 
142     private final HashFamily<? super AnyType>hashFunctions;
143     private final int numHashFunctions;
144     private AnyType[] array;
145     private int currentSize;
146 
147     private void allocateArray(int arraySize) { array = (AnyType[])new Object[arraySize]; }
148 
149     protected static int nextPrime(int n)
150     {
151         if ((n&1) == 0)
152             n++;
153         for (; !isPime(n); n += 2)
154             ;
155         return n;
156     }
157 
158     private static boolean isPime(int n)
159     {
160         if (n == 2 || n == 3)
161             return true;
162         if (n == 1 || (n&1) == 0)
163             return false;
164         for (int i = 3; i * i <= n; i += 2)
165             if (n % i == 0)
166                 return false;
167 
168         return true;
169     }
170 }

 

5.7.3 跳房子散列的思路是,用事先确定的、对计算机的底层体系结构而言是最优的一个常数,给探测序列的最大长度加个上界。这样做可以给出常数级的最坏查询时间,并且与布谷鸟散列一样,查询可以并行化,以同时检查可用位置的有限集。 

posted @ 2019-03-19 20:38  tjjloveworld  阅读(260)  评论(0编辑  收藏  举报