【STL库】哈希封装 unordered_map/unordered_set - 教程

在这里插入图片描述

励志不掉头发的内向程序员：个人主页
✨️ 个人专栏: 《C++语言》《Linux学习》

偶尔悲伤，偶尔被幸福所完善

️博主简介:

在这里插入图片描述

文章目录

前言
一、源码及框架分析
二、模拟实现 unordered_map 和 unordered_set
三、unordered_map 和 unordered_set 代码实现
总结

前言

上一章节我们已经了解了哈希表是怎么实现的，再加上之前封装 map/set 的经验，unordered_map/unordered_set 的封装就会显得相对简单了：整体思路和 map/set 差不多，主要难点仍然在迭代器的实现上，下面我们就一起来看看吧。

在这里插入图片描述

一、源码及框架分析

unordered_map/unordered_set 的实现结构和 map/set 其实是一样的，都是复用同一个底层结构来同时实现两种容器：map/set 复用的是红黑树，而 unordered_map/unordered_set 复用的则是哈希表。

通过源码可以看到，结构上 hash_map 和 hash_set 跟 map 和 set 的完全类似，复用同一个 hashtable 实现 key 和 key/value 结构，hash_set 传给 hash_table 的是两个 key，hash_map 传给 hash_table 的是 pair<const key, value>
在这里插入图片描述
注意：
源码里面跟 map/set 源码类似，命名风格比较乱，这里比 map 和 set 还乱：hash_set 的模板参数居然用 Value 命名，hash_map 用的是 Key 和 T 命名，可见大佬有时写代码也不规范。下面我们模拟实现一份自己的版本，就按自己的风格走了。

二、模拟实现 unordered_map 和 unordered_set

2.1、实现出复用哈希表的框架，并支持 insert

和 map/set 的思路是一样的，创建一个模板，我们想要让哈希表是什么 key 还是 key/value，就传什么类型的即可。

// MyUnorderedSet.h
template<
class K
>
class unordered_set
{
struct SetKeyOfT
{
const K&
operator()(const K& key)
{
return key;
}
};
public:
bool insert(const K& key)
{
return _ht.Insert(key);
}
private:
hash_bucket::HashTable<K, K, SetKeyOfT> _ht;
  };

// MyUnorderedMap.h
template<
class K
, class V
>
class unordered_map
{
struct MapKeyOfT
{
const K&
operator()(const pair<K, V>
  & kv)
  {
  return kv.first;
  }
  };
  public:
  bool insert(const pair<K, V>
    & kv)
    {
    return _ht.Insert(kv);
    }
    private:
    hash_bucket::HashTable<K, pair<K, V>
      , MapKeyOfT> _ht;
      };

// HashTable.h
// Default hash functor: converts the key directly to size_t.
// The conversion is kept as a C-style cast so that every key type the
// original accepted is still accepted.
template<class K>
struct HashFunc
{
    // const-qualified so the functor can also be invoked through a const
    // object or const reference; it has no state to modify.
    size_t operator()(const K& key) const
    {
        return (size_t)key;
    }
};
namespace hash_bucket
{
    // One element of a bucket's singly linked chain.
    template<class T>
    struct HashNode
    {
        T _data;            // stored element
        HashNode<T>* _next; // next node in the same bucket, nullptr at the tail

        HashNode(const T& data)
            : _data(data)
            , _next(nullptr)
        {
        }
    };

    // Implementation roadmap:
    // 1. implement the hash table
    // 2. wrap it as unordered_map / unordered_set (KeyOfT extracts the key)
    // 3. iterator
    // 4. const_iterator
    // 5. make keys immutable
    // 6. operator[]

    // Separate-chaining hash table.
    //   K      - key type
    //   T      - stored element type (the key itself, or a key/value pair)
    //   KeyOfT - functor returning the key of a stored T
    // At this step the key is used directly as the hash value
    // (`key % bucket_count`), so K must support that expression.
    //
    // NOTE(review): the table owns its nodes through raw pointers and has no
    // user-defined copy operations; copying a table would make two tables
    // delete the same nodes in their destructors.
    template<class K, class T, class KeyOfT>
    class HashTable
    {
        typedef HashNode<T> Node;

        // Returns the first prime in the table below that is >= n, or the
        // largest entry when n exceeds all of them.
        inline unsigned long __stl_next_prime(unsigned long n)
        {
            static const int __stl_num_primes = 28;
            static const unsigned long __stl_prime_list[__stl_num_primes] =
            {
                53, 97, 193, 389, 769,
                1543, 3079, 6151, 12289, 24593,
                49157, 98317, 196613, 393241, 786433,
                1572869, 3145739, 6291469, 12582917, 25165843,
                50331653, 100663319, 201326611, 402653189, 805306457,
                1610612741, 3221225473, 4294967291
            };
            const unsigned long* first = __stl_prime_list;
            const unsigned long* last = __stl_prime_list + __stl_num_primes;
            const unsigned long* pos = lower_bound(first, last, n);
            return pos == last ? *(last - 1) : *pos;
        }

    public:
        // Starts with the smallest prime bucket count, all buckets empty.
        HashTable()
        {
            _tables.resize(__stl_next_prime(_tables.size()), nullptr);
        }

        // Frees every node of every bucket.
        ~HashTable()
        {
            for (size_t i = 0; i < _tables.size(); i++)
            {
                Node* cur = _tables[i];
                while (cur)
                {
                    Node* next = cur->_next;
                    delete cur;
                    cur = next;
                }
                _tables[i] = nullptr;
            }
        }

        // Returns the node holding `key`, or nullptr when it is absent.
        // Insert relies on this lookup to reject duplicates.
        Node* Find(const K& key)
        {
            KeyOfT kot;
            size_t hashi = key % _tables.size();
            for (Node* cur = _tables[hashi]; cur; cur = cur->_next)
            {
                if (kot(cur->_data) == key)
                    return cur;
            }
            return nullptr;
        }

        // Inserts `data` unless an element with the same key already exists.
        // Returns true when a new node was added, false on a duplicate.
        bool Insert(const T& data)
        {
            KeyOfT kot;
            if (Find(kot(data)))
                return false;

            // Load factor == 1: grow to the next prime and relink the
            // existing nodes (no node is reallocated).
            if (_n == _tables.size())
            {
                // size() + 1 is required: lower_bound returns the first prime
                // >= n, so asking for size() would hand back the current size
                // and the table would never grow.
                vector<Node*> newtables(__stl_next_prime(_tables.size() + 1), nullptr);
                for (size_t i = 0; i < _tables.size(); i++)
                {
                    Node* cur = _tables[i];
                    while (cur)
                    {
                        Node* next = cur->_next;
                        // Re-map the node to its bucket in the new table.
                        size_t hashi = kot(cur->_data) % newtables.size();
                        // Push front into the new bucket.
                        cur->_next = newtables[hashi];
                        newtables[hashi] = cur;
                        cur = next;
                    }
                    _tables[i] = nullptr;
                }
                _tables.swap(newtables);
            }

            // The bucket index must be computed after any growth; an index
            // taken against the old bucket count would place the node in a
            // bucket that Find never inspects.
            size_t hashi = kot(data) % _tables.size();

            // Push front into the bucket.
            Node* newnode = new Node(data);
            newnode->_next = _tables[hashi];
            _tables[hashi] = newnode;
            ++_n;
            return true;
        }

    private:
        vector<Node*> _tables; // bucket array: head pointer of each chain
        size_t _n = 0;         // number of stored elements
    };
}

2.2、支持 iterator 的实现

（1）iterator 实现思路分析

iterator 实现的大框架：
跟 list 的 iterator 思路是一致的，用一个类型封装结点的指针，再通过重载运算符实现，迭代器像指针一样访问的行为，要注意的是哈希表的迭代器是单向迭代器。

// Skeleton of the hash-table iterator: it wraps a node pointer together with
// a pointer to the table the node belongs to. Ptr and Ref are the pointer and
// reference types returned by operator-> and operator*.
// The operator bodies are intentionally left empty in this listing; they are
// filled in further below.
template<
class K
, class T
, class Ptr
, class Ref
, class KeyOfT
, class Hash
>
struct HTIterator
{
typedef HashNode<T> Node;
  typedef HTIterator<K, T, Ptr, Ref, KeyOfT, Hash> Self;
    // Current node.
    Node* _node;
    // Table the node belongs to.
    const HashTable<K, T, KeyOfT, Hash>
      * _pht;
      // Stores the node and table pointers as given.
      HTIterator(Node* node, const HashTable<K, T, KeyOfT, Hash>
        * pht)
        :_node(node)
        , _pht(pht)
        {
        }
        // Placeholder: body not written yet.
        Ref operator*()
        {
        }
        // Placeholder: body not written yet.
        Ptr operator->
        ()
        {
        }
        // Placeholder: body not written yet.
        bool operator!=(const Self& s)
        {
        }
        // Placeholder: body not written yet.
        Self&
        operator++()
        {
        }
        };

iterator 实现思路分析：

这里的难点是 operator++ 的实现。iterator 中有一个指向结点的指针，如果当前桶下面还有结点，则结点的指针指向下一个结点即可。如果当前桶走完了，则需要想办法计算找到下一个桶。这里的难点反而是结构设计的问题，参考上面的源码，我们可以看到 iterator 中除了有结点的指针，还有哈希表对象的指针，这样当前桶走完了，要计算下一个桶就相对容易多了，用 key 值计算出当前桶位置，依次往后找下一个不为空的桶即可。

// Pre-increment: move to the next node of the current bucket if there is one;
// otherwise scan forward for the next non-empty bucket. When no such bucket
// exists the node pointer becomes nullptr, which represents end().
Self& operator++()
{
    if (_node->_next)
    {
        // Still inside the current chain.
        _node = _node->_next;
        return *this;
    }

    // Current chain is exhausted: recompute which bucket we are in from the
    // element's key, then look at the buckets after it.
    KeyOfT keyOf;
    Hash hasher;
    const size_t bucketCount = _pht->_tables.size();
    size_t idx = hasher(keyOf(_node->_data)) % bucketCount + 1;
    while (idx < bucketCount && !_pht->_tables[idx])
    {
        ++idx;
    }

    // idx == bucketCount means every remaining bucket was empty: end().
    _node = (idx == bucketCount) ? nullptr : _pht->_tables[idx];
    return *this;
}

begin() 返回第一个桶中第一个节点指针构造的迭代器，这里 end() 返回迭代器可以用空表示。

// Mutable iterator: dereferences to T& / T*.
using Iterator = HTIterator<K, T, T*, T&, KeyOfT, Hash>;
// Read-only iterator: dereferences to const T& / const T*.
using ConstIterator = HTIterator<K, T, const T*, const T&, KeyOfT, Hash>;
// Returns an iterator to the head node of the first non-empty bucket,
// or End() when the table holds no elements.
Iterator Begin()
{
    if (_n != 0)
    {
        for (Node* head : _tables)
        {
            if (head)
            {
                return Iterator(head, this);
            }
        }
    }
    return End();
}
// Past-the-end iterator: represented by a null node pointer.
Iterator End()
{
    Iterator pastTheEnd(nullptr, this);
    return pastTheEnd;
}
// Const overload: returns a read-only iterator to the head node of the first
// non-empty bucket, or End() when the table holds no elements.
ConstIterator Begin() const
{
    if (_n != 0)
    {
        for (Node* head : _tables)
        {
            if (head)
            {
                return ConstIterator(head, this);
            }
        }
    }
    return End();
}
// Const past-the-end iterator: represented by a null node pointer.
ConstIterator End() const
{
    ConstIterator pastTheEnd(nullptr, this);
    return pastTheEnd;
}

unordered_set 的 iterator 也不支持修改，我们把 unordered_set 的第二个模板参数改成 const K 即可，
HashTable<K, const K, SetKeyOfT, Hash> _ht
unordered_map 的 iterator 不支持修改 key 但是可以修改 value，我们把 unordered_map 的第二个模板参数 pair 的第一个参数改成 const K 即可， HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht

// MyUnorderedSet.h
// The table stores `const K`, so elements cannot be modified through either
// iterator type.
using iterator = typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::Iterator;
using const_iterator = typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::ConstIterator;

// Iteration entry points; each one forwards to the underlying table.
iterator begin()
{
    return _ht.Begin();
}

iterator end()
{
    return _ht.End();
}

const_iterator begin() const
{
    return _ht.Begin();
}

const_iterator end() const
{
    return _ht.End();
}
    // MyUnorderedMap.h
    // The table stores pair<const K, V>: the key is immutable through the
    // iterator while the mapped value stays writable.
    using iterator = typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::Iterator;
    using const_iterator = typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::ConstIterator;

    // Iteration entry points; each one forwards to the underlying table.
    iterator begin()
    {
        return _ht.Begin();
    }

    iterator end()
    {
        return _ht.End();
    }

    const_iterator begin() const
    {
        return _ht.Begin();
    }

    const_iterator end() const
    {
        return _ht.End();
    }

实现了以上的代码，其余的就非常简单啦。

// Dereference: returns a reference to the element stored in the current node.
// const-qualified so it can also be called on a const iterator object; the
// iterator itself is not modified.
Ref operator*() const
{
    return _node->_data;
}
// Member access: returns the address of the element stored in the current
// node. const-qualified so it can also be called on a const iterator object.
Ptr operator->() const
{
    return &_node->_data;
}
bool operator!=(const Self& s)
{
return _node != s._node;
}

2.3、unordered_map 支持 [ ]

unordered_map 要支持 [ ] 主要需要修改 insert 返回值支持，修改 HashTable 中的 insert 返回值为 pair<Iterator, bool> Insert(const T& data)
有了 insert 的支持，operator[] 的实现就很简单了。

// Returns a reference to the value mapped to `key`. A pair holding a
// value-initialized V is passed to Insert; the iterator Insert returns refers
// either to the newly added element or to the one that already had this key.
V& operator[](const K& key)
{
    iterator pos = _ht.Insert(make_pair(key, V())).first;
    return pos->second;
}

三、unordered_map 和 unordered_set 代码实现

// MyUnorderedSet.h
namespace bit
{
template<
class K
, class Hash
= HashFunc<K>>
  class unordered_set
  {
  struct SetKeyOfT
  {
  const K&
  operator()(const K& key)
  {
  return key;
  }
  };
  public:
  typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>
    ::Iterator iterator;
    typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>
      ::ConstIterator const_iterator;
      iterator begin()
      {
      return _ht.Begin();
      }
      iterator end()
      {
      return _ht.End();
      }
      const_iterator begin() const
      {
      return _ht.Begin();
      }
      const_iterator end() const
      {
      return _ht.End();
      }
      pair<iterator, bool>
        insert(const K & key)
        {
        return _ht.Insert(key);
        }
        iterator Find(const K & key)
        {
        return _ht.Find(key);
        }
        bool Erase(const K & key)
        {
        return _ht.Erase(key);
        }
        private:
        hash_bucket::HashTable<K, const K, SetKeyOfT, Hash> _ht;
          };
          }
          // MyUnorderedMap.h
          namespace bit
          {
              // unordered_map built on hash_bucket::HashTable. The table stores
              // pair<const K, V>: keys are immutable through iterators while
              // mapped values stay writable.
              template<class K, class V, class Hash = HashFunc<K>>
              class unordered_map
              {
                  // Key extractor handed to the table.
                  // The parameter type matches the stored element type exactly
                  // (pair<const K, V>). Taking pair<K, V> instead would make
                  // every call build a converted temporary pair (copying key
                  // and value) and return a reference into that temporary.
                  struct MapKeyOfT
                  {
                      const K& operator()(const pair<const K, V>& kv) const
                      {
                          return kv.first;
                      }
                  };

              public:
                  typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::Iterator iterator;
                  typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::ConstIterator const_iterator;

                  // Iteration entry points; each one forwards to the table.
                  iterator begin()
                  {
                      return _ht.Begin();
                  }

                  iterator end()
                  {
                      return _ht.End();
                  }

                  const_iterator begin() const
                  {
                      return _ht.Begin();
                  }

                  const_iterator end() const
                  {
                      return _ht.End();
                  }

                  // Forwards to the table's Insert and returns its
                  // (iterator, inserted-flag) pair unchanged.
                  pair<iterator, bool> insert(const pair<K, V>& kv)
                  {
                      return _ht.Insert(kv);
                  }

                  // Returns a reference to the value mapped to `key`. A pair
                  // holding a value-initialized V is passed to Insert; the
                  // returned iterator refers either to the new element or to
                  // the one that already had this key.
                  V& operator[](const K& key)
                  {
                      pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
                      return ret.first->second;
                  }

                  // Forwards to the table's Find.
                  iterator Find(const K& key)
                  {
                      return _ht.Find(key);
                  }

                  // Forwards to the table's Erase.
                  bool Erase(const K& key)
                  {
                      return _ht.Erase(key);
                  }

              private:
                  hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht; // underlying bucket table
              };
          }
                    // HashTable.h
                    // Default hash functor: converts the key directly to size_t.
                    // The conversion is kept as a C-style cast so that every key
                    // type the original accepted is still accepted.
                    template<class K>
                    struct HashFunc
                    {
                        // const-qualified so the functor can also be invoked
                        // through a const object; it has no state to modify.
                        size_t operator()(const K& key) const
                        {
                            return (size_t)key;
                        }
                    };

                    // Specialization for string keys: polynomial hash that
                    // multiplies the running value by 131 before adding each
                    // character.
                    template<>
                    struct HashFunc<string>
                    {
                        size_t operator()(const string& key) const
                        {
                            size_t hash = 0;
                            for (auto e : key)
                            {
                                hash *= 131;
                                // Add the character as an unsigned byte so the
                                // result does not depend on whether plain char
                                // is signed on the target platform.
                                hash += static_cast<unsigned char>(e);
                            }
                            return hash;
                        }
                    };
                      namespace hash_bucket
                      {
                          // One element of a bucket's singly linked chain.
                          template<class T>
                          struct HashNode
                          {
                              T _data;            // stored element
                              HashNode<T>* _next; // next node in the same bucket, nullptr at the tail

                              HashNode(const T& data)
                                  : _data(data)
                                  , _next(nullptr)
                              {
                              }
                          };

                          // Forward declaration: the iterator stores a pointer to the table.
                          template<class K, class T, class KeyOfT, class Hash>
                          class HashTable;

                          // Forward iterator over the table. It wraps the current node plus
                          // a pointer to the owning table, which operator++ needs in order
                          // to move on to the next bucket. Ptr / Ref select between the
                          // mutable and the read-only flavour.
                          template<class K, class T, class Ptr, class Ref, class KeyOfT, class Hash>
                          struct HTIterator
                          {
                              typedef HashNode<T> Node;
                              typedef HTIterator<K, T, Ptr, Ref, KeyOfT, Hash> Self;

                              Node* _node;                               // current node; nullptr means end()
                              const HashTable<K, T, KeyOfT, Hash>* _pht; // table the node belongs to

                              HTIterator(Node* node, const HashTable<K, T, KeyOfT, Hash>* pht)
                                  : _node(node)
                                  , _pht(pht)
                              {
                              }

                              // The accessors and comparisons below are const-qualified so
                              // they also work on const iterator objects; none of them
                              // modifies the iterator.
                              Ref operator*() const
                              {
                                  return _node->_data;
                              }

                              Ptr operator->() const
                              {
                                  return &_node->_data;
                              }

                              bool operator!=(const Self& s) const
                              {
                                  return _node != s._node;
                              }

                              // Counterpart of operator!=: equal when both point at the same node.
                              bool operator==(const Self& s) const
                              {
                                  return _node == s._node;
                              }

                              // Pre-increment. Dereferences the current node, so it must not
                              // be applied to end().
                              Self& operator++()
                              {
                                  if (_node->_next)
                                  {
                                      // Still inside the current chain.
                                      _node = _node->_next;
                                  }
                                  else
                                  {
                                      // Chain exhausted: find the next non-empty bucket.
                                      KeyOfT kot;
                                      Hash hs;
                                      size_t hashi = hs(kot(_node->_data)) % _pht->_tables.size();
                                      ++hashi;
                                      while (hashi < _pht->_tables.size())
                                      {
                                          if (_pht->_tables[hashi])
                                          {
                                              break;
                                          }
                                          ++hashi;
                                      }
                                      if (hashi == _pht->_tables.size())
                                      {
                                          _node = nullptr; // end()
                                      }
                                      else
                                      {
                                          _node = _pht->_tables[hashi];
                                      }
                                  }
                                  return *this;
                              }
                          };

                          // Separate-chaining hash table shared by unordered_set / unordered_map.
                          //   K      - key type
                          //   T      - stored element type
                          //   KeyOfT - functor returning the key of a stored T
                          //   Hash   - functor mapping a key to size_t
                          //
                          // NOTE(review): the table owns its nodes through raw pointers and
                          // has no user-defined copy operations; copying a table would make
                          // two tables delete the same nodes in their destructors.
                          template<class K, class T, class KeyOfT, class Hash>
                          class HashTable
                          {
                              // The iterator reads _tables, so every HTIterator
                              // instantiation is a friend. The parameter names must differ
                              // from the enclosing template's: redeclaring K, T, ... here
                              // shadows the outer parameters, which is ill-formed.
                              template<class K2, class T2, class Ptr2, class Ref2, class KeyOfT2, class Hash2>
                              friend struct HTIterator;

                              typedef HashNode<T> Node;

                          public:
                              typedef HTIterator<K, T, T*, T&, KeyOfT, Hash> Iterator;
                              typedef HTIterator<K, T, const T*, const T&, KeyOfT, Hash> ConstIterator;

                              // Iterator to the head node of the first non-empty bucket,
                              // or End() when the table holds no elements.
                              Iterator Begin()
                              {
                                  if (_n == 0)
                                      return End();
                                  for (size_t i = 0; i < _tables.size(); i++)
                                  {
                                      Node* cur = _tables[i];
                                      if (cur)
                                      {
                                          return Iterator(cur, this);
                                      }
                                  }
                                  return End();
                              }

                              // Past-the-end iterator: null node pointer.
                              Iterator End()
                              {
                                  return Iterator(nullptr, this);
                              }

                              ConstIterator Begin() const
                              {
                                  if (_n == 0)
                                      return End();
                                  for (size_t i = 0; i < _tables.size(); i++)
                                  {
                                      Node* cur = _tables[i];
                                      if (cur)
                                      {
                                          return ConstIterator(cur, this);
                                      }
                                  }
                                  return End();
                              }

                              ConstIterator End() const
                              {
                                  return ConstIterator(nullptr, this);
                              }

                              // Returns the first prime in the table below that is >= n,
                              // or the largest entry when n exceeds all of them.
                              inline unsigned long __stl_next_prime(unsigned long n)
                              {
                                  static const int __stl_num_primes = 28;
                                  static const unsigned long __stl_prime_list[__stl_num_primes] =
                                  {
                                      53, 97, 193, 389, 769,
                                      1543, 3079, 6151, 12289, 24593,
                                      49157, 98317, 196613, 393241, 786433,
                                      1572869, 3145739, 6291469, 12582917, 25165843,
                                      50331653, 100663319, 201326611, 402653189, 805306457,
                                      1610612741, 3221225473, 4294967291
                                  };
                                  const unsigned long* first = __stl_prime_list;
                                  const unsigned long* last = __stl_prime_list + __stl_num_primes;
                                  const unsigned long* pos = lower_bound(first, last, n);
                                  return pos == last ? *(last - 1) : *pos;
                              }

                              // Starts with the smallest prime bucket count, all buckets empty.
                              HashTable()
                              {
                                  _tables.resize(__stl_next_prime(_tables.size()), nullptr);
                              }

                              // Frees every node of every bucket.
                              ~HashTable()
                              {
                                  for (size_t i = 0; i < _tables.size(); i++)
                                  {
                                      Node* cur = _tables[i];
                                      while (cur)
                                      {
                                          Node* next = cur->_next;
                                          delete cur;
                                          cur = next;
                                      }
                                      _tables[i] = nullptr;
                                  }
                              }

                              // Inserts `data` unless an element with the same key exists.
                              // Returns (iterator to the element, true) on insertion, or
                              // (iterator to the existing element, false) on a duplicate.
                              pair<Iterator, bool> Insert(const T& data)
                              {
                                  KeyOfT kot;
                                  Iterator it = Find(kot(data));
                                  if (it != End())
                                      return make_pair(it, false);

                                  Hash hs;
                                  // Load factor == 1: grow to the next prime and relink the
                                  // existing nodes (no node is reallocated).
                                  if (_n == _tables.size())
                                  {
                                      vector<Node*> newtables(__stl_next_prime(_tables.size() + 1), nullptr);
                                      for (size_t i = 0; i < _tables.size(); i++)
                                      {
                                          Node* cur = _tables[i];
                                          while (cur)
                                          {
                                              Node* next = cur->_next;
                                              // Re-map the node to its bucket in the new table.
                                              size_t newIndex = hs(kot(cur->_data)) % newtables.size();
                                              // Push front into the new bucket.
                                              cur->_next = newtables[newIndex];
                                              newtables[newIndex] = cur;
                                              cur = next;
                                          }
                                          _tables[i] = nullptr;
                                      }
                                      _tables.swap(newtables);
                                  }

                                  // The bucket index must be computed after any growth: an
                                  // index taken against the old bucket count would place the
                                  // node in a bucket that Find and Erase never inspect.
                                  size_t hashi = hs(kot(data)) % _tables.size();

                                  // Push front into the bucket.
                                  Node* newnode = new Node(data);
                                  newnode->_next = _tables[hashi];
                                  _tables[hashi] = newnode;
                                  ++_n;
                                  return make_pair(Iterator(newnode, this), true);
                              }

                              // Returns an iterator to the element with `key`, or End().
                              Iterator Find(const K& key)
                              {
                                  KeyOfT kot;
                                  Hash hs;
                                  size_t hashi = hs(key) % _tables.size();
                                  Node* cur = _tables[hashi];
                                  while (cur)
                                  {
                                      if (kot(cur->_data) == key)
                                      {
                                          return Iterator(cur, this);
                                      }
                                      cur = cur->_next;
                                  }
                                  return End();
                              }

                              // Removes the element with `key`. Returns true when an element
                              // was removed, false when the key was not present.
                              bool Erase(const K& key)
                              {
                                  KeyOfT kot;
                                  Hash hs;
                                  size_t hashi = hs(key) % _tables.size();
                                  Node* prev = nullptr;
                                  Node* cur = _tables[hashi];
                                  while (cur)
                                  {
                                      if (kot(cur->_data) == key)
                                      {
                                          if (prev == nullptr)
                                          {
                                              // Removing the head of the chain.
                                              _tables[hashi] = cur->_next;
                                          }
                                          else
                                          {
                                              prev->_next = cur->_next;
                                          }
                                          delete cur;
                                          --_n;
                                          return true;
                                      }
                                      prev = cur;
                                      cur = cur->_next;
                                  }
                                  return false;
                              }

                          private:
                              vector<Node*> _tables; // bucket array: head pointer of each chain
                              size_t _n = 0;         // number of stored elements
                          };
                      }

总结

到此为止，我们已经学会了 C++ 的大部分容器，只剩下像位图这类不算特别困难的结构，后面有机会再来细谈。从下一章节开始，我们会聊聊 C++ 的一次重大更新——C++11 中一些非常实用的新特性，我们下一章节再见吧。

坚持到这里已经很厉害啦，辛苦啦 ʕ • ᴥ • ʔ づ♡ど

posted on 2025-09-18 21:12 lxjshuju 阅读(14) 评论(0) 收藏举报