【C++进阶】封装哈希表实现myunordered_map和myunordered_set
🥕个人主页:开敲🍉
🔥所属专栏:C++🥭
🌼文章目录🌼
2. 模拟实现unordered_map和unordered_set
2.3 myunordered_map和myunordered_set完整代码实现
1. 源码及框架分析
SGI-STL30版本源代码中没有unordered_map和unordered_set,因为SGI-STL30版本是C++11之前的STL版本,而这两个容器是C++11之后才加入标准的。但是SGI-STL30实现了哈希表,只是容器的名字是hash_map和hash_set,它们是作为非标准的容器出现的,非标准是指非C++标准规定必须实现的。源代码在hash_map/hash_set/stl_hash_map/stl_hash_set/stl_hashtable.h中,hash_map和hash_set的实现结构框架核心部分截取出来如下:
// stl_hash_set
template <class Value, class HashFcn = hash<Value>,
class EqualKey = equal_to<Value>,
class Alloc = alloc>
class hash_set
{
p
rivate :
typedef hashtable<Value, Value, HashFcn, identity<Value>,
EqualKey, Alloc> ht;
ht rep;
public:
typedef typename ht::key_type key_type;
typedef typename ht::value_type value_type;
typedef typename ht::hasher hasher;
typedef typename ht::key_equal key_equal;
typedef typename ht::const_iterator iterator;
typedef typename ht::const_iterator const_iterator;
hasher hash_funct() const { return rep.hash_funct(); }
key_equal key_eq() const { return rep.key_eq(); }
};
// stl_hash_map
template <class Key, class T, class HashFcn = hash<Key>,
class EqualKey = equal_to<Key>,
class Alloc = alloc>
class hash_map
{
p
rivate :
typedef hashtable<pair<const Key, T>, Key, HashFcn,
select1st<pair<const Key, T> >, EqualKey, Alloc> ht;
ht rep;
public:
typedef typename ht::key_type key_type;
typedef T data_type;
typedef T mapped_type;
typedef typename ht::value_type value_type;
typedef typename ht::hasher hasher;
typedef typename ht::key_equal key_equal;
typedef typename ht::iterator iterator;
typedef typename ht::const_iterator const_iterator;
};
// stl_hashtable.h
// Excerpt of the SGI-STL hashtable shared by hash_set and hash_map.
// Value is the stored element type, Key the lookup key type; HashFcn,
// ExtractKey and EqualKey are the functor types kept as members below.
template <class Value, class Key, class HashFcn,
class ExtractKey, class EqualKey,
class Alloc>
class hashtable {
public:
typedef Key key_type;
typedef Value value_type;
typedef HashFcn hasher;
typedef EqualKey key_equal;
private:
// Functor instances: hashing, key comparison, and key extraction from a Value.
hasher hash;
key_equal equals;
ExtractKey get_key;
typedef __hashtable_node<Value> node;
// One node pointer per bucket.
vector<node*, Alloc> buckets;
// NOTE(review): presumably the number of stored elements; size_type is
// declared outside this excerpt.
size_type num_elements;
public:
typedef __hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey,
Alloc> iterator;
// Declarations only; the definitions (and const_iterator) are not part of
// this excerpt.
pair<iterator, bool> insert_unique(const value_type& obj);
const_iterator find(const key_type& key) const;
};
// Singly linked node of the SGI-STL hashtable: the stored value plus a
// pointer to the next node.
template <class Value>
struct __hashtable_node
{
__hashtable_node* next;
Value val;
};
通过源码可以看到,结构上hash_map和hash_set跟map和set完全类似,复用同一个hashtable实现key和key/value结构,hash_set传给hashtable的是两个key,hash_map传给hashtable的是pair<const key, value>。
2. 模拟实现unordered_map和unordered_set
2.1 实现出复用哈希表的框架,并支持insert
① 参考源码框架,unordered_map和unordered_set复用之前我们实现的哈希表。
② 我们这里相比源码调整一下,key参数就用K,value参数就用V,哈希表中的数据类型,我们使用T。
③ 其次跟map和set相比而言unordered_map和unordered_set的模拟实现类结构更复杂一点,但是大框架和思路是完全类似的。因为HashTable实现了泛型,不知道T参数到底是K,还是pair<K, V>,而insert内部进行插入时要把K对象转换成整型取模,并用K比较相等;pair的value不参与取模计算,且pair默认支持的是key和value一起比较相等,而我们任何时候都只需要比较K对象。所以我们在unordered_map和unordered_set层分别实现一个MapKeyOfT和SetKeyOfT的仿函数传给HashTable的KeyOfT,然后HashTable中通过KeyOfT仿函数取出T类型对象中的K对象,再转换成整型取模和用K比较相等,具体细节参考如下代码实现。
#pragma once
#include "Hash.h"
//unordered_map参考实现代码
namespace gjk
{
    // Minimal unordered_map wrapper: every operation forwards to a Hash table
    // whose elements are pair<K, V>.
    template <class K, class V>
    class Unordered_map
    {
        // Key extractor given to Hash: returns the first member of the pair.
        // The call operator is const so it can also be used on a const functor.
        struct MapKeyOfT
        {
            const K& operator()(const std::pair<K, V>& kv) const
            {
                return kv.first;
            }
        };

    public:
        // Forwards to Hash::Insert and returns its result.
        bool insert(const std::pair<K, V>& data)
        {
            return _h.Insert(data);
        }

        // Forwards to Hash::Erase and returns its result.
        bool erase(const std::pair<K, V>& kv)
        {
            return _h.Erase(kv);
        }

    private:
        Hash<K, std::pair<K, V>, MapKeyOfT> _h;
    };
}
#pragma once
#include "Hash.h"
//unordered_set参考实现代码
namespace gjk
{
    // Minimal unordered_set wrapper: every operation forwards to a Hash table.
    // Elements are stored as pair<K, K>, and callers pass such a pair.
    template <class K>
    class Unordered_set
    {
        // Key extractor given to Hash: returns the first member of the pair.
        // The call operator is const so it can also be used on a const functor.
        struct SetKeyOfT
        {
            const K& operator()(const std::pair<K, K>& key) const
            {
                return key.first;
            }
        };

    public:
        // Forwards to Hash::Insert and returns its result.
        bool insert(const std::pair<K, K>& data)
        {
            return _h.Insert(data);
        }

        // Forwards to Hash::Erase and returns its result.
        bool erase(const std::pair<K, K>& data)
        {
            return _h.Erase(data);
        }

    private:
        Hash<K, std::pair<K, K>, SetKeyOfT> _h;
    };
}
2.2 支持iterator的实现
iterator核心源码:
// Excerpt of the SGI-STL hashtable iterator. It holds the current node and a
// pointer to the owning table; the out-of-class operator++ uses that table
// pointer to move on to a later bucket.
template <class Value, class Key, class HashFcn,
class ExtractKey, class EqualKey, class Alloc>
struct __hashtable_iterator {
typedef hashtable<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc>
hashtable;
typedef __hashtable_iterator<Value, Key, HashFcn,
ExtractKey, EqualKey, Alloc>
iterator;
typedef __hashtable_const_iterator<Value, Key, HashFcn,
ExtractKey, EqualKey, Alloc>
const_iterator;
typedef __hashtable_node<Value> node;
// Declared as a forward iterator: only operator++ is provided here.
typedef forward_iterator_tag iterator_category;
typedef Value value_type;
// Current node.
node* cur;
// Owning table.
hashtable* ht;
__hashtable_iterator(node* n, hashtable* tab) : cur(n), ht(tab) {}
__hashtable_iterator() {}
// reference/pointer are typedefs declared outside this excerpt.
reference operator*() const { return cur->val; }
#ifndef __SGI_STL_NO_ARROW_OPERATOR
pointer operator->() const { return &(operator*()); }
#endif /* __SGI_STL_NO_ARROW_OPERATOR */
iterator& operator++();
iterator operator++(int);
// Equality compares the node pointers only; ht is not compared.
bool operator==(const iterator& it) const { return cur == it.cur; }
bool operator!=(const iterator& it) const { return cur != it.cur; }
};
// Pre-increment. Moves to the next node of the current chain; when the chain
// is exhausted, recomputes the bucket of the node just left and scans the
// following buckets for the first non-null head. cur stays null when no later
// bucket holds a node.
template <class V, class K, class HF, class ExK, class EqK, class A>
__hashtable_iterator<V, K, HF, ExK, EqK, A>&
__hashtable_iterator<V, K, HF, ExK, EqK, A>::operator++()
{
    const node* old = cur;
    cur = cur->next;
    if (!cur) {
        size_type bucket = ht->bkt_num(old->val);
        while (!cur && ++bucket < ht->buckets.size())
            cur = ht->buckets[bucket];
    }
    return *this;
}
iterator实现思路分析:
① iterator实现的大框架跟list的iterator思路是一致的,用一个类型封装结点的指针,再通过重载运算符,实现迭代器像指针一样访问的行为,要注意的是哈希表的迭代器是单向迭代器。
② 这里的难点是operator++的实现。iterator中有一个指向结点的指针,如果当前桶下面还有结点,则结点的指针指向下一个结点即可。如果当前桶走完了,则需要想办法计算找到下一个桶。这里的难点反而是结构设计的问题,参考上面的源码,我们可以看到iterator中除了有结点的指针,还有哈希表对象的指针,这样当前桶走完了,要计算下一个桶就相对容易多了,用key值计算出当前桶位置,依次往后找下一个不为空的桶即可。
③ begin()返回第一个非空桶中第一个节点指针构造的迭代器,这里end()返回的迭代器可以用空指针表示。
④ unordered_set的iterator也不支持修改,我们把unordered_set的第二个模板参数改成const K即可, HashTable<K, const K, SetKeyOfT, Hash> _ht。
⑤ unordered_map的iterator不支持修改key但是可以修改value,我们把unordered_map的第二个模板参数pair的第⼀个参数改成const K即可, HashTable<K, pair<const K, V>,MapKeyOfT, Hash> _ht。
⑥ 支持完整的迭代器还有很多细节需要修改,具体参考下面的代码。
2.3 myunordered_map和myunordered_set完整代码实现
#pragma once
#include "Hash.h"
//unordered_map参考实现代码
namespace gjk
{
template <class K,class V>
class Unordered_map
{
struct MapKeyOfT
{
const K& operator()(const pair<K,V>& kv)
{
return kv.first;
}
};
public:
typedef typename Hash<K, pair<K, V>, MapKeyOfT>::Iterator iterator;
typedef typename Hash<K, pair<K, V>, MapKeyOfT>::ConstIterator const_iterator;
bool insert(const pair<K, V>& data)
{
return _h.Insert(data);
}
bool erase(const pair<K, V>& kv)
{
return _h.Erase(kv);
}
iterator begin()
{
return _h.Begin();
}
const_iterator begin() const
{
return _h.Begin();
}
iterator end()
{
return _h.End();
}
const_iterator end() const
{
return _h.End();
}
private:
Hash<K, pair<K,V>, MapKeyOfT> _h;
};
}
#pragma once
#include "Hash.h"
//unordered_set参考实现代码
namespace gjk
{
template <class K>
class Unordered_set
{
struct SetKeyOfT
{
const K& operator()(const pair<K,K>& key)
{
return key.first;
}
};
public:
typedef typename Hash<K, pair<K, K>, SetKeyOfT>::Iterator iterator;
typedef typename Hash<K, pair<K, K>, SetKeyOfT>::ConstIterator const_iterator;
iterator begin()
{
return _h.Begin();
}
const_iterator begin() const
{
return _h.Begin();
}
iterator end()
{
return _h.End();
}
const_iterator end() const
{
return _h.End();
}
bool insert(const pair<K,K>& data)
{
return _h.Insert(data);
}
bool erase(const pair<K, K>& data)
{
return _h.Erase(data);
}
private:
Hash<K, pair<K,K>,SetKeyOfT> _h;
};
}
#pragma once
#include <cstddef>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using namespace std;
//hash表参考实现代码
namespace gjk
{
    // State flag carried by every node. Insert marks linked nodes EXIST.
    enum State
    {
        EXIST,
        EMPTY,
        DELETE
    };

    // Chain node. Nodes of one bucket are doubly linked (_next / _parent) so a
    // node can be unlinked during erase without re-walking its chain.
    template <class T>
    struct HashDataNode
    {
        T _data;
        HashDataNode<T>* _next = nullptr;
        HashDataNode<T>* _parent = nullptr;
        State _state = EMPTY;

        // Copy-constructs the payload so T does not have to be assignable.
        explicit HashDataNode(const T& data = T())
            : _data(data)
        {}
    };

    template <class K, class T, class KeyOfT, class getk>
    class Hash;

    // Iterator over every element of a Hash table.
    //
    // Holds the current node plus a pointer to the owning table (the table is
    // NOT copied), so that stepping off the end of one chain can continue with
    // the next non-empty bucket. A null node is the end() position.
    // Ref / Ptr select the mutable or the const flavour.
    template <class K, class T, class Ref, class Ptr, class KeyOfT, class getk>
    struct HashIterator
    {
        typedef HashDataNode<T> Node;
        typedef HashIterator<K, T, Ref, Ptr, KeyOfT, getk> Self;
        typedef Hash<K, T, KeyOfT, getk> HT;

        Node* _node;
        const HT* _ht;

        // Taking the table by const reference lets both Begin() and
        // Begin() const build an iterator.
        HashIterator(Node* node, const HT& ht)
            : _node(node)
            , _ht(&ht)
        {}

        // Pre-increment: next node in the chain, else the head of the next
        // non-empty bucket, else end(). Incrementing end() is a no-op.
        Self& operator++()
        {
            if (!_node)
                return *this;
            if (_node->_next)
            {
                _node = _node->_next;
                return *this;
            }
            const size_t size = _ht->_arr.size();
            // The bucket is derived through KeyOfT/getk, exactly as Insert does.
            size_t sub = HT::BucketIndex(_node->_data, size) + 1;
            while (sub < size && !_ht->_arr[sub])
                ++sub;
            _node = sub < size ? _ht->_arr[sub] : nullptr;
            return *this;
        }

        // Post-increment: advances and returns the position held before.
        Self operator++(int)
        {
            Self old(*this);
            ++(*this);
            return old;
        }

        // Pre-decrement: previous node in the chain, else the tail of the
        // closest earlier non-empty bucket. Decrementing end() yields the last
        // element; decrementing the first element yields end().
        Self& operator--()
        {
            if (_node && _node->_parent)
            {
                _node = _node->_parent;
                return *this;
            }
            size_t sub = _node ? HT::BucketIndex(_node->_data, _ht->_arr.size())
                               : _ht->_arr.size();
            _node = nullptr;
            // Counts down with "sub > 0" because size_t can never be negative.
            while (sub > 0 && !_node)
            {
                --sub;
                _node = _ht->_arr[sub];
            }
            if (_node)
            {
                while (_node->_next)
                    _node = _node->_next;
            }
            return *this;
        }

        // Post-decrement: steps back and returns the position held before.
        Self operator--(int)
        {
            Self old(*this);
            --(*this);
            return old;
        }

        Ref operator*() const
        {
            return _node->_data;
        }

        Ptr operator->() const
        {
            return &_node->_data;
        }

        // Iterators compare equal when they reference the same node.
        bool operator!=(const Self& s) const
        {
            return _node != s._node;
        }

        bool operator==(const Self& s) const
        {
            return _node == s._node;
        }
    };

    // Default key-to-integer functor: converts the key to size_t.
    template <class K>
    struct GetK
    {
        size_t operator()(const K& key) const
        {
            // C-style cast kept on purpose: it also accepts pointer keys,
            // which static_cast would reject.
            return (size_t)key;
        }
    };

    // String keys: polynomial hash with multiplier 31.
    template <>
    struct GetK<std::string>
    {
        size_t operator()(const std::string& key) const
        {
            size_t sum = 0;
            for (const char ch : key)
            {
                // Go through unsigned char so the result does not depend on
                // whether plain char is signed.
                sum = sum * 31 + static_cast<unsigned char>(ch);
            }
            return sum;
        }
    };

    // Separate-chaining hash table.
    //   K      - key type
    //   T      - stored element type
    //   KeyOfT - default-constructible functor extracting the K from a T
    //   getk   - default-constructible functor turning a K into a size_t
    // Keys are compared with operator==. The table owns its nodes.
    template <class K, class T, class KeyOfT, class getk = GetK<K>>
    class Hash
    {
        // Parameter names differ from the enclosing template's on purpose:
        // reusing them would shadow K/T/KeyOfT/getk, which is ill-formed.
        template <class K2, class T2, class Ref2, class Ptr2, class KeyOfT2, class getk2>
        friend struct HashIterator;

        typedef HashDataNode<T> Node;

    public:
        typedef HashIterator<K, T, T&, T*, KeyOfT, getk> Iterator;
        typedef HashIterator<K, T, const T&, const T*, KeyOfT, getk> ConstIterator;

        // Creates an empty table with `size` buckets; 0 is raised to 1 so the
        // modulo in BucketIndex never divides by zero.
        Hash(size_t size = 10)
            : _arr(size ? size : 1)
            , _n(0)
        {}

        // Deep copy: every element is re-inserted into fresh nodes.
        Hash(const Hash& other)
            : Hash(other._arr.size())
        {
            for (Node* cur : other._arr)
            {
                for (; cur; cur = cur->_next)
                    Insert(cur->_data);
            }
        }

        // Move: takes over other's buckets, leaving it a valid empty table.
        Hash(Hash&& other)
            : Hash(1)
        {
            Swap(other);
        }

        // Copy-and-swap; serves both copy and move assignment.
        Hash& operator=(Hash other)
        {
            Swap(other);
            return *this;
        }

        ~Hash()
        {
            Clear();
        }

        // Iterator to the head of the first non-empty bucket, or End().
        Iterator Begin()
        {
            for (size_t i = 0; i < _arr.size(); ++i)
            {
                if (_arr[i])
                    return Iterator(_arr[i], *this);
            }
            return End();
        }

        ConstIterator Begin() const
        {
            for (size_t i = 0; i < _arr.size(); ++i)
            {
                if (_arr[i])
                    return ConstIterator(_arr[i], *this);
            }
            return End();
        }

        // The end position is represented by a null node.
        Iterator End()
        {
            return Iterator(nullptr, *this);
        }

        ConstIterator End() const
        {
            return ConstIterator(nullptr, *this);
        }

        // Inserts a copy of `data` unless an element with an equal key is
        // already stored. Returns true when the element was inserted.
        bool Insert(const T& data)
        {
            if (Find(data))
                return false;

            // Grow once the load factor reaches 1. Growing before allocating
            // the node means a failed allocation leaks nothing.
            if (_n >= _arr.size())
                Rehash(_arr.size() * 2);

            Node* newnode = new Node(data);
            newnode->_state = EXIST;

            Node*& head = _arr[BucketIndex(data, _arr.size())];
            if (!head)
            {
                head = newnode;
            }
            else
            {
                // Append at the tail of the chain.
                Node* tail = head;
                while (tail->_next)
                    tail = tail->_next;
                tail->_next = newnode;
                newnode->_parent = tail;
            }
            ++_n;
            return true;
        }

        // Returns the node whose key equals the key of `data`, or nullptr.
        // Only the key part of `data` is looked at.
        Node* Find(const T& data) const
        {
            KeyOfT kot;
            for (Node* cur = _arr[BucketIndex(data, _arr.size())]; cur; cur = cur->_next)
            {
                if (kot(cur->_data) == kot(data))
                    return cur;
            }
            return nullptr;
        }

        // Removes the element whose key equals the key of `data`.
        // Returns false when there is no such element.
        bool Erase(const T& data)
        {
            Node* ret = Find(data);
            if (!ret)
                return false;

            Node* prev = ret->_parent;
            Node* next = ret->_next;
            if (prev)
                prev->_next = next;
            else
                _arr[BucketIndex(ret->_data, _arr.size())] = next;  // ret was the bucket head
            if (next)
                next->_parent = prev;

            delete ret;
            --_n;
            return true;
        }

        // Debug helper: streams the `second` member of every element to cout.
        // Only usable when T has such a member.
        void Printf() const
        {
            for (Node* cur : _arr)
            {
                for (; cur; cur = cur->_next)
                    std::cout << cur->_data.second << "";
            }
            std::cout << std::endl;
        }

    private:
        // Bucket that `data` belongs to in a table of `bucketCount` buckets.
        static size_t BucketIndex(const T& data, size_t bucketCount)
        {
            KeyOfT kot;
            getk gk;
            return gk(kot(data)) % bucketCount;
        }

        // Moves every node into a bucket array of `newSize` slots, recomputing
        // each node's bucket. Nodes are relinked, not reallocated.
        void Rehash(size_t newSize)
        {
            std::vector<Node*> fresh(newSize);
            for (Node* cur : _arr)
            {
                while (cur)
                {
                    Node* next = cur->_next;
                    Node*& head = fresh[BucketIndex(cur->_data, newSize)];
                    // Push at the front of the destination chain.
                    cur->_parent = nullptr;
                    cur->_next = head;
                    if (head)
                        head->_parent = cur;
                    head = cur;
                    cur = next;
                }
            }
            _arr.swap(fresh);
        }

        // Frees every node and leaves all buckets empty.
        void Clear() noexcept
        {
            for (Node*& head : _arr)
            {
                while (head)
                {
                    Node* next = head->_next;
                    delete head;
                    head = next;
                }
            }
            _n = 0;
        }

        void Swap(Hash& other) noexcept
        {
            _arr.swap(other._arr);
            std::swap(_n, other._n);
        }

        std::vector<Node*> _arr;  // bucket heads
        size_t _n;                // number of stored elements
    };
}
创作不易,点个赞呗,蟹蟹啦~