【C++进阶】封装哈希表实现myunordered_map和myunordered_set

本文链接：https://blog.csdn.net/2301_78022459/article/details/143103698

【C++进阶】封装哈希表实现myunordered_map和myunordered_set

🥕个人主页：开敲🍉

🔥所属专栏：C++🥭

🌼文章目录🌼

1. 源码及框架分析

2. 模拟实现unordered_map和unordered_set

2.1 实现出复用哈希表的框架，并支持insert

2.2 支持iterator的实现

2.3 myunordered_map和myunordered_set完整代码实现

1. 源码及框架分析

SGI-STL30版本源代码中没有unordered_map和unordered_set：SGI-STL30是C++11之前的STL版本，而这两个容器是C++11之后才加入标准的。不过SGI-STL30实现了哈希表，只是容器的名字叫hash_map和hash_set，它们是作为非标准容器出现的（非标准是指C++标准没有规定必须实现）。源代码在hash_map、hash_set、stl_hash_map、stl_hash_set、stl_hashtable.h这几个文件中，hash_map和hash_set的实现结构框架核心部分截取出来如下：

// stl_hash_set
template <class Value, class HashFcn = hash<Value>,
	class EqualKey = equal_to<Value>,
	class Alloc = alloc>
class hash_set
{
	p
		rivate :
	typedef hashtable<Value, Value, HashFcn, identity<Value>,
		EqualKey, Alloc> ht;
	ht rep;
public:
	typedef typename ht::key_type key_type;
	typedef typename ht::value_type value_type;
	typedef typename ht::hasher hasher;
	typedef typename ht::key_equal key_equal;
	typedef typename ht::const_iterator iterator;
	typedef typename ht::const_iterator const_iterator;
	hasher hash_funct() const { return rep.hash_funct(); }
	key_equal key_eq() const { return rep.key_eq(); }
};
// stl_hash_map
template <class Key, class T, class HashFcn = hash<Key>,
	class EqualKey = equal_to<Key>,
	class Alloc = alloc>
class hash_map
{
	p
		rivate :
	typedef hashtable<pair<const Key, T>, Key, HashFcn,
		select1st<pair<const Key, T> >, EqualKey, Alloc> ht;
	ht rep;
public:
	typedef typename ht::key_type key_type;
	typedef T data_type;
	typedef T mapped_type;
	typedef typename ht::value_type value_type;
	typedef typename ht::hasher hasher;
	typedef typename ht::key_equal key_equal;
	typedef typename ht::iterator iterator;
	typedef typename ht::const_iterator const_iterator;
};
// stl_hashtable.h
// Excerpt of SGI STL's hashtable, the table shared by hash_set and hash_map.
// Value is the stored element type and Key the lookup key type; the excerpt
// is abridged, so size_type and const_iterator are used without being shown.
template <class Value, class Key, class HashFcn,
	class ExtractKey, class EqualKey,
	class Alloc>
class hashtable {
public:
	typedef Key key_type;
	typedef Value value_type;
	typedef HashFcn hasher;
	typedef EqualKey key_equal;
private:
	// Functor instances; presumably used to hash a key, compare two keys,
	// and obtain the key from a stored Value (their uses are not shown here).
	hasher hash;
	key_equal equals;
	ExtractKey get_key;
	typedef __hashtable_node<Value> node;
	// Bucket array: each slot holds a pointer to a node.
	vector<node*, Alloc> buckets;
	// Presumably the number of stored elements (not shown being updated here).
	size_type num_elements;
public:
	typedef __hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey,
		Alloc> iterator;
	// Declarations only; the definitions are not part of this excerpt.
	pair<iterator, bool> insert_unique(const value_type& obj);
	const_iterator find(const key_type& key) const;
};
// Node type used by the hashtable excerpt: a singly linked node that carries
// one Value and a pointer to the next node.
template <class Value>
struct __hashtable_node
{
	__hashtable_node* next;
	Value val;
};

通过源码可以看到，结构上hash_map和hash_set跟map和set完全类似，复用同一个hashtable实现key和key/value结构：hash_set传给hashtable的是两个key，hash_map传给hashtable的是pair<const key, value>。

2. 模拟实现unordered_map和unordered_set

2.1 实现出复用哈希表的框架，并支持insert

① 参考源码框架，unordered_map和unordered_set复用之前我们实现的哈希表。

② 我们这里相比源码调整一下，key参数就用K，value参数就用V，哈希表中的数据类型，我们使用T。

③ 其次，跟map和set相比，unordered_map和unordered_set的模拟实现类结构更复杂一点，但是大框架和思路是完全类似的。因为HashTable实现了泛型，不知道T参数到底是K，还是pair<K, V>，而insert内部进行插入时，需要把K对象转换成整型取模，并用K比较相等；但pair的value不参与取模计算，且pair默认支持的是key和value一起比较相等，而我们任何时候都只需要比较K对象。所以我们在unordered_map和unordered_set层分别实现一个MapKeyOfT和SetKeyOfT的仿函数传给HashTable的KeyOfT，然后HashTable中通过KeyOfT仿函数取出T类型对象中的K对象，再转换成整型取模和比较相等，具体细节参考如下代码实现。

#pragma once


#include "Hash.h"

//unordered_map参考实现代码
namespace gjk
{
	template <class K,class V>
	class Unordered_map
	{
		struct MapKeyOfT
		{
			const K& operator()(const pair<K,V>& kv)
			{
				return kv.first;
			}
		};
	public:

		bool insert(const pair<K, V>& data)
		{
			return _h.Insert(data);
		}

		bool erase(const pair<K, V>& kv)
		{
			return _h.Erase(kv);
		}
	private:
		Hash<K, pair<K,V>, MapKeyOfT> _h;
	};
}

#pragma once


#include "Hash.h"


//unordered_set参考实现代码
namespace gjk
{
	template <class K>
	class Unordered_set
	{
		struct SetKeyOfT
		{
			const K& operator()(const pair<K,K>& key)
			{
				return key.first;
			}
		};
	public:

		bool insert(const pair<K,K>& data)
		{
			return _h.Insert(data);
		}

		bool erase(const pair<K, K>& data)
		{
			return _h.Erase(data);
		}
	private:
		Hash<K, pair<K,K>,SetKeyOfT> _h;
	};
}

2.2 支持iterator的实现

iterator核心源码：

// Excerpt of SGI STL's hashtable iterator. It holds the current node plus a
// pointer to the owning table; the excerpt is abridged, so the `reference`
// and `pointer` typedefs are used without being shown.
template <class Value, class Key, class HashFcn,
	class ExtractKey, class EqualKey, class Alloc>
struct __hashtable_iterator {
	typedef hashtable<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc>
		hashtable;
	typedef __hashtable_iterator<Value, Key, HashFcn,
		ExtractKey, EqualKey, Alloc>
		iterator;
	typedef __hashtable_const_iterator<Value, Key, HashFcn,
		ExtractKey, EqualKey, Alloc>
		const_iterator;
	typedef __hashtable_node<Value> node;
	// Tagged as a forward iterator: only operator++ is declared below.
	typedef forward_iterator_tag iterator_category;
	typedef Value value_type;
	// Node currently referred to.
	node* cur;
	// Table the node belongs to; operator++ uses it to reach later buckets.
	hashtable* ht;
	__hashtable_iterator(node* n, hashtable* tab) : cur(n), ht(tab) {}
	__hashtable_iterator() {}
	reference operator*() const { return cur->val; }
#ifndef __SGI_STL_NO_ARROW_OPERATOR
	pointer operator->() const { return &(operator*()); }
#endif /* __SGI_STL_NO_ARROW_OPERATOR */
	iterator& operator++();
	iterator operator++(int);
	// Equality compares the node pointer only; the table pointer is ignored.
	bool operator==(const iterator& it) const { return cur == it.cur; }
	bool operator!=(const iterator& it) const { return cur != it.cur; }
};
// Pre-increment: step to the next node in the current chain; when the chain
// is exhausted, scan the following buckets for the first non-empty one.
// `cur` ends up null when no later bucket holds a node.
template <class V, class K, class HF, class ExK, class EqK, class A>
__hashtable_iterator<V, K, HF, ExK, EqK, A>&
__hashtable_iterator<V, K, HF, ExK, EqK, A>::operator++()
{
	// Remember the node we came from: its value is needed to recompute the
	// bucket index once `cur` has become null.
	const node* old = cur;
	cur = cur->next;
	if (!cur) {
		size_type bucket = ht->bkt_num(old->val);
		while (!cur && ++bucket < ht->buckets.size())
			cur = ht->buckets[bucket];
	}
	return *this;
}

iterator实现思路分析：

① iterator实现的大框架跟list的iterator思路是一致的，用一个类型封装结点的指针，再通过重载运算符实现迭代器像指针一样访问的行为，要注意的是哈希表的迭代器是单向迭代器。

② 这里的难点是operator++的实现。iterator中有一个指向结点的指针，如果当前桶下面还有结点，则结点的指针指向下一个结点即可。如果当前桶走完了，则需要想办法计算找到下一个桶。这里的难点反而是结构设计的问题，参考上面的源码，我们可以看到iterator中除了有结点的指针，还有哈希表对象的指针，这样当前桶走完了，要计算下一个桶就相对容易多了，用key值计算出当前桶位置，依次往后找下一个不为空的桶即可。

③ begin()返回第一个非空桶中第一个节点指针构造的迭代器，这里end()返回的迭代器可以用空指针表示。

④ unordered_set的iterator也不支持修改，我们把unordered_set的第二个模板参数改成const K即可， HashTable<K, const K, SetKeyOfT, Hash> _ht。

⑤ unordered_map的iterator不支持修改key但是可以修改value，我们把unordered_map的第二个模板参数pair的第一个参数改成const K即可， HashTable<K, pair<const K, V>,MapKeyOfT, Hash> _ht。

⑥ 支持完整的迭代器还有很多细节需要修改，具体参考下面的代码。

2.3 myunordered_map和myunordered_set完整代码实现

#pragma once
#include "Hash.h"

//unordered_map参考实现代码
namespace gjk
{
	template <class K,class V>
	class Unordered_map
	{
		struct MapKeyOfT
		{
			const K& operator()(const pair<K,V>& kv)
			{
				return kv.first;
			}
		};
	public:
		typedef typename Hash<K, pair<K, V>, MapKeyOfT>::Iterator iterator;
		typedef typename Hash<K, pair<K, V>, MapKeyOfT>::ConstIterator const_iterator;

		bool insert(const pair<K, V>& data)
		{
			return _h.Insert(data);
		}

		bool erase(const pair<K, V>& kv)
		{
			return _h.Erase(kv);
		}

		iterator begin()
		{
			return _h.Begin();
		}

		const_iterator begin() const
		{
			return _h.Begin();
		}

		iterator end()
		{
			return _h.End();
		}

		const_iterator end() const
		{
			return _h.End();
		}
	private:
		Hash<K, pair<K,V>, MapKeyOfT> _h;
	};
}

#pragma once
#include "Hash.h"

//unordered_set参考实现代码
namespace gjk
{
	template <class K>
	class Unordered_set
	{
		struct SetKeyOfT
		{
			const K& operator()(const pair<K,K>& key)
			{
				return key.first;
			}
		};
	public:
		typedef typename Hash<K, pair<K, K>, SetKeyOfT>::Iterator iterator;
		typedef typename Hash<K, pair<K, K>, SetKeyOfT>::ConstIterator const_iterator;

		iterator begin()
		{
			return _h.Begin();
		}

		const_iterator begin() const
		{
			return _h.Begin();
		}

		iterator end()
		{
			return _h.End();
		}

		const_iterator end() const
		{
			return _h.End();
		}

		bool insert(const pair<K,K>& data)
		{
			return _h.Insert(data);
		}

		bool erase(const pair<K, K>& data)
		{
			return _h.Erase(data);
		}
	private:
		Hash<K, pair<K,K>,SetKeyOfT> _h;
	};
}

#pragma once
#include <iostream>
#include <string>
#include <utility>
#include <vector>
using namespace std;

//hash表参考实现代码
namespace gjk
{
	// Tag stored on every chain node. Hash only ever assigns EXIST (in the
	// node constructor); the other values are kept for source compatibility.
	enum State
	{
		EXIST,
		EMPTY,
		DELETE
	};


	// One element of a bucket chain. Chains are doubly linked (_next/_parent)
	// so a node can be unlinked without walking the chain from its head.
	template <class T>
	struct HashDataNode
	{
		T _data;
		HashDataNode<T>* _next = nullptr;
		HashDataNode<T>* _parent = nullptr;
		State _state = EMPTY;

		HashDataNode() = default;

		// Copy-constructs the payload, so T needs neither a default
		// constructor nor assignment (e.g. pair<const K, V> works).
		explicit HashDataNode(const T& data)
			: _data(data)
			, _state(EXIST)
		{}
	};

	template <class K, class T, class KeyOfT, class getk>
	class Hash;

	// Iterator over every element of a Hash.
	//   Ref / Ptr : T& / T* for the mutable iterator, const T& / const T* for
	//               the const iterator.
	// The end iterator is represented by a null node pointer.
	template <class K,class T,class Ref,class Ptr,class KeyOfT,class getk>
	struct HashIterator
	{
		typedef HashDataNode<T> Node;
		typedef HashIterator<K,T, Ref, Ptr,KeyOfT,getk> Self;
		typedef Hash<K, T, KeyOfT, getk> HT;

		Node* _node;
		// Non-owning pointer to the table; storing the table by value would
		// copy every bucket each time an iterator is created.
		const HT* _ht;

		// Accepts const and non-const tables alike, so one constructor serves
		// both Iterator and ConstIterator.
		HashIterator(Node* node, const HT& ht)
			:_node(node)
			,_ht(&ht)
		{}

		// Pre-increment: next node in the chain, else the head of the next
		// non-empty bucket, else end. Incrementing end is a no-op.
		Self& operator++()
		{
			if (!_node) return *this;
			if (_node->_next)
			{
				_node = _node->_next;
				return *this;
			}
			const size_t size = _ht->_arr.size();
			size_t sub = BucketOf(_node) + 1;
			while (sub < size && !_ht->_arr[sub]) ++sub;
			_node = sub < size ? _ht->_arr[sub] : nullptr;
			return *this;
		}

		// Post-increment: advances and returns the position held before.
		Self operator++(int)
		{
			Self old(*this);
			++(*this);
			return old;
		}

		// Pre-decrement: previous node in the chain, else the tail of the
		// closest earlier non-empty bucket. Decrementing end yields the last
		// element; decrementing the first element yields end.
		Self& operator--()
		{
			if (_node && _node->_parent)
			{
				_node = _node->_parent;
				return *this;
			}
			// Scan downwards with `sub > 0` as the guard: size_t is unsigned,
			// so a `sub >= 0` test could never terminate the loop.
			size_t sub = _node ? BucketOf(_node) : _ht->_arr.size();
			Node* tail = nullptr;
			while (sub > 0 && !tail) tail = _ht->_arr[--sub];
			if (tail)
			{
				while (tail->_next) tail = tail->_next;
			}
			_node = tail;
			return *this;
		}

		// Post-decrement: steps back and returns the position held before.
		Self operator--(int)
		{
			Self old(*this);
			--(*this);
			return old;
		}


		Ref operator*() const
		{
			return _node->_data;
		}

		Ptr operator->() const
		{
			return &_node->_data;
		}

		// Iterators compare equal when they refer to the same node.
		bool operator!=(const Self& s) const
		{
			return _node != s._node;
		}

		bool operator==(const Self& s) const
		{
			return _node == s._node;
		}

	private:
		// Bucket index of a node, computed the same way Hash does it:
		// extract the key, hash it, reduce modulo the bucket count.
		size_t BucketOf(const Node* node) const
		{
			KeyOfT kot;
			getk gk;
			return gk(kot(node->_data)) % _ht->_arr.size();
		}
	};

	// Default hasher: converts the key to size_t.
	template <class K>
	struct GetK
	{
		size_t operator()(const K& key) const
		{
			// C-style cast kept on purpose: it accepts every key type the
			// original accepted (integers, enums, pointers).
			return (size_t)key;
		}
	};

	// String hasher: polynomial hash with multiplier 31.
	template <>
	struct GetK<std::string>
	{
		size_t operator()(const std::string& key) const
		{
			size_t sum = 0;
			for (auto e : key)
			{
				sum = sum * 31 + e;
			}
			return sum;
		}
	};

	// Separate-chaining hash table.
	//   K      : key type
	//   T      : stored element type
	//   KeyOfT : functor returning the key of a T
	//   getk   : functor converting a key to size_t
	// The table owns its nodes: they are freed on Erase and on destruction,
	// and copying a table deep-copies its elements.
	template <class K,class T,class KeyOfT,class getk = GetK<K>>
	class Hash
	{
		// Parameter names must differ from the class's own template
		// parameters; re-using them is ill-formed.
		template <class K2, class T2, class Ref2, class Ptr2, class KeyOfT2, class getk2>
		friend struct HashIterator;
		typedef HashDataNode<T> Node;
	public:
		typedef HashIterator<K, T, T&, T*, KeyOfT, getk> Iterator;
		typedef HashIterator<K, T, const T&, const T*, KeyOfT, getk> ConstIterator;

		// Iterator to the head of the first non-empty bucket, or End() when
		// the table is empty.
		Iterator Begin()
		{
			return Iterator(FirstNode(), *this);
		}

		ConstIterator Begin() const
		{
			return ConstIterator(FirstNode(), *this);
		}


		Iterator End()
		{
			return Iterator(nullptr, *this);
		}

		ConstIterator End() const
		{
			return ConstIterator(nullptr, *this);
		}


		// Creates an empty table with `size` buckets. A request for zero
		// buckets is raised to one so bucket indexing never divides by zero.
		explicit Hash(size_t size = 10)
			: _arr(size ? size : 1)
			, _n(0)
		{}

		// Deep copy: every element of `other` is re-inserted into new nodes.
		Hash(const Hash& other)
			: _arr(other._arr.size())
			, _n(0)
		{
			try
			{
				for (Node* head : other._arr)
				{
					for (Node* cur = head; cur; cur = cur->_next)
						Insert(cur->_data);
				}
			}
			catch (...)
			{
				// The destructor does not run for a half-built object.
				Destroy();
				throw;
			}
		}

		// Copy-and-swap assignment.
		Hash& operator=(Hash other)
		{
			Swap(other);
			return *this;
		}

		~Hash()
		{
			Destroy();
		}

		// Inserts `data` unless an element with the same key already exists.
		// Returns true when the element was added.
		bool Insert(const T& data)
		{
			if (Find(data)) return false;
			// Grow once the load factor reaches 1.
			if (_n >= _arr.size()) Rehash(2 * _arr.size());

			Node* newnode = new Node(data);
			LinkAtTail(_arr[BucketIndex(data, _arr.size())], newnode);
			++_n;
			return true;
		}


		// Returns the node whose key equals the key of `data`, or nullptr.
		// Only the key takes part in the comparison.
		Node* Find(const T& data)
		{
			KeyOfT kot;
			for (Node* cur = _arr[BucketIndex(data, _arr.size())]; cur; cur = cur->_next)
			{
				if (kot(cur->_data) == kot(data)) return cur;
			}
			return nullptr;
		}


		// Removes the element whose key equals the key of `data`.
		// Returns false when no such element exists.
		bool Erase(const T& data)
		{
			Node* ret = Find(data);
			if (!ret) return false;
			Node* prev = ret->_parent;
			Node* next = ret->_next;
			if (prev) prev->_next = next;
			// Head of its chain: the bucket slot itself has to be updated.
			else _arr[BucketIndex(ret->_data, _arr.size())] = next;
			if (next) next->_parent = prev;
			delete ret;
			--_n;
			return true;
		}

		// Debug helper: prints the `.second` member of every element in
		// bucket order, followed by a newline. Requires T to be pair-like.
		void Printf()
		{
			for (Node* head : _arr)
			{
				for (Node* cur = head; cur; cur = cur->_next)
					std::cout << cur->_data.second << "";
			}
			std::cout << std::endl;
		}


	private:
		// Bucket index of `data` in a table with `bucketCount` buckets.
		static size_t BucketIndex(const T& data, size_t bucketCount)
		{
			KeyOfT kot;
			getk gk;
			return gk(kot(data)) % bucketCount;
		}

		// Appends `node` to the chain starting at `head` and fixes its links.
		static void LinkAtTail(Node*& head, Node* node)
		{
			node->_next = nullptr;
			if (!head)
			{
				node->_parent = nullptr;
				head = node;
				return;
			}
			Node* tail = head;
			while (tail->_next) tail = tail->_next;
			tail->_next = node;
			node->_parent = tail;
		}

		// Head of the first non-empty bucket, or nullptr when there is none.
		Node* FirstNode() const
		{
			for (Node* head : _arr)
			{
				if (head) return head;
			}
			return nullptr;
		}

		// Moves every node into a bucket array of `newSize` slots. Each node
		// is re-indexed against the new size; existing nodes are reused.
		void Rehash(size_t newSize)
		{
			std::vector<Node*> fresh(newSize);
			for (Node* head : _arr)
			{
				while (head)
				{
					Node* next = head->_next;
					LinkAtTail(fresh[BucketIndex(head->_data, newSize)], head);
					head = next;
				}
			}
			_arr.swap(fresh);
		}

		// Frees every node and leaves all buckets empty.
		void Destroy()
		{
			for (Node*& head : _arr)
			{
				while (head)
				{
					Node* next = head->_next;
					delete head;
					head = next;
				}
			}
			_n = 0;
		}

		void Swap(Hash& other)
		{
			_arr.swap(other._arr);
			std::swap(_n, other._n);
		}

		std::vector<Node*> _arr;
		size_t _n;
	};
}

创作不易，点个赞呗，蟹蟹啦~