【C++】哈希实现unordered_map/set

来源：刀刀网

关于哈希模拟实现unordered_map/set，与红黑树模拟实现map/set的大体思路相似。

HashTable的迭代器

operator++

template<class K,class T,class KeyOfT>
struct __HashTableIterator
{
	typedef __HashTableIterator<K, T, KeyOfT> Self;
	typedef HashNode<T> Node;
	Node* _node;
	__HashTableIterator(const T& node)
		:_node(node)
	{		}
    Self operator++()
    {
        //...
    }
}

如果我们不管插入元素的顺序，只要求遍历，如何实现operator++呢？

当前桶遍历完了，继续找下一个桶遍历。计算当前节点在哪个桶，再往后找不为空的桶。所以迭代器里不能只有节点还要传一个HashTable。

struct HashNode
{
	T _data;
	HashNode<T>* _next;
	HashNode(const T& data)
		:_data(data)
		, _next(nullptr)
	{		}
};
//...
template<class K,class T,class KeyOfT,class Hash>
struct __HashTableIterator
{
	typedef __HashTableIterator<K, T, KeyOfT,Hash> Self;
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	Node* _node;
	HT* _pht;
	__HashTableIterator(const T& node,HT* pht)
		:_node(node)
		,_pht(pht)
	{		}
	Self operator++()
	{
		if (_node->_next)
		{
			_node = _node->_next;
		}
		else
		{
			KeyOfT koft;
			//当前桶遍历完了，找下一个桶继续遍历
			size_t i = _pht->HashFunc(koft(_node->_data)) % _pht->tablse.size();
			++i;
			for (; i < _pht.size(); ++i)
			{
				Node* cur = _pht->tables[i];
				if (cur)
				{
					_node = cur;
					return *this;
				}
			}
			_node = nullptr;
		}
		return *this;
	}
};
//...
template<class K, class T, class KeyOfT, class Hash>
class HashTable
{
	typedef HashNode<T> Node;
public:
    typedef __HashTableIterator<K, T, KeyOfT, Hash> iterator;
	iterator begin()
	{
		for (size_t i = 0; i < _tables.size(); ++i)
		{
			if (_tables[i])
			{
				return iterator(_tables[i],this);
			}
		}
		return end();
	}
	iterator end()
	{
		return iterator(nullptr,this);
	}
    //...
private:
	vector<Node*> _tables;
	size_t _num = 0;
}

发现会报错，找不到__HashIterator里找不到HashTable，原因是，HashTable是在__HashIterator的下面定义的，编译器找不到，能找到HashNode是因为HashNode在__HashIterator上面已经定义过了。

那把__HashIterator定义在HashTable后面呢？也不行，HashTable中也用到了__HashIterator。

所以要在__HashIterator加上HashTable的声明。

前置声明

//前置声明
template<class K, class T, class KeyOfT, class Hash>
class HashTable;

template<class K,class T,class KeyOfT,class Hash>
struct __HashTableIterator
{ }

unordered_map/set是单向迭代器，没有operator--。

注意：以上实现的operator++方式，没有按照插入顺序去遍历，跟STL库的实现方式不同。

MyUnorderedSet.hpp

#include"HashTable.hpp"
using namespace OPEN_HASH;
namespace zxa
{
	template<class K,class Hash= _Hash<K>>
	class unordered_set
	{
		struct SetKOfT
		{
			const K& operator()(const K& k)
			{
				return k;
			}
		};
	public:
		typedef typename HashTable<K, K, SetKOfT, Hash>::iterator iterator;
		iterator begin()
		{
			return _ht.begin();
		}
		iterator end()
		{
			return _ht.end();
		}
		pair<iterator,bool> Insert(const K& k)
		{
			return _ht.Insert(k);
		}
	private:
		HashTable<K, K, SetKOfT,Hash> _ht;
	};
	void test_unordered_set();
}

MyUnorderedMap.hpp

#include"HashTable.hpp"
using namespace OPEN_HASH;

namespace zxa
{
	template<class K,class V,class Hash= _Hash<K>>
	class unordered_map
	{
		struct MapKOfT
		{
			const K& operator()(const pair<K,V> & kv)
			{
				return kv.first;
			}
		};
	public:
		typedef typename HashTable<K, pair<K, V>, MapKOfT, Hash>::iterator iterator;
		iterator begin()
		{
			return _ht.begin();
		}
		iterator end()
		{
			return _ht.end();
		}
		pair<iterator,bool> Insert(const pair<K,V>& kv)
		{
			return _ht.Insert(kv);
		}
		V& operator[](const K& key)
		{
			pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
			return ret.first->second;
		}
	private:
		HashTable<K, pair<K, V>, MapKOfT,Hash> _ht;
	};
	void test_unordered_map();
}

HashTable.hpp

#pragma once
#include<vector>
#include<string>
#include<iostream>
using namespace std;

namespace OPEN_HASH
{
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;
		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{		}
	};

	//前置声明
	template<class K, class T, class KeyOfT, class Hash>
	class HashTable;

	template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
	struct __HashTableIterator
	{
		typedef __HashTableIterator<K, T,Ref,Ptr, KeyOfT,Hash> Self;
		typedef HashNode<T> Node;
		typedef HashTable<K, T, KeyOfT, Hash> HT;
		Node* _node;
		HT* _pht;
		__HashTableIterator(Node* node,HT* pht)
			:_node(node)
			,_pht(pht)
		{		}
		Ref operator*()
		{
			return _node->_data;
		}
		Ptr operator->()
		{
			return &_node->_data;
		}
		Self& operator++()
		{
			if (_node->_next)
			{
				_node = _node->_next;
			}
			else
			{
				KeyOfT koft;
				//当前桶遍历完了，找下一个桶继续遍历
				size_t i = _pht->HashFunc(koft(_node->_data)) % _pht->_tables.size();
				++i;
				for (; i < _pht->_tables.size(); ++i)
				{
					Node* cur = _pht->_tables[i];
					if (cur)
					{
						_node = cur;
						return *this;
					}
				}
				_node = nullptr;
			}
			return *this;
		}
		bool operator!=(const Self& s)
		{
			return _node != s._node;
		}
		bool operator==(const Self& s)
		{
			return _node == s._node;
		}
	};

	template<class K>
	struct _Hash
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};
	//模板特化 
	template<>
	struct _Hash<string>
	{
		size_t operator()(const string& key)
		{
			size_t hash = 0;
			for (size_t i = 0; i < key.size(); ++i)
			{
				hash *= 131;//也可以乘以31、131、1313、13131、131313..  
				hash += key[i];
			}
			return hash;
		}
	};

	template<class K, class T,class KeyOfT, class Hash>
	class HashTable
	{
		typedef HashNode<T> Node;
	public:
		template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
		friend struct __HashTableIterator;
		
		typedef __HashTableIterator<K, T, T&, T*, KeyOfT, Hash> iterator;
		typedef __HashTableIterator<K, T, const T&, const T*, KeyOfT, Hash> const_iterator;

		iterator begin()
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				if (_tables[i])
				{
					return iterator(_tables[i],this);
				}
			}
			return end();
		}
		iterator end()
		{
			return iterator(nullptr,this);
		}
		const_iterator begin() const
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				if (_tables[i])
				{
					return const_iterator(_tables[i], this);
				}
			}
			return end();
		}
		const_iterator end() const
		{
			return const_iterator(nullptr, this);
		}
		~HashTable()
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;
			}
		}
		size_t HashFunc(const K& key)
		{
			Hash hash;
			return hash(key);
		};

		size_t GetNextPrime(size_t num)
		{
			const int PRIMECOUNT = 28;
			static const size_t primeList[PRIMECOUNT] =
			{
				53ul, 97ul, 193ul, 3ul, 769ul,1543ul, 3079ul, 6151ul, 122ul, 24593ul,
				49157ul, 98317ul, 196613ul, 393241ul, 7833ul,1572869ul, 3145739ul, 6291469ul, 12582917ul,
				25165843ul,50331653ul, 100663319ul, 201326611ul, 4026531ul,8053057ul,
				1610612741ul, 3221225473ul, 4294967291ul
			};
			size_t i = 0;
			for (; i < PRIMECOUNT; ++i)
			{
				if (primeList[i] > num)
					return primeList[i];
			}
			return primeList[i];
		}

		pair<iterator,bool> Insert(const T& data)
		{
			KeyOfT koft;
			//如果负载因子等于1，增容
			// 1、开2倍大小的新表
			// 2、把旧表中的数据重新映射到新表中
			// 3、释放旧表
			if (_tables.size() == _num)
			{
				//size_t newsize = _tables.size() == 0 ? 10 : _tables.size() * 2;
				size_t newsize = GetNextPrime(_tables.size());
				vector<Node*> newtables;
				newtables.resize(newsize);
				for (size_t i = 0; i < _tables.size(); ++i)
				{
					Node* cur = _tables[i];
					//将旧表中的节点取下来重新计算在新表中的位置
					while (cur)
					{
						Node* next = cur->_next;
						size_t index = HashFunc(koft(cur->_data)) % newtables.size();
						cur->_next = newtables[index];
						newtables[index] = cur;
						cur = next;
					}
					_tables[i] = nullptr;
				}
				_tables.swap(newtables);
			}
			//计算元素在表中的位置
			size_t index = HashFunc(koft(data)) % _tables.size();
			Node* cur = _tables[index];
			//1、先查找在不在表中
			while (cur)
			{
				if (koft(cur->_data) == koft(data))
				{
					return make_pair(iterator(cur, this), false);
				}
				else
				{
					cur = cur->_next;
				}
			}
			//2、头插到挂的链表中（也可以尾插）
			Node* newnode = new Node(data);
			newnode->_next = _tables[index];
			_tables[index] = newnode;
			++_num;
			return make_pair(iterator(newnode,this),true);
		}
		pair<iterator,bool> Find(const T& key)
		{
			KeyOfT koft;
			size_t index = HashFunc(key) % _tables.size();
			Node* cur = _tables[index];
			while (cur)
			{
				if (koft(cur->_data) == key)
				{
					return make_pair(iterator(cur, this), true);
				}
			}
			return make_pair(iterator(nullptr, this), false);
		}
		bool Erase(const T& key)
		{
			KeyOfT koft;
			size_t index = HashFunc(key) % _tables.size();
			Node* cur = _tables[index];
			Node* prev = nullptr;
			while (cur)
			{
				if (koft(cur->_data) == key)
				{
					//prev为空，要删除的为头节点
					if (prev == nullptr)
					{
						_tables[index] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}
					delete cur;
					return true;
				}
				else
				{
					prev = cur;
					cur = cur->_next;
				}
			}
			return false;
		}
	private:
		vector<Node*> _tables;
		size_t _num = 0;
	};
}

Test.cpp

#define _CRT_SECURE_NO_WARNINGS 
#include"MyUnorderedMap.hpp"
#include"MyUnorderedSet.hpp"
#include"HashTable.hpp"

void zxa::test_unordered_set()
{
	unordered_set<int> s;
	s.Insert(1);
	s.Insert(3);
	s.Insert(15);
	s.Insert(22);
	s.Insert(40);
	unordered_set<int>::iterator it = s.begin();
	while (it != s.end())
	{
		cout << *it << endl;
		++it;
	}
	cout << endl;
}
void zxa::test_unordered_map()
{
	unordered_map<string, string> dict;
	dict.Insert(make_pair("sort", "排序"));
	dict.Insert(make_pair("left", "右边"));
	dict.Insert(make_pair("string", "字符串"));
	dict["left"] = "左边";
	dict["right"] = "右边";
	unordered_map<string, string>::iterator it = dict.begin();
	while (it != dict.end())
	{
		cout << it->first << ":" << it->second << endl;
		++it;
	}
	cout << endl;
}
int main()
{
	zxa::test_unordered_set();
	zxa::test_unordered_map();
	return 0;
}

运行结果

补充

typename关键字

当在模板定义中引用一个依赖于模板参数的类型成员时，必须使用typename来明确告诉编译器该名称是一个类型。这是因为模板参数在编译时是未知的，编译器无法确定一个特定的成员名称是否是一个类型还是一个静态成员变量、枚举值或其他非类型成员。

优化

当表的大小是一个素数时，发生冲突的概率会降低一些。

//计算元素在表中的位置
size_t index = HashFunc(koft(data)) % _tables.size();

//新表的空间
size_t newsize = _tables.size() == 0 ? 10 : _tables.size() * 2;

除留余数法，最好模一个素数，如何每次快速取一个类似两倍关系的素数？

使用以下方法（STL库中使用的也是这个）：

size_t GetNextPrime(size_t num)
{
	const int PRIMECOUNT = 28;
	static const size_t primeList[PRIMECOUNT] =
	{
		53ul, 97ul, 193ul, 3ul, 769ul,1543ul, 3079ul, 6151ul, 122ul, 24593ul,
		49157ul, 98317ul, 196613ul, 393241ul, 7833ul,1572869ul, 3145739ul, 6291469ul, 12582917ul,
		25165843ul,50331653ul, 100663319ul, 201326611ul, 4026531ul,8053057ul,
		1610612741ul, 3221225473ul, 4294967291ul
	};
	size_t i = 0;
	for (; i < PRIMECOUNT; ++i)
	{
		if (primeList[i] > num)
			return primeList[i];
	}
	return primeList[i];
}

本文主要实现了HashTable迭代器的operator++，其他细节和红黑树模拟map/se方式类似，这里直接实现了。

因篇幅问题不能全部显示，请点此查看更多更全内容

查看全文