Diffstat (limited to 'src')
| -rw-r--r-- | src/backend/access/hash/Makefile | 2 |
| -rw-r--r-- | src/backend/access/hash/README | 138 |
| -rw-r--r-- | src/backend/access/hash/hash.c | 81 |
| -rw-r--r-- | src/backend/access/hash/hash_xlog.c | 963 |
| -rw-r--r-- | src/backend/access/hash/hashinsert.c | 59 |
| -rw-r--r-- | src/backend/access/hash/hashovfl.c | 209 |
| -rw-r--r-- | src/backend/access/hash/hashpage.c | 236 |
| -rw-r--r-- | src/backend/access/hash/hashsearch.c | 5 |
| -rw-r--r-- | src/backend/access/rmgrdesc/hashdesc.c | 134 |
| -rw-r--r-- | src/backend/commands/indexcmds.c | 5 |
| -rw-r--r-- | src/backend/utils/cache/relcache.c | 12 |
| -rw-r--r-- | src/include/access/hash_xlog.h | 232 |
| -rw-r--r-- | src/test/regress/expected/create_index.out | 5 |
| -rw-r--r-- | src/test/regress/expected/enum.out | 1 |
| -rw-r--r-- | src/test/regress/expected/hash_index.out | 2 |
| -rw-r--r-- | src/test/regress/expected/macaddr.out | 1 |
| -rw-r--r-- | src/test/regress/expected/replica_identity.out | 1 |
| -rw-r--r-- | src/test/regress/expected/uuid.out | 1 |
18 files changed, 1998 insertions, 89 deletions
diff --git a/src/backend/access/hash/Makefile b/src/backend/access/hash/Makefile
index e2e7e914931..b154569b465 100644
--- a/src/backend/access/hash/Makefile
+++ b/src/backend/access/hash/Makefile
@@ -13,6 +13,6 @@ top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global

OBJS = hash.o hashfunc.o hashinsert.o hashovfl.o hashpage.o hashsearch.o \
-       hashsort.o hashutil.o hashvalidate.o
+       hashsort.o hashutil.o hashvalidate.o hash_xlog.o

include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/hash/README b/src/backend/access/hash/README
index 703ae982071..00beb86ffae 100644
--- a/src/backend/access/hash/README
+++ b/src/backend/access/hash/README
@@ -213,7 +213,7 @@ this flag must be clear before splitting a bucket; thus, a bucket can't be
split again until the previous split is totally complete.

The moved-by-split flag on a tuple indicates that tuple is moved from old to
-new bucket.  Concurrent scans can skip such tuples till the split operation
+new bucket.  Concurrent scans will skip such tuples until the split operation
is finished.  Once the tuple is marked as moved-by-split, it will remain so
forever but that does no harm.  We have intentionally not cleared it as that
can generate an additional I/O which is not necessary.
@@ -287,13 +287,17 @@ The insertion algorithm is rather similar:
        if current page is full, release lock but not pin,
            read/exclusive-lock next page; repeat as needed
    >> see below if no space in any page of bucket
+   take buffer content lock in exclusive mode on metapage
    insert tuple at appropriate place in page
-   mark current page dirty and release buffer content lock and pin
-   if the current page is not a bucket page, release the pin on bucket page
-   pin meta page and take buffer content lock in exclusive mode
+   mark current page dirty
    increment tuple count, decide if split needed
-   mark meta page dirty and release buffer content lock and pin
-   done if no split needed, else enter Split algorithm below
+   mark meta page dirty
+   write WAL for insertion of tuple
+   release the buffer content lock on metapage
+   release buffer content lock on current page
+   if current page is not a bucket page, release the pin on bucket page
+   if split is needed, enter Split algorithm below
+   release the pin on metapage

To speed searches, the index entries within any individual index page are
kept sorted by hash code; the insertion code must take care to insert new
@@ -328,12 +332,17 @@ existing bucket in two, thereby lowering the fill ratio:
        try to finish the split and the cleanup work
        if that succeeds, start over; if it fails, give up
    mark the old and new buckets indicating split is in progress
+   mark both old and new buckets as dirty
+   write WAL for allocation of new page for split
    copy the tuples that belong to new bucket from old bucket, marking them
        as moved-by-split
+   write WAL record for moving tuples to new page once the new page is full
+       or all the pages of old bucket are finished
    release lock but not pin for primary bucket page of old bucket,
        read/shared-lock next page; repeat as needed
    clear the bucket-being-split and bucket-being-populated flags
    mark the old bucket indicating split-cleanup
+   write WAL for changing the flags on both old and new buckets
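For concreteness, the "write WAL for insertion of tuple" step above reduces to
the standard critical-section logging pattern shown below.  This is a condensed
sketch of the _hash_doinsert() changes in the hashinsert.c hunk later in this
patch; variable declarations, error handling, and the surrounding locking steps
are omitted.

    START_CRIT_SECTION();

    /* add the tuple and bump the tuple count while both pages are locked */
    itup_off = _hash_pgaddtup(rel, buf, itemsz, itup);
    MarkBufferDirty(buf);
    metap->hashm_ntuples += 1;
    MarkBufferDirty(metabuf);

    if (RelationNeedsWAL(rel))
    {
        xl_hash_insert xlrec;
        XLogRecPtr  recptr;

        xlrec.offnum = itup_off;
        XLogBeginInsert();
        XLogRegisterData((char *) &xlrec, SizeOfHashInsert);

        /* both modified pages go into the same record */
        XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
        XLogRegisterBufData(0, (char *) itup, IndexTupleDSize(*itup));
        XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);

        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT);
        PageSetLSN(BufferGetPage(buf), recptr);
        PageSetLSN(BufferGetPage(metabuf), recptr);
    }

    END_CRIT_SECTION();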

The split operation's attempt to acquire cleanup-lock on the old bucket number
could fail if another process holds any lock or pin on it.  We do not want to
@@ -369,6 +378,8 @@ The fourth operation is garbage collection (bulk deletion):
    acquire cleanup lock on primary bucket page
    loop:
        scan and remove tuples
+       mark the target page dirty
+       write WAL for deleting tuples from target page
        if this is the last bucket page, break out of loop
        pin and x-lock next page
        release prior lock and pin (except keep pin on primary bucket page)
@@ -383,7 +394,8 @@ The fourth operation is garbage collection (bulk deletion):
    check if number of buckets changed
    if so, release content lock and pin and return to for-each-bucket loop
    else update metapage tuple count
-   mark meta page dirty and release buffer content lock and pin
+   mark meta page dirty and write WAL for update of metapage
+   release buffer content lock and pin

Note that this is designed to allow concurrent splits and scans.  If a split
occurs, tuples relocated into the new bucket will be visited twice by the
@@ -425,18 +437,16 @@ Obtaining an overflow page:
    search for a free page (zero bit in bitmap)
    if found:
        set bit in bitmap
-       mark bitmap page dirty and release content lock
+       mark bitmap page dirty
        take metapage buffer content lock in exclusive mode
        if first-free-bit value did not change,
            update it and mark meta page dirty
-       release meta page buffer content lock
-       return page number
    else (not found):
        release bitmap page buffer content lock
        loop back to try next bitmap page, if any
    -- here when we have checked all bitmap pages; we hold meta excl. lock
    extend index to add another overflow page; update meta information
-   mark meta page dirty and release buffer content lock
+   mark meta page dirty
    return page number

It is slightly annoying to release and reacquire the metapage lock
@@ -456,12 +466,17 @@ like this:
    -- having determined that no space is free in the target bucket:
    remember last page of bucket, drop write lock on it
-   call free-page-acquire routine
    re-write-lock last page of bucket
    if it is not last anymore, step to the last page
-   update (former) last page to point to new page
+   execute free-page-acquire (obtaining an overflow page) mechanism
+       described above
+   update (former) last page to point to the new page and mark buffer dirty
    write-lock and initialize new page, with back link to former last page
-   write and release former last page
+   write WAL for addition of overflow page
+   release the locks on meta page and bitmap page acquired in
+       free-page-acquire algorithm
+   release the lock on former last page
+   release the lock on new overflow page
    insert tuple into new page
    -- etc.

@@ -488,12 +503,14 @@ accessors of pages in the bucket.  The algorithm is:
    determine which bitmap page contains the free space bit for page
    release meta page buffer content lock
    pin bitmap page and take buffer content lock in exclusive mode
-   update bitmap bit
-   mark bitmap page dirty and release buffer content lock and pin
-   if page number is less than what we saw as first-free-bit in meta:
    retake meta page buffer content lock in exclusive mode
+   move (insert) tuples that belong to the overflow page being freed
+   update bitmap bit
+   mark bitmap page dirty
    if page number is still less than first-free-bit,
        update first-free-bit field and mark meta page dirty
+   write WAL for delinking overflow page operation
+   release buffer content lock and pin
    release meta page buffer content lock and pin
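Each WAL record introduced by the algorithms above has a matching redo routine
in the new hash_xlog.c file, shown in full later in this patch.  As a sketch of
the replay convention those routines share, the metapage-update record written
by the deletion algorithm is replayed roughly as follows (condensed from
hash_xlog_update_meta_page() below):

    static void
    hash_xlog_update_meta_page(XLogReaderState *record)
    {
        XLogRecPtr  lsn = record->EndRecPtr;
        xl_hash_update_meta_page *xldata =
            (xl_hash_update_meta_page *) XLogRecGetData(record);
        Buffer      metabuf;

        /* reapply the change only if the page predates this record */
        if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO)
        {
            Page        page = BufferGetPage(metabuf);

            HashPageGetMeta(page)->hashm_ntuples = xldata->ntuples;
            PageSetLSN(page, lsn);
            MarkBufferDirty(metabuf);
        }
        if (BufferIsValid(metabuf))
            UnlockReleaseBuffer(metabuf);
    }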

We have to do it this way because we must clear the bitmap bit before
@@ -504,8 +521,91 @@ page acquirer will scan more bitmap bits than he needs to.
What must be avoided is having first-free-bit greater than the actual first
free bit, because then that free page would never be found by searchers.

-All the freespace operations should be called while holding no buffer
-locks.  Since they need no lmgr locks, deadlock is not possible.
+The reason for moving tuples from the overflow page while delinking the latter
+is to make those two changes a single atomic operation.  Not doing so could
+lead to spurious reads on standby.  Basically, the user might see the same
+tuple twice.
+
+
+WAL Considerations
+------------------
+
+The hash index operations like create index, insert, delete, bucket split,
+allocate overflow page, and squeeze in themselves don't guarantee hash index
+consistency after a crash.  To provide robustness, we write WAL for each of
+these operations.
+
+CREATE INDEX writes multiple WAL records.  First, we write a record to cover
+the initialization of the metapage, followed by one for each new bucket
+created, followed by one for the initial bitmap page.  It's not important for
+index creation to appear atomic, because the index isn't yet visible to any
+other transaction, and the creating transaction will roll back in the event of
+a crash.  It would be difficult to cover the whole operation with a single
+write-ahead log record anyway, because we can log only a fixed number of
+pages, as given by XLR_MAX_BLOCK_ID (32), with the current XLog machinery.
+
+Ordinary item insertions (that don't force a page split or need a new overflow
+page) are single WAL entries.  They touch a single bucket page and the
+metapage.  The metapage is updated during replay just as it is updated during
+the original operation.
+
+If an insertion causes the addition of an overflow page, there will be one
+WAL entry for the new overflow page and a second entry for the insert itself.
+
+If an insertion causes a bucket split, there will be one WAL entry for the
+insert itself, followed by a WAL entry for allocating a new bucket, followed
+by a WAL entry for each overflow bucket page in the new bucket to which the
+tuples are moved from the old bucket, followed by a WAL entry to indicate that
+the split is complete for both old and new buckets.  A split operation which
+requires overflow pages to complete the operation will need to write a WAL
+record for each new allocation of an overflow page.
+
+As splitting involves multiple atomic actions, it's possible that the system
+crashes between moving tuples from the bucket pages of the old bucket to the
+new bucket.  In such a case, after recovery, the old and new buckets will be
+marked with the bucket-being-split and bucket-being-populated flags
+respectively, which indicate that a split is in progress for those buckets.
+The reader algorithm works correctly, as it will scan both the old and new
+buckets when the split is in progress, as explained in the reader algorithm
+section above.
+
+We finish the split at the next insert or split operation on the old bucket,
+as explained in the insert and split algorithms above.  It could be done
+during searches, too, but it seems best not to put any extra updates in what
+would otherwise be a read-only operation (updating is not possible in hot
+standby mode anyway).  It would seem natural to complete the split in VACUUM,
+but since splitting a bucket might require allocating a new page, it might
+fail if you run out of disk space.  That would be bad during VACUUM - the
+reason for running VACUUM in the first place might be that you ran out of
+disk space, and now VACUUM won't finish because you're out of disk space.  In
+contrast, an insertion can require enlarging the physical file anyway.
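The per-bucket records mentioned in the CREATE INDEX paragraph above are plain
full-page images.  The sketch below condenses the corresponding loop from the
_hash_init() changes in the hashpage.c hunk later in this patch; the
BUCKET_TO_BLKNO() computation of blkno is an assumption, since that part of the
loop lies outside the visible hunk.

    /* one full-page-image WAL record per newly created bucket page */
    for (i = 0; i < num_buckets; i++)
    {
        BlockNumber blkno = BUCKET_TO_BLKNO(metap, i);   /* assumed */
        Buffer      buf;

        buf = _hash_getnewbuf(rel, blkno, forkNum);
        _hash_initbuf(buf, metap->hashm_maxbucket, i, LH_BUCKET_PAGE, false);
        MarkBufferDirty(buf);
        log_newpage(&rel->rd_node, forkNum, blkno, BufferGetPage(buf), true);
        _hash_relbuf(rel, buf);
    }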
+
+Deletion of tuples from a bucket is performed for two reasons: to remove dead
+tuples, and to remove tuples that were moved by a bucket split.  A WAL entry
+is made for each bucket page from which tuples are removed, and then another
+WAL entry is made when we clear the needs-split-cleanup flag.  If dead tuples
+are removed, a separate WAL entry is made to update the metapage.
+
+As deletion involves multiple atomic operations, it is quite possible that the
+system crashes (a) after removing tuples from some of the bucket pages, (b)
+before clearing the garbage flag, or (c) before updating the metapage.  If the
+system crashes before completing (b), it will again try to clean the bucket
+during the next vacuum or insert after recovery, which can have some
+performance impact, but it will work fine.  If the system crashes before
+completing (c), after recovery there could be some additional splits until the
+next vacuum updates the metapage, but the other operations like insert, delete
+and scan will work correctly.  We can fix this problem by actually updating
+the metapage based on the delete operation during replay, but it's not clear
+whether it's worth the complication.
+
+A squeeze operation moves tuples from one of the buckets later in the chain to
+one of the buckets earlier in the chain, and writes a WAL record whenever the
+bucket to which it is writing tuples fills up or the bucket from which it is
+removing tuples becomes empty.
+
+As a squeeze operation involves multiple atomic operations, it is quite
+possible that the system crashes before completing the operation on the
+entire bucket.  After recovery, the operations will work correctly, but the
+index will remain bloated, and this can impact performance of read and insert
+operations until the next vacuum squeezes the bucket completely.


Other Notes
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index 1f8a7f61c72..641676964bb 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -28,6 +28,7 @@
#include "utils/builtins.h"
#include "utils/index_selfuncs.h"
#include "utils/rel.h"
+#include "miscadmin.h"


/* Working state for hashbuild and its callback */
@@ -303,6 +304,11 @@ hashgettuple(IndexScanDesc scan, ScanDirection dir)
            buf = so->hashso_curbuf;
            Assert(BufferIsValid(buf));
            page = BufferGetPage(buf);
+
+           /*
+            * We don't need to test for old snapshots here, as the current
+            * buffer is pinned, so vacuum can't clean the page.
+            */
            maxoffnum = PageGetMaxOffsetNumber(page);
            for (offnum = ItemPointerGetOffsetNumber(current);
                 offnum <= maxoffnum;
@@ -623,6 +629,7 @@ loop_top:
    }

    /* Okay, we're really done.  Update tuple count in metapage.
*/ + START_CRIT_SECTION(); if (orig_maxbucket == metap->hashm_maxbucket && orig_ntuples == metap->hashm_ntuples) @@ -649,6 +656,26 @@ loop_top: } MarkBufferDirty(metabuf); + + /* XLOG stuff */ + if (RelationNeedsWAL(rel)) + { + xl_hash_update_meta_page xlrec; + XLogRecPtr recptr; + + xlrec.ntuples = metap->hashm_ntuples; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, sizeof(SizeOfHashUpdateMetaPage)); + + XLogRegisterBuffer(0, metabuf, REGBUF_STANDARD); + + recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_UPDATE_META_PAGE); + PageSetLSN(BufferGetPage(metabuf), recptr); + } + + END_CRIT_SECTION(); + _hash_relbuf(rel, metabuf); /* return statistics */ @@ -816,9 +843,40 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf, */ if (ndeletable > 0) { + /* No ereport(ERROR) until changes are logged */ + START_CRIT_SECTION(); + PageIndexMultiDelete(page, deletable, ndeletable); bucket_dirty = true; MarkBufferDirty(buf); + + /* XLOG stuff */ + if (RelationNeedsWAL(rel)) + { + xl_hash_delete xlrec; + XLogRecPtr recptr; + + xlrec.is_primary_bucket_page = (buf == bucket_buf) ? true : false; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHashDelete); + + /* + * bucket buffer needs to be registered to ensure that we can + * acquire a cleanup lock on it during replay. + */ + if (!xlrec.is_primary_bucket_page) + XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE); + + XLogRegisterBuffer(1, buf, REGBUF_STANDARD); + XLogRegisterBufData(1, (char *) deletable, + ndeletable * sizeof(OffsetNumber)); + + recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_DELETE); + PageSetLSN(BufferGetPage(buf), recptr); + } + + END_CRIT_SECTION(); } /* bail out if there are no more pages to scan. */ @@ -866,8 +924,25 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf, page = BufferGetPage(bucket_buf); bucket_opaque = (HashPageOpaque) PageGetSpecialPointer(page); + /* No ereport(ERROR) until changes are logged */ + START_CRIT_SECTION(); + bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP; MarkBufferDirty(bucket_buf); + + /* XLOG stuff */ + if (RelationNeedsWAL(rel)) + { + XLogRecPtr recptr; + + XLogBeginInsert(); + XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD); + + recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_CLEANUP); + PageSetLSN(page, recptr); + } + + END_CRIT_SECTION(); } /* @@ -881,9 +956,3 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf, else LockBuffer(bucket_buf, BUFFER_LOCK_UNLOCK); } - -void -hash_redo(XLogReaderState *record) -{ - elog(PANIC, "hash_redo: unimplemented"); -} diff --git a/src/backend/access/hash/hash_xlog.c b/src/backend/access/hash/hash_xlog.c new file mode 100644 index 00000000000..d435215259b --- /dev/null +++ b/src/backend/access/hash/hash_xlog.c @@ -0,0 +1,963 @@ +/*------------------------------------------------------------------------- + * + * hash_xlog.c + * WAL replay logic for hash index. 
+ * + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/hash/hash_xlog.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/hash.h" +#include "access/hash_xlog.h" +#include "access/xlogutils.h" + +/* + * replay a hash index meta page + */ +static void +hash_xlog_init_meta_page(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + Page page; + Buffer metabuf; + + xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) XLogRecGetData(record); + + /* create the index' metapage */ + metabuf = XLogInitBufferForRedo(record, 0); + Assert(BufferIsValid(metabuf)); + _hash_init_metabuffer(metabuf, xlrec->num_tuples, xlrec->procid, + xlrec->ffactor, true); + page = (Page) BufferGetPage(metabuf); + PageSetLSN(page, lsn); + MarkBufferDirty(metabuf); + /* all done */ + UnlockReleaseBuffer(metabuf); +} + +/* + * replay a hash index bitmap page + */ +static void +hash_xlog_init_bitmap_page(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + Buffer bitmapbuf; + Buffer metabuf; + Page page; + HashMetaPage metap; + uint32 num_buckets; + + xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) XLogRecGetData(record); + + /* + * Initialize bitmap page + */ + bitmapbuf = XLogInitBufferForRedo(record, 0); + _hash_initbitmapbuffer(bitmapbuf, xlrec->bmsize, true); + PageSetLSN(BufferGetPage(bitmapbuf), lsn); + MarkBufferDirty(bitmapbuf); + UnlockReleaseBuffer(bitmapbuf); + + /* add the new bitmap page to the metapage's list of bitmaps */ + if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO) + { + /* + * Note: in normal operation, we'd update the metapage while still + * holding lock on the bitmap page. But during replay it's not + * necessary to hold that lock, since nobody can see it yet; the + * creating transaction hasn't yet committed. + */ + page = BufferGetPage(metabuf); + metap = HashPageGetMeta(page); + + num_buckets = metap->hashm_maxbucket + 1; + metap->hashm_mapp[metap->hashm_nmaps] = num_buckets + 1; + metap->hashm_nmaps++; + + PageSetLSN(page, lsn); + MarkBufferDirty(metabuf); + } + if (BufferIsValid(metabuf)) + UnlockReleaseBuffer(metabuf); +} + +/* + * replay a hash index insert without split + */ +static void +hash_xlog_insert(XLogReaderState *record) +{ + HashMetaPage metap; + XLogRecPtr lsn = record->EndRecPtr; + xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record); + Buffer buffer; + Page page; + + if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) + { + Size datalen; + char *datapos = XLogRecGetBlockData(record, 0, &datalen); + + page = BufferGetPage(buffer); + + if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum, + false, false) == InvalidOffsetNumber) + elog(PANIC, "hash_xlog_insert: failed to add item"); + + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + } + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); + + if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO) + { + /* + * Note: in normal operation, we'd update the metapage while still + * holding lock on the page we inserted into. But during replay it's + * not necessary to hold that lock, since no other index updates can + * be happening concurrently. 
+ */ + page = BufferGetPage(buffer); + metap = HashPageGetMeta(page); + metap->hashm_ntuples += 1; + + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + } + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); +} + +/* + * replay addition of overflow page for hash index + */ +static void +hash_xlog_add_ovfl_page(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) XLogRecGetData(record); + Buffer leftbuf; + Buffer ovflbuf; + Buffer metabuf; + BlockNumber leftblk; + BlockNumber rightblk; + BlockNumber newmapblk = InvalidBlockNumber; + Page ovflpage; + HashPageOpaque ovflopaque; + uint32 *num_bucket; + char *data; + Size datalen PG_USED_FOR_ASSERTS_ONLY; + bool new_bmpage = false; + + XLogRecGetBlockTag(record, 0, NULL, NULL, &rightblk); + XLogRecGetBlockTag(record, 1, NULL, NULL, &leftblk); + + ovflbuf = XLogInitBufferForRedo(record, 0); + Assert(BufferIsValid(ovflbuf)); + + data = XLogRecGetBlockData(record, 0, &datalen); + num_bucket = (uint32 *) data; + Assert(datalen == sizeof(uint32)); + _hash_initbuf(ovflbuf, InvalidBlockNumber, *num_bucket, LH_OVERFLOW_PAGE, + true); + /* update backlink */ + ovflpage = BufferGetPage(ovflbuf); + ovflopaque = (HashPageOpaque) PageGetSpecialPointer(ovflpage); + ovflopaque->hasho_prevblkno = leftblk; + + PageSetLSN(ovflpage, lsn); + MarkBufferDirty(ovflbuf); + + if (XLogReadBufferForRedo(record, 1, &leftbuf) == BLK_NEEDS_REDO) + { + Page leftpage; + HashPageOpaque leftopaque; + + leftpage = BufferGetPage(leftbuf); + leftopaque = (HashPageOpaque) PageGetSpecialPointer(leftpage); + leftopaque->hasho_nextblkno = rightblk; + + PageSetLSN(leftpage, lsn); + MarkBufferDirty(leftbuf); + } + + if (BufferIsValid(leftbuf)) + UnlockReleaseBuffer(leftbuf); + UnlockReleaseBuffer(ovflbuf); + + /* + * Note: in normal operation, we'd update the bitmap and meta page while + * still holding lock on the overflow pages. But during replay it's not + * necessary to hold those locks, since no other index updates can be + * happening concurrently. 
+ */ + if (XLogRecHasBlockRef(record, 2)) + { + Buffer mapbuffer; + + if (XLogReadBufferForRedo(record, 2, &mapbuffer) == BLK_NEEDS_REDO) + { + Page mappage = (Page) BufferGetPage(mapbuffer); + uint32 *freep = NULL; + char *data; + uint32 *bitmap_page_bit; + + freep = HashPageGetBitmap(mappage); + + data = XLogRecGetBlockData(record, 2, &datalen); + bitmap_page_bit = (uint32 *) data; + + SETBIT(freep, *bitmap_page_bit); + + PageSetLSN(mappage, lsn); + MarkBufferDirty(mapbuffer); + } + if (BufferIsValid(mapbuffer)) + UnlockReleaseBuffer(mapbuffer); + } + + if (XLogRecHasBlockRef(record, 3)) + { + Buffer newmapbuf; + + newmapbuf = XLogInitBufferForRedo(record, 3); + + _hash_initbitmapbuffer(newmapbuf, xlrec->bmsize, true); + + new_bmpage = true; + newmapblk = BufferGetBlockNumber(newmapbuf); + + MarkBufferDirty(newmapbuf); + PageSetLSN(BufferGetPage(newmapbuf), lsn); + + UnlockReleaseBuffer(newmapbuf); + } + + if (XLogReadBufferForRedo(record, 4, &metabuf) == BLK_NEEDS_REDO) + { + HashMetaPage metap; + Page page; + uint32 *firstfree_ovflpage; + + data = XLogRecGetBlockData(record, 4, &datalen); + firstfree_ovflpage = (uint32 *) data; + + page = BufferGetPage(metabuf); + metap = HashPageGetMeta(page); + metap->hashm_firstfree = *firstfree_ovflpage; + + if (!xlrec->bmpage_found) + { + metap->hashm_spares[metap->hashm_ovflpoint]++; + + if (new_bmpage) + { + Assert(BlockNumberIsValid(newmapblk)); + + metap->hashm_mapp[metap->hashm_nmaps] = newmapblk; + metap->hashm_nmaps++; + metap->hashm_spares[metap->hashm_ovflpoint]++; + } + } + + PageSetLSN(page, lsn); + MarkBufferDirty(metabuf); + } + if (BufferIsValid(metabuf)) + UnlockReleaseBuffer(metabuf); +} + +/* + * replay allocation of page for split operation + */ +static void +hash_xlog_split_allocate_page(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) XLogRecGetData(record); + Buffer oldbuf; + Buffer newbuf; + Buffer metabuf; + Size datalen PG_USED_FOR_ASSERTS_ONLY; + char *data; + XLogRedoAction action; + + /* + * To be consistent with normal operation, here we take cleanup locks on + * both the old and new buckets even though there can't be any concurrent + * inserts. + */ + + /* replay the record for old bucket */ + action = XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &oldbuf); + + /* + * Note that we still update the page even if it was restored from a full + * page image, because the special space is not included in the image. + */ + if (action == BLK_NEEDS_REDO || action == BLK_RESTORED) + { + Page oldpage; + HashPageOpaque oldopaque; + + oldpage = BufferGetPage(oldbuf); + oldopaque = (HashPageOpaque) PageGetSpecialPointer(oldpage); + + oldopaque->hasho_flag = xlrec->old_bucket_flag; + oldopaque->hasho_prevblkno = xlrec->new_bucket; + + PageSetLSN(oldpage, lsn); + MarkBufferDirty(oldbuf); + } + + /* replay the record for new bucket */ + newbuf = XLogInitBufferForRedo(record, 1); + _hash_initbuf(newbuf, xlrec->new_bucket, xlrec->new_bucket, + xlrec->new_bucket_flag, true); + if (!IsBufferCleanupOK(newbuf)) + elog(PANIC, "hash_xlog_split_allocate_page: failed to acquire cleanup lock"); + MarkBufferDirty(newbuf); + PageSetLSN(BufferGetPage(newbuf), lsn); + + /* + * We can release the lock on old bucket early as well but doing here to + * consistent with normal operation. 
+ */ + if (BufferIsValid(oldbuf)) + UnlockReleaseBuffer(oldbuf); + if (BufferIsValid(newbuf)) + UnlockReleaseBuffer(newbuf); + + /* + * Note: in normal operation, we'd update the meta page while still + * holding lock on the old and new bucket pages. But during replay it's + * not necessary to hold those locks, since no other bucket splits can be + * happening concurrently. + */ + + /* replay the record for metapage changes */ + if (XLogReadBufferForRedo(record, 2, &metabuf) == BLK_NEEDS_REDO) + { + Page page; + HashMetaPage metap; + + page = BufferGetPage(metabuf); + metap = HashPageGetMeta(page); + metap->hashm_maxbucket = xlrec->new_bucket; + + data = XLogRecGetBlockData(record, 2, &datalen); + + if (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS) + { + uint32 lowmask; + uint32 *highmask; + + /* extract low and high masks. */ + memcpy(&lowmask, data, sizeof(uint32)); + highmask = (uint32 *) ((char *) data + sizeof(uint32)); + + /* update metapage */ + metap->hashm_lowmask = lowmask; + metap->hashm_highmask = *highmask; + + data += sizeof(uint32) * 2; + } + + if (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT) + { + uint32 ovflpoint; + uint32 *ovflpages; + + /* extract information of overflow pages. */ + memcpy(&ovflpoint, data, sizeof(uint32)); + ovflpages = (uint32 *) ((char *) data + sizeof(uint32)); + + /* update metapage */ + metap->hashm_spares[ovflpoint] = *ovflpages; + metap->hashm_ovflpoint = ovflpoint; + } + + MarkBufferDirty(metabuf); + PageSetLSN(BufferGetPage(metabuf), lsn); + } + + if (BufferIsValid(metabuf)) + UnlockReleaseBuffer(metabuf); +} + +/* + * replay of split operation + */ +static void +hash_xlog_split_page(XLogReaderState *record) +{ + Buffer buf; + + if (XLogReadBufferForRedo(record, 0, &buf) != BLK_RESTORED) + elog(ERROR, "Hash split record did not contain a full-page image"); + + UnlockReleaseBuffer(buf); +} + +/* + * replay completion of split operation + */ +static void +hash_xlog_split_complete(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + xl_hash_split_complete *xlrec = (xl_hash_split_complete *) XLogRecGetData(record); + Buffer oldbuf; + Buffer newbuf; + XLogRedoAction action; + + /* replay the record for old bucket */ + action = XLogReadBufferForRedo(record, 0, &oldbuf); + + /* + * Note that we still update the page even if it was restored from a full + * page image, because the bucket flag is not included in the image. + */ + if (action == BLK_NEEDS_REDO || action == BLK_RESTORED) + { + Page oldpage; + HashPageOpaque oldopaque; + + oldpage = BufferGetPage(oldbuf); + oldopaque = (HashPageOpaque) PageGetSpecialPointer(oldpage); + + oldopaque->hasho_flag = xlrec->old_bucket_flag; + + PageSetLSN(oldpage, lsn); + MarkBufferDirty(oldbuf); + } + if (BufferIsValid(oldbuf)) + UnlockReleaseBuffer(oldbuf); + + /* replay the record for new bucket */ + action = XLogReadBufferForRedo(record, 1, &newbuf); + + /* + * Note that we still update the page even if it was restored from a full + * page image, because the bucket flag is not included in the image. 
+ */ + if (action == BLK_NEEDS_REDO || action == BLK_RESTORED) + { + Page newpage; + HashPageOpaque nopaque; + + newpage = BufferGetPage(newbuf); + nopaque = (HashPageOpaque) PageGetSpecialPointer(newpage); + + nopaque->hasho_flag = xlrec->new_bucket_flag; + + PageSetLSN(newpage, lsn); + MarkBufferDirty(newbuf); + } + if (BufferIsValid(newbuf)) + UnlockReleaseBuffer(newbuf); +} + +/* + * replay move of page contents for squeeze operation of hash index + */ +static void +hash_xlog_move_page_contents(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + xl_hash_move_page_contents *xldata = (xl_hash_move_page_contents *) XLogRecGetData(record); + Buffer bucketbuf = InvalidBuffer; + Buffer writebuf = InvalidBuffer; + Buffer deletebuf = InvalidBuffer; + XLogRedoAction action; + + /* + * Ensure we have a cleanup lock on primary bucket page before we start + * with the actual replay operation. This is to ensure that neither a + * scan can start nor a scan can be already-in-progress during the replay + * of this operation. If we allow scans during this operation, then they + * can miss some records or show the same record multiple times. + */ + if (xldata->is_prim_bucket_same_wrt) + action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf); + else + { + /* + * we don't care for return value as the purpose of reading bucketbuf + * is to ensure a cleanup lock on primary bucket page. + */ + (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf); + + action = XLogReadBufferForRedo(record, 1, &writebuf); + } + + /* replay the record for adding entries in overflow buffer */ + if (action == BLK_NEEDS_REDO) + { + Page writepage; + char *begin; + char *data; + Size datalen; + uint16 ninserted = 0; + + data = begin = XLogRecGetBlockData(record, 1, &datalen); + + writepage = (Page) BufferGetPage(writebuf); + + if (xldata->ntups > 0) + { + OffsetNumber *towrite = (OffsetNumber *) data; + + data += sizeof(OffsetNumber) * xldata->ntups; + + while (data - begin < datalen) + { + IndexTuple itup = (IndexTuple) data; + Size itemsz; + OffsetNumber l; + + itemsz = IndexTupleDSize(*itup); + itemsz = MAXALIGN(itemsz); + + data += itemsz; + + l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false); + if (l == InvalidOffsetNumber) + elog(ERROR, "hash_xlog_move_page_contents: failed to add item to hash index page, size %d bytes", + (int) itemsz); + + ninserted++; + } + } + + /* + * number of tuples inserted must be same as requested in REDO record. + */ + Assert(ninserted == xldata->ntups); + + PageSetLSN(writepage, lsn); + MarkBufferDirty(writebuf); + } + + /* replay the record for deleting entries from overflow buffer */ + if (XLogReadBufferForRedo(record, 2, &deletebuf) == BLK_NEEDS_REDO) + { + Page page; + char *ptr; + Size len; + + ptr = XLogRecGetBlockData(record, 2, &len); + + page = (Page) BufferGetPage(deletebuf); + + if (len > 0) + { + OffsetNumber *unused; + OffsetNumber *unend; + + unused = (OffsetNumber *) ptr; + unend = (OffsetNumber *) ((char *) ptr + len); + + if ((unend - unused) > 0) + PageIndexMultiDelete(page, unused, unend - unused); + } + + PageSetLSN(page, lsn); + MarkBufferDirty(deletebuf); + } + + /* + * Replay is complete, now we can release the buffers. We release locks at + * end of replay operation to ensure that we hold lock on primary bucket + * page till end of operation. 
We can optimize by releasing the lock on + * write buffer as soon as the operation for same is complete, if it is + * not same as primary bucket page, but that doesn't seem to be worth + * complicating the code. + */ + if (BufferIsValid(deletebuf)) + UnlockReleaseBuffer(deletebuf); + + if (BufferIsValid(writebuf)) + UnlockReleaseBuffer(writebuf); + + if (BufferIsValid(bucketbuf)) + UnlockReleaseBuffer(bucketbuf); +} + +/* + * replay squeeze page operation of hash index + */ +static void +hash_xlog_squeeze_page(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + xl_hash_squeeze_page *xldata = (xl_hash_squeeze_page *) XLogRecGetData(record); + Buffer bucketbuf = InvalidBuffer; + Buffer writebuf; + Buffer ovflbuf; + Buffer prevbuf = InvalidBuffer; + Buffer mapbuf; + XLogRedoAction action; + + /* + * Ensure we have a cleanup lock on primary bucket page before we start + * with the actual replay operation. This is to ensure that neither a + * scan can start nor a scan can be already-in-progress during the replay + * of this operation. If we allow scans during this operation, then they + * can miss some records or show the same record multiple times. + */ + if (xldata->is_prim_bucket_same_wrt) + action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &writebuf); + else + { + /* + * we don't care for return value as the purpose of reading bucketbuf + * is to ensure a cleanup lock on primary bucket page. + */ + (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf); + + action = XLogReadBufferForRedo(record, 1, &writebuf); + } + + /* replay the record for adding entries in overflow buffer */ + if (action == BLK_NEEDS_REDO) + { + Page writepage; + char *begin; + char *data; + Size datalen; + uint16 ninserted = 0; + + data = begin = XLogRecGetBlockData(record, 1, &datalen); + + writepage = (Page) BufferGetPage(writebuf); + + if (xldata->ntups > 0) + { + OffsetNumber *towrite = (OffsetNumber *) data; + + data += sizeof(OffsetNumber) * xldata->ntups; + + while (data - begin < datalen) + { + IndexTuple itup = (IndexTuple) data; + Size itemsz; + OffsetNumber l; + + itemsz = IndexTupleDSize(*itup); + itemsz = MAXALIGN(itemsz); + + data += itemsz; + + l = PageAddItem(writepage, (Item) itup, itemsz, towrite[ninserted], false, false); + if (l == InvalidOffsetNumber) + elog(ERROR, "hash_xlog_squeeze_page: failed to add item to hash index page, size %d bytes", + (int) itemsz); + + ninserted++; + } + } + + /* + * number of tuples inserted must be same as requested in REDO record. + */ + Assert(ninserted == xldata->ntups); + + /* + * if the page on which are adding tuples is a page previous to freed + * overflow page, then update its nextblno. 
+ */ + if (xldata->is_prev_bucket_same_wrt) + { + HashPageOpaque writeopaque = (HashPageOpaque) PageGetSpecialPointer(writepage); + + writeopaque->hasho_nextblkno = xldata->nextblkno; + } + + PageSetLSN(writepage, lsn); + MarkBufferDirty(writebuf); + } + + /* replay the record for initializing overflow buffer */ + if (XLogReadBufferForRedo(record, 2, &ovflbuf) == BLK_NEEDS_REDO) + { + Page ovflpage; + + ovflpage = BufferGetPage(ovflbuf); + + _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf)); + + PageSetLSN(ovflpage, lsn); + MarkBufferDirty(ovflbuf); + } + if (BufferIsValid(ovflbuf)) + UnlockReleaseBuffer(ovflbuf); + + /* replay the record for page previous to the freed overflow page */ + if (!xldata->is_prev_bucket_same_wrt && + XLogReadBufferForRedo(record, 3, &prevbuf) == BLK_NEEDS_REDO) + { + Page prevpage = BufferGetPage(prevbuf); + HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage); + + prevopaque->hasho_nextblkno = xldata->nextblkno; + + PageSetLSN(prevpage, lsn); + MarkBufferDirty(prevbuf); + } + if (BufferIsValid(prevbuf)) + UnlockReleaseBuffer(prevbuf); + + /* replay the record for page next to the freed overflow page */ + if (XLogRecHasBlockRef(record, 4)) + { + Buffer nextbuf; + + if (XLogReadBufferForRedo(record, 4, &nextbuf) == BLK_NEEDS_REDO) + { + Page nextpage = BufferGetPage(nextbuf); + HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage); + + nextopaque->hasho_prevblkno = xldata->prevblkno; + + PageSetLSN(nextpage, lsn); + MarkBufferDirty(nextbuf); + } + if (BufferIsValid(nextbuf)) + UnlockReleaseBuffer(nextbuf); + } + + if (BufferIsValid(writebuf)) + UnlockReleaseBuffer(writebuf); + + if (BufferIsValid(bucketbuf)) + UnlockReleaseBuffer(bucketbuf); + + /* + * Note: in normal operation, we'd update the bitmap and meta page while + * still holding lock on the primary bucket page and overflow pages. But + * during replay it's not necessary to hold those locks, since no other + * index updates can be happening concurrently. 
+ */ + /* replay the record for bitmap page */ + if (XLogReadBufferForRedo(record, 5, &mapbuf) == BLK_NEEDS_REDO) + { + Page mappage = (Page) BufferGetPage(mapbuf); + uint32 *freep = NULL; + char *data; + uint32 *bitmap_page_bit; + Size datalen; + + freep = HashPageGetBitmap(mappage); + + data = XLogRecGetBlockData(record, 5, &datalen); + bitmap_page_bit = (uint32 *) data; + + CLRBIT(freep, *bitmap_page_bit); + + PageSetLSN(mappage, lsn); + MarkBufferDirty(mapbuf); + } + if (BufferIsValid(mapbuf)) + UnlockReleaseBuffer(mapbuf); + + /* replay the record for meta page */ + if (XLogRecHasBlockRef(record, 6)) + { + Buffer metabuf; + + if (XLogReadBufferForRedo(record, 6, &metabuf) == BLK_NEEDS_REDO) + { + HashMetaPage metap; + Page page; + char *data; + uint32 *firstfree_ovflpage; + Size datalen; + + data = XLogRecGetBlockData(record, 6, &datalen); + firstfree_ovflpage = (uint32 *) data; + + page = BufferGetPage(metabuf); + metap = HashPageGetMeta(page); + metap->hashm_firstfree = *firstfree_ovflpage; + + PageSetLSN(page, lsn); + MarkBufferDirty(metabuf); + } + if (BufferIsValid(metabuf)) + UnlockReleaseBuffer(metabuf); + } +} + +/* + * replay delete operation of hash index + */ +static void +hash_xlog_delete(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + xl_hash_delete *xldata = (xl_hash_delete *) XLogRecGetData(record); + Buffer bucketbuf = InvalidBuffer; + Buffer deletebuf; + Page page; + XLogRedoAction action; + + /* + * Ensure we have a cleanup lock on primary bucket page before we start + * with the actual replay operation. This is to ensure that neither a + * scan can start nor a scan can be already-in-progress during the replay + * of this operation. If we allow scans during this operation, then they + * can miss some records or show the same record multiple times. + */ + if (xldata->is_primary_bucket_page) + action = XLogReadBufferForRedoExtended(record, 1, RBM_NORMAL, true, &deletebuf); + else + { + /* + * we don't care for return value as the purpose of reading bucketbuf + * is to ensure a cleanup lock on primary bucket page. + */ + (void) XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &bucketbuf); + + action = XLogReadBufferForRedo(record, 1, &deletebuf); + } + + /* replay the record for deleting entries in bucket page */ + if (action == BLK_NEEDS_REDO) + { + char *ptr; + Size len; + + ptr = XLogRecGetBlockData(record, 1, &len); + + page = (Page) BufferGetPage(deletebuf); + + if (len > 0) + { + OffsetNumber *unused; + OffsetNumber *unend; + + unused = (OffsetNumber *) ptr; + unend = (OffsetNumber *) ((char *) ptr + len); + + if ((unend - unused) > 0) + PageIndexMultiDelete(page, unused, unend - unused); + } + + PageSetLSN(page, lsn); + MarkBufferDirty(deletebuf); + } + if (BufferIsValid(deletebuf)) + UnlockReleaseBuffer(deletebuf); + + if (BufferIsValid(bucketbuf)) + UnlockReleaseBuffer(bucketbuf); +} + +/* + * replay split cleanup flag operation for primary bucket page. 
+ */ +static void +hash_xlog_split_cleanup(XLogReaderState *record) +{ + XLogRecPtr lsn = record->EndRecPtr; + Buffer buffer; + Page page; + + if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO) + { + HashPageOpaque bucket_opaque; + + page = (Page) BufferGetPage(buffer); + + bucket_opaque = (HashPageOpaque) PageGetSpecialPointer(page); + bucket_opaque->hasho_flag &= ~LH_BUCKET_NEEDS_SPLIT_CLEANUP; + PageSetLSN(page, lsn); + MarkBufferDirty(buffer); + } + if (BufferIsValid(buffer)) + UnlockReleaseBuffer(buffer); +} + +/* + * replay for update meta page + */ +static void +hash_xlog_update_meta_page(XLogReaderState *record) +{ + HashMetaPage metap; + XLogRecPtr lsn = record->EndRecPtr; + xl_hash_update_meta_page *xldata = (xl_hash_update_meta_page *) XLogRecGetData(record); + Buffer metabuf; + Page page; + + if (XLogReadBufferForRedo(record, 0, &metabuf) == BLK_NEEDS_REDO) + { + page = BufferGetPage(metabuf); + metap = HashPageGetMeta(page); + + metap->hashm_ntuples = xldata->ntuples; + + PageSetLSN(page, lsn); + MarkBufferDirty(metabuf); + } + if (BufferIsValid(metabuf)) + UnlockReleaseBuffer(metabuf); +} + +void +hash_redo(XLogReaderState *record) +{ + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + + switch (info) + { + case XLOG_HASH_INIT_META_PAGE: + hash_xlog_init_meta_page(record); + break; + case XLOG_HASH_INIT_BITMAP_PAGE: + hash_xlog_init_bitmap_page(record); + break; + case XLOG_HASH_INSERT: + hash_xlog_insert(record); + break; + case XLOG_HASH_ADD_OVFL_PAGE: + hash_xlog_add_ovfl_page(record); + break; + case XLOG_HASH_SPLIT_ALLOCATE_PAGE: + hash_xlog_split_allocate_page(record); + break; + case XLOG_HASH_SPLIT_PAGE: + hash_xlog_split_page(record); + break; + case XLOG_HASH_SPLIT_COMPLETE: + hash_xlog_split_complete(record); + break; + case XLOG_HASH_MOVE_PAGE_CONTENTS: + hash_xlog_move_page_contents(record); + break; + case XLOG_HASH_SQUEEZE_PAGE: + hash_xlog_squeeze_page(record); + break; + case XLOG_HASH_DELETE: + hash_xlog_delete(record); + break; + case XLOG_HASH_SPLIT_CLEANUP: + hash_xlog_split_cleanup(record); + break; + case XLOG_HASH_UPDATE_META_PAGE: + hash_xlog_update_meta_page(record); + break; + default: + elog(PANIC, "hash_redo: unknown op code %u", info); + } +} diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c index 354e7339cf4..241728fe6b1 100644 --- a/src/backend/access/hash/hashinsert.c +++ b/src/backend/access/hash/hashinsert.c @@ -16,6 +16,8 @@ #include "postgres.h" #include "access/hash.h" +#include "access/hash_xlog.h" +#include "miscadmin.h" #include "utils/rel.h" @@ -40,6 +42,7 @@ _hash_doinsert(Relation rel, IndexTuple itup) bool do_expand; uint32 hashkey; Bucket bucket; + OffsetNumber itup_off; /* * Get the hash key for the item (it's stored in the index tuple itself). @@ -158,25 +161,20 @@ restart_insert: Assert(pageopaque->hasho_bucket == bucket); } - /* found page with enough space, so add the item here */ - (void) _hash_pgaddtup(rel, buf, itemsz, itup); - - /* - * dirty and release the modified page. if the page we modified was an - * overflow page, we also need to separately drop the pin we retained on - * the primary bucket page. - */ - MarkBufferDirty(buf); - _hash_relbuf(rel, buf); - if (buf != bucket_buf) - _hash_dropbuf(rel, bucket_buf); - /* * Write-lock the metapage so we can increment the tuple count. After * incrementing it, check to see if it's time for a split. */ LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE); + /* Do the update. 
No ereport(ERROR) until changes are logged */ + START_CRIT_SECTION(); + + /* found page with enough space, so add the item here */ + itup_off = _hash_pgaddtup(rel, buf, itemsz, itup); + MarkBufferDirty(buf); + + /* metapage operations */ metap = HashPageGetMeta(metapage); metap->hashm_ntuples += 1; @@ -184,10 +182,43 @@ restart_insert: do_expand = metap->hashm_ntuples > (double) metap->hashm_ffactor * (metap->hashm_maxbucket + 1); - /* Write out the metapage and drop lock, but keep pin */ MarkBufferDirty(metabuf); + + /* XLOG stuff */ + if (RelationNeedsWAL(rel)) + { + xl_hash_insert xlrec; + XLogRecPtr recptr; + + xlrec.offnum = itup_off; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHashInsert); + + XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD); + + XLogRegisterBuffer(0, buf, REGBUF_STANDARD); + XLogRegisterBufData(0, (char *) itup, IndexTupleDSize(*itup)); + + recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT); + + PageSetLSN(BufferGetPage(buf), recptr); + PageSetLSN(BufferGetPage(metabuf), recptr); + } + + END_CRIT_SECTION(); + + /* drop lock on metapage, but keep pin */ LockBuffer(metabuf, BUFFER_LOCK_UNLOCK); + /* + * Release the modified page and ensure to release the pin on primary + * page. + */ + _hash_relbuf(rel, buf); + if (buf != bucket_buf) + _hash_dropbuf(rel, bucket_buf); + /* Attempt to split if a split is needed */ if (do_expand) _hash_expandtable(rel, metabuf); diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c index 1087480f7eb..a3cae21c605 100644 --- a/src/backend/access/hash/hashovfl.c +++ b/src/backend/access/hash/hashovfl.c @@ -18,6 +18,8 @@ #include "postgres.h" #include "access/hash.h" +#include "access/hash_xlog.h" +#include "miscadmin.h" #include "utils/rel.h" @@ -136,6 +138,13 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin) * page is released, then finally acquire the lock on new overflow buffer. * We need this locking order to avoid deadlock with backends that are * doing inserts. + * + * Note: We could have avoided locking many buffers here if we made two + * WAL records for acquiring an overflow page (one to allocate an overflow + * page and another to add it to overflow bucket chain). However, doing + * so can leak an overflow page, if the system crashes after allocation. + * Needless to say, it is better to have a single record from a + * performance point of view as well. */ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); @@ -303,8 +312,12 @@ _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf, bool retain_pin) found: /* - * Do the update. + * Do the update. No ereport(ERROR) until changes are logged. We want to + * log the changes for bitmap page and overflow page together to avoid + * loss of pages in case the new page is added. 
*/ + START_CRIT_SECTION(); + if (page_found) { Assert(BufferIsValid(mapbuf)); @@ -362,6 +375,51 @@ found: MarkBufferDirty(buf); + /* XLOG stuff */ + if (RelationNeedsWAL(rel)) + { + XLogRecPtr recptr; + xl_hash_add_ovfl_page xlrec; + + xlrec.bmpage_found = page_found; + xlrec.bmsize = metap->hashm_bmsize; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHashAddOvflPage); + + XLogRegisterBuffer(0, ovflbuf, REGBUF_WILL_INIT); + XLogRegisterBufData(0, (char *) &pageopaque->hasho_bucket, sizeof(Bucket)); + + XLogRegisterBuffer(1, buf, REGBUF_STANDARD); + + if (BufferIsValid(mapbuf)) + { + XLogRegisterBuffer(2, mapbuf, REGBUF_STANDARD); + XLogRegisterBufData(2, (char *) &bitmap_page_bit, sizeof(uint32)); + } + + if (BufferIsValid(newmapbuf)) + XLogRegisterBuffer(3, newmapbuf, REGBUF_WILL_INIT); + + XLogRegisterBuffer(4, metabuf, REGBUF_STANDARD); + XLogRegisterBufData(4, (char *) &metap->hashm_firstfree, sizeof(uint32)); + + recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_ADD_OVFL_PAGE); + + PageSetLSN(BufferGetPage(ovflbuf), recptr); + PageSetLSN(BufferGetPage(buf), recptr); + + if (BufferIsValid(mapbuf)) + PageSetLSN(BufferGetPage(mapbuf), recptr); + + if (BufferIsValid(newmapbuf)) + PageSetLSN(BufferGetPage(newmapbuf), recptr); + + PageSetLSN(BufferGetPage(metabuf), recptr); + } + + END_CRIT_SECTION(); + if (retain_pin) LockBuffer(buf, BUFFER_LOCK_UNLOCK); else @@ -408,7 +466,11 @@ _hash_firstfreebit(uint32 map) * Remove this overflow page from its bucket's chain, and mark the page as * free. On entry, ovflbuf is write-locked; it is released before exiting. * - * Add the tuples (itups) to wbuf. + * Add the tuples (itups) to wbuf in this function. We could do that in the + * caller as well, but the advantage of doing it here is we can easily write + * the WAL for XLOG_HASH_SQUEEZE_PAGE operation. Addition of tuples and + * removal of overflow page has to done as an atomic operation, otherwise + * during replay on standby users might find duplicate records. * * Since this function is invoked in VACUUM, we provide an access strategy * parameter that controls fetches of the bucket pages. @@ -430,8 +492,6 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, HashMetaPage metap; Buffer metabuf; Buffer mapbuf; - Buffer prevbuf = InvalidBuffer; - Buffer nextbuf = InvalidBuffer; BlockNumber ovflblkno; BlockNumber prevblkno; BlockNumber blkno; @@ -445,6 +505,9 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, int32 bitmappage, bitmapbit; Bucket bucket PG_USED_FOR_ASSERTS_ONLY; + Buffer prevbuf = InvalidBuffer; + Buffer nextbuf = InvalidBuffer; + bool update_metap = false; /* Get information from the doomed page */ _hash_checkpage(rel, ovflbuf, LH_OVERFLOW_PAGE); @@ -508,6 +571,12 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, /* Get write-lock on metapage to update firstfree */ LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE); + /* This operation needs to log multiple tuples, prepare WAL for that */ + if (RelationNeedsWAL(rel)) + XLogEnsureRecordSpace(HASH_XLOG_FREE_OVFL_BUFS, 4 + nitups); + + START_CRIT_SECTION(); + /* * we have to insert tuples on the "write" page, being careful to preserve * hashkey ordering. (If we insert many tuples into the same "write" page @@ -519,7 +588,11 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, MarkBufferDirty(wbuf); } - /* Initialize the freed overflow page. */ + /* + * Initialize the freed overflow page. 
Just zeroing the page won't work, + * because WAL replay routines expect pages to be initialized. See + * explanation of RBM_NORMAL mode atop XLogReadBufferExtended. + */ _hash_pageinit(ovflpage, BufferGetPageSize(ovflbuf)); MarkBufferDirty(ovflbuf); @@ -550,9 +623,83 @@ _hash_freeovflpage(Relation rel, Buffer bucketbuf, Buffer ovflbuf, if (ovflbitno < metap->hashm_firstfree) { metap->hashm_firstfree = ovflbitno; + update_metap = true; MarkBufferDirty(metabuf); } + /* XLOG stuff */ + if (RelationNeedsWAL(rel)) + { + xl_hash_squeeze_page xlrec; + XLogRecPtr recptr; + int i; + + xlrec.prevblkno = prevblkno; + xlrec.nextblkno = nextblkno; + xlrec.ntups = nitups; + xlrec.is_prim_bucket_same_wrt = (wbuf == bucketbuf); + xlrec.is_prev_bucket_same_wrt = (wbuf == prevbuf); + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHashSqueezePage); + + /* + * bucket buffer needs to be registered to ensure that we can acquire + * a cleanup lock on it during replay. + */ + if (!xlrec.is_prim_bucket_same_wrt) + XLogRegisterBuffer(0, bucketbuf, REGBUF_STANDARD | REGBUF_NO_IMAGE); + + XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD); + if (xlrec.ntups > 0) + { + XLogRegisterBufData(1, (char *) itup_offsets, + nitups * sizeof(OffsetNumber)); + for (i = 0; i < nitups; i++) + XLogRegisterBufData(1, (char *) itups[i], tups_size[i]); + } + + XLogRegisterBuffer(2, ovflbuf, REGBUF_STANDARD); + + /* + * If prevpage and the writepage (block in which we are moving tuples + * from overflow) are same, then no need to separately register + * prevpage. During replay, we can directly update the nextblock in + * writepage. + */ + if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt) + XLogRegisterBuffer(3, prevbuf, REGBUF_STANDARD); + + if (BufferIsValid(nextbuf)) + XLogRegisterBuffer(4, nextbuf, REGBUF_STANDARD); + + XLogRegisterBuffer(5, mapbuf, REGBUF_STANDARD); + XLogRegisterBufData(5, (char *) &bitmapbit, sizeof(uint32)); + + if (update_metap) + { + XLogRegisterBuffer(6, metabuf, REGBUF_STANDARD); + XLogRegisterBufData(6, (char *) &metap->hashm_firstfree, sizeof(uint32)); + } + + recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SQUEEZE_PAGE); + + PageSetLSN(BufferGetPage(wbuf), recptr); + PageSetLSN(BufferGetPage(ovflbuf), recptr); + + if (BufferIsValid(prevbuf) && !xlrec.is_prev_bucket_same_wrt) + PageSetLSN(BufferGetPage(prevbuf), recptr); + if (BufferIsValid(nextbuf)) + PageSetLSN(BufferGetPage(nextbuf), recptr); + + PageSetLSN(BufferGetPage(mapbuf), recptr); + + if (update_metap) + PageSetLSN(BufferGetPage(metabuf), recptr); + } + + END_CRIT_SECTION(); + /* release previous bucket if it is not same as write bucket */ if (BufferIsValid(prevbuf) && prevblkno != writeblkno) _hash_relbuf(rel, prevbuf); @@ -601,7 +748,11 @@ _hash_initbitmapbuffer(Buffer buf, uint16 bmsize, bool initpage) freep = HashPageGetBitmap(pg); MemSet(freep, 0xFF, bmsize); - /* Set pd_lower just past the end of the bitmap page data. */ + /* + * Set pd_lower just past the end of the bitmap page data. We could even + * set pd_lower equal to pd_upper, but this is more precise and makes the + * page look compressible to xlog.c. + */ ((PageHeader) pg)->pd_lower = ((char *) freep + bmsize) - (char *) pg; } @@ -761,6 +912,15 @@ readpage: Assert(nitups == ndeletable); /* + * This operation needs to log multiple tuples, prepare + * WAL for that. + */ + if (RelationNeedsWAL(rel)) + XLogEnsureRecordSpace(0, 3 + nitups); + + START_CRIT_SECTION(); + + /* * we have to insert tuples on the "write" page, being * careful to preserve hashkey ordering. 
(If we insert * many tuples into the same "write" page it would be @@ -773,6 +933,43 @@ readpage: PageIndexMultiDelete(rpage, deletable, ndeletable); MarkBufferDirty(rbuf); + /* XLOG stuff */ + if (RelationNeedsWAL(rel)) + { + XLogRecPtr recptr; + xl_hash_move_page_contents xlrec; + + xlrec.ntups = nitups; + xlrec.is_prim_bucket_same_wrt = (wbuf == bucket_buf) ? true : false; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHashMovePageContents); + + /* + * bucket buffer needs to be registered to ensure that + * we can acquire a cleanup lock on it during replay. + */ + if (!xlrec.is_prim_bucket_same_wrt) + XLogRegisterBuffer(0, bucket_buf, REGBUF_STANDARD | REGBUF_NO_IMAGE); + + XLogRegisterBuffer(1, wbuf, REGBUF_STANDARD); + XLogRegisterBufData(1, (char *) itup_offsets, + nitups * sizeof(OffsetNumber)); + for (i = 0; i < nitups; i++) + XLogRegisterBufData(1, (char *) itups[i], tups_size[i]); + + XLogRegisterBuffer(2, rbuf, REGBUF_STANDARD); + XLogRegisterBufData(2, (char *) deletable, + ndeletable * sizeof(OffsetNumber)); + + recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_MOVE_PAGE_CONTENTS); + + PageSetLSN(BufferGetPage(wbuf), recptr); + PageSetLSN(BufferGetPage(rbuf), recptr); + } + + END_CRIT_SECTION(); + tups_moved = true; } diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c index c73929cebbb..dc606f162e1 100644 --- a/src/backend/access/hash/hashpage.c +++ b/src/backend/access/hash/hashpage.c @@ -29,6 +29,7 @@ #include "postgres.h" #include "access/hash.h" +#include "access/hash_xlog.h" #include "miscadmin.h" #include "storage/lmgr.h" #include "storage/smgr.h" @@ -43,6 +44,7 @@ static void _hash_splitbucket(Relation rel, Buffer metabuf, HTAB *htab, uint32 maxbucket, uint32 highmask, uint32 lowmask); +static void log_split_page(Relation rel, Buffer buf); /* @@ -381,6 +383,25 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum) pg = BufferGetPage(metabuf); metap = HashPageGetMeta(pg); + /* XLOG stuff */ + if (RelationNeedsWAL(rel)) + { + xl_hash_init_meta_page xlrec; + XLogRecPtr recptr; + + xlrec.num_tuples = num_tuples; + xlrec.procid = metap->hashm_procid; + xlrec.ffactor = metap->hashm_ffactor; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHashInitMetaPage); + XLogRegisterBuffer(0, metabuf, REGBUF_WILL_INIT); + + recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INIT_META_PAGE); + + PageSetLSN(BufferGetPage(metabuf), recptr); + } + num_buckets = metap->hashm_maxbucket + 1; /* @@ -405,6 +426,12 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum) buf = _hash_getnewbuf(rel, blkno, forkNum); _hash_initbuf(buf, metap->hashm_maxbucket, i, LH_BUCKET_PAGE, false); MarkBufferDirty(buf); + + log_newpage(&rel->rd_node, + forkNum, + blkno, + BufferGetPage(buf), + true); _hash_relbuf(rel, buf); } @@ -431,6 +458,31 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum) metap->hashm_nmaps++; MarkBufferDirty(metabuf); + /* XLOG stuff */ + if (RelationNeedsWAL(rel)) + { + xl_hash_init_bitmap_page xlrec; + XLogRecPtr recptr; + + xlrec.bmsize = metap->hashm_bmsize; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, SizeOfHashInitBitmapPage); + XLogRegisterBuffer(0, bitmapbuf, REGBUF_WILL_INIT); + + /* + * This is safe only because nobody else can be modifying the index at + * this stage; it's only visible to the transaction that is creating + * it. 
+ */ + XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD); + + recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INIT_BITMAP_PAGE); + + PageSetLSN(BufferGetPage(bitmapbuf), recptr); + PageSetLSN(BufferGetPage(metabuf), recptr); + } + /* all done */ _hash_relbuf(rel, bitmapbuf); _hash_relbuf(rel, metabuf); @@ -525,7 +577,10 @@ _hash_init_metabuffer(Buffer buf, double num_tuples, RegProcedure procid, metap->hashm_ovflpoint = log2_num_buckets; metap->hashm_firstfree = 0; - /* Set pd_lower just past the end of the metadata. */ + /* + * Set pd_lower just past the end of the metadata. This is to log the + * full-page image of the metapage in xloginsert.c. + */ ((PageHeader) page)->pd_lower = ((char *) metap + sizeof(HashMetaPageData)) - (char *) page; } @@ -569,6 +624,8 @@ _hash_expandtable(Relation rel, Buffer metabuf) uint32 maxbucket; uint32 highmask; uint32 lowmask; + bool metap_update_masks = false; + bool metap_update_splitpoint = false; restart_expand: @@ -728,7 +785,11 @@ restart_expand: * The number of buckets in the new splitpoint is equal to the total * number already in existence, i.e. new_bucket. Currently this maps * one-to-one to blocks required, but someday we may need a more - * complicated calculation here. + * complicated calculation here. We treat allocation of buckets as a + * separate WAL-logged action. Even if we fail after this operation, + * we won't leak bucket pages; rather, the next split will consume this + * space. In any case, even without failure we don't use all the space + * in one split operation. */ if (!_hash_alloc_buckets(rel, start_nblkno, new_bucket)) { @@ -757,8 +818,7 @@ restart_expand: * Since we are scribbling on the pages in the shared buffers, establish a * critical section. Any failure in this next code leaves us with a big * problem: the metapage is effectively corrupt but could get written back - * to disk. We don't really expect any failure, but just to be sure, - * establish a critical section.
*/ START_CRIT_SECTION(); @@ -772,6 +832,7 @@ restart_expand: /* Starting a new doubling */ metap->hashm_lowmask = metap->hashm_highmask; metap->hashm_highmask = new_bucket | metap->hashm_lowmask; + metap_update_masks = true; } /* @@ -784,6 +845,7 @@ restart_expand: { metap->hashm_spares[spare_ndx] = metap->hashm_spares[metap->hashm_ovflpoint]; metap->hashm_ovflpoint = spare_ndx; + metap_update_splitpoint = true; } MarkBufferDirty(metabuf); @@ -829,6 +891,49 @@ restart_expand: MarkBufferDirty(buf_nblkno); + /* XLOG stuff */ + if (RelationNeedsWAL(rel)) + { + xl_hash_split_allocate_page xlrec; + XLogRecPtr recptr; + + xlrec.new_bucket = maxbucket; + xlrec.old_bucket_flag = oopaque->hasho_flag; + xlrec.new_bucket_flag = nopaque->hasho_flag; + xlrec.flags = 0; + + XLogBeginInsert(); + + XLogRegisterBuffer(0, buf_oblkno, REGBUF_STANDARD); + XLogRegisterBuffer(1, buf_nblkno, REGBUF_WILL_INIT); + XLogRegisterBuffer(2, metabuf, REGBUF_STANDARD); + + if (metap_update_masks) + { + xlrec.flags |= XLH_SPLIT_META_UPDATE_MASKS; + XLogRegisterBufData(2, (char *) &metap->hashm_lowmask, sizeof(uint32)); + XLogRegisterBufData(2, (char *) &metap->hashm_highmask, sizeof(uint32)); + } + + if (metap_update_splitpoint) + { + xlrec.flags |= XLH_SPLIT_META_UPDATE_SPLITPOINT; + XLogRegisterBufData(2, (char *) &metap->hashm_ovflpoint, + sizeof(uint32)); + XLogRegisterBufData(2, + (char *) &metap->hashm_spares[metap->hashm_ovflpoint], + sizeof(uint32)); + } + + XLogRegisterData((char *) &xlrec, SizeOfHashSplitAllocPage); + + recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_ALLOCATE_PAGE); + + PageSetLSN(BufferGetPage(buf_oblkno), recptr); + PageSetLSN(BufferGetPage(buf_nblkno), recptr); + PageSetLSN(BufferGetPage(metabuf), recptr); + } + END_CRIT_SECTION(); /* drop lock, but keep pin */ @@ -883,6 +988,7 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks) { BlockNumber lastblock; char zerobuf[BLCKSZ]; + Page page; lastblock = firstblock + nblocks - 1; @@ -893,7 +999,20 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks) if (lastblock < firstblock || lastblock == InvalidBlockNumber) return false; - MemSet(zerobuf, 0, sizeof(zerobuf)); + page = (Page) zerobuf; + + /* + * Initialize the last page in the newly allocated space. Just zeroing + * the page won't work; see _hash_freeovflpage for similar usage. + */ + _hash_pageinit(page, BLCKSZ); + + if (RelationNeedsWAL(rel)) + log_newpage(&rel->rd_node, + MAIN_FORKNUM, + lastblock, + zerobuf, + true); RelationOpenSmgr(rel); smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf, false); @@ -951,6 +1070,11 @@ _hash_splitbucket(Relation rel, Page npage; HashPageOpaque oopaque; HashPageOpaque nopaque; + OffsetNumber itup_offsets[MaxIndexTuplesPerPage]; + IndexTuple itups[MaxIndexTuplesPerPage]; + Size all_tups_size = 0; + int i; + uint16 nitups = 0; bucket_obuf = obuf; opage = BufferGetPage(obuf); @@ -1029,29 +1153,38 @@ _hash_splitbucket(Relation rel, itemsz = IndexTupleDSize(*new_itup); itemsz = MAXALIGN(itemsz); - if (PageGetFreeSpace(npage) < itemsz) + if (PageGetFreeSpaceForMultipleTuples(npage, nitups + 1) < (all_tups_size + itemsz)) { - /* write out nbuf and drop lock, but keep pin */ + /* + * Change the shared buffer state in a critical section; + * otherwise, any error could make it unrecoverable.
+ */ + START_CRIT_SECTION(); + + _hash_pgaddmultitup(rel, nbuf, itups, itup_offsets, nitups); MarkBufferDirty(nbuf); + /* log the split operation before releasing the lock */ + log_split_page(rel, nbuf); + + END_CRIT_SECTION(); + /* drop lock, but keep pin */ LockBuffer(nbuf, BUFFER_LOCK_UNLOCK); + + /* be tidy */ + for (i = 0; i < nitups; i++) + pfree(itups[i]); + nitups = 0; + all_tups_size = 0; + /* chain to a new overflow page */ nbuf = _hash_addovflpage(rel, metabuf, nbuf, (nbuf == bucket_nbuf) ? true : false); npage = BufferGetPage(nbuf); nopaque = (HashPageOpaque) PageGetSpecialPointer(npage); } - /* - * Insert tuple on new page, using _hash_pgaddtup to ensure - * correct ordering by hashkey. This is a tad inefficient - * since we may have to shuffle itempointers repeatedly. - * Possible future improvement: accumulate all the items for - * the new page and qsort them before insertion. - */ - (void) _hash_pgaddtup(rel, nbuf, itemsz, new_itup); - - /* be tidy */ - pfree(new_itup); + itups[nitups++] = new_itup; + all_tups_size += itemsz; } else { @@ -1073,11 +1206,27 @@ _hash_splitbucket(Relation rel, /* Exit loop if no more overflow pages in old bucket */ if (!BlockNumberIsValid(oblkno)) { + /* + * Change the shared buffer state in a critical section; + * otherwise, any error could make it unrecoverable. + */ + START_CRIT_SECTION(); + + _hash_pgaddmultitup(rel, nbuf, itups, itup_offsets, nitups); MarkBufferDirty(nbuf); + /* log the split operation before releasing the lock */ + log_split_page(rel, nbuf); + + END_CRIT_SECTION(); + if (nbuf == bucket_nbuf) LockBuffer(nbuf, BUFFER_LOCK_UNLOCK); else _hash_relbuf(rel, nbuf); + + /* be tidy */ + for (i = 0; i < nitups; i++) + pfree(itups[i]); break; } @@ -1103,6 +1252,8 @@ _hash_splitbucket(Relation rel, npage = BufferGetPage(bucket_nbuf); nopaque = (HashPageOpaque) PageGetSpecialPointer(npage); + START_CRIT_SECTION(); + oopaque->hasho_flag &= ~LH_BUCKET_BEING_SPLIT; nopaque->hasho_flag &= ~LH_BUCKET_BEING_POPULATED; @@ -1119,6 +1270,29 @@ _hash_splitbucket(Relation rel, */ MarkBufferDirty(bucket_obuf); MarkBufferDirty(bucket_nbuf); + + if (RelationNeedsWAL(rel)) + { + XLogRecPtr recptr; + xl_hash_split_complete xlrec; + + xlrec.old_bucket_flag = oopaque->hasho_flag; + xlrec.new_bucket_flag = nopaque->hasho_flag; + + XLogBeginInsert(); + + XLogRegisterData((char *) &xlrec, SizeOfHashSplitComplete); + + XLogRegisterBuffer(0, bucket_obuf, REGBUF_STANDARD); + XLogRegisterBuffer(1, bucket_nbuf, REGBUF_STANDARD); + + recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_COMPLETE); + + PageSetLSN(BufferGetPage(bucket_obuf), recptr); + PageSetLSN(BufferGetPage(bucket_nbuf), recptr); + } + + END_CRIT_SECTION(); } /* @@ -1245,6 +1419,32 @@ _hash_finish_split(Relation rel, Buffer metabuf, Buffer obuf, Bucket obucket, } /* + * log_split_page() -- Log the split operation + * + * We log the split operation when the new page in the new bucket gets full, + * at which point we log the entire page. + * + * 'buf' must be locked by the caller, which is also responsible for + * unlocking it. + */ +static void +log_split_page(Relation rel, Buffer buf) +{ + if (RelationNeedsWAL(rel)) + { + XLogRecPtr recptr; + + XLogBeginInsert(); + + XLogRegisterBuffer(0, buf, REGBUF_FORCE_IMAGE | REGBUF_STANDARD); + + recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_SPLIT_PAGE); + + PageSetLSN(BufferGetPage(buf), recptr); + } +} + +/* * _hash_getcachedmetap() -- Returns cached metapage data.
* * If metabuf is not InvalidBuffer, caller must hold a pin, but no lock, on diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c index 9e5d7e4babe..d7337703b0b 100644 --- a/src/backend/access/hash/hashsearch.c +++ b/src/backend/access/hash/hashsearch.c @@ -123,6 +123,7 @@ _hash_readnext(IndexScanDesc scan, if (block_found) { *pagep = BufferGetPage(*bufp); + TestForOldSnapshot(scan->xs_snapshot, rel, *pagep); *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep); } } @@ -168,6 +169,7 @@ _hash_readprev(IndexScanDesc scan, *bufp = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); *pagep = BufferGetPage(*bufp); + TestForOldSnapshot(scan->xs_snapshot, rel, *pagep); *opaquep = (HashPageOpaque) PageGetSpecialPointer(*pagep); /* @@ -283,6 +285,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL); page = BufferGetPage(buf); + TestForOldSnapshot(scan->xs_snapshot, rel, page); opaque = (HashPageOpaque) PageGetSpecialPointer(page); bucket = opaque->hasho_bucket; @@ -318,6 +321,7 @@ _hash_first(IndexScanDesc scan, ScanDirection dir) LockBuffer(buf, BUFFER_LOCK_UNLOCK); old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE); + TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(old_buf)); /* * remember the split bucket buffer so as to use it later for @@ -520,6 +524,7 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) _hash_readprev(scan, &buf, &page, &opaque); if (BufferIsValid(buf)) { + TestForOldSnapshot(scan->xs_snapshot, rel, page); maxoff = PageGetMaxOffsetNumber(page); offnum = _hash_binsearch_last(page, so->hashso_sk_hash); } diff --git a/src/backend/access/rmgrdesc/hashdesc.c b/src/backend/access/rmgrdesc/hashdesc.c index 7eac8191cad..f1cc9ff9514 100644 --- a/src/backend/access/rmgrdesc/hashdesc.c +++ b/src/backend/access/rmgrdesc/hashdesc.c @@ -19,10 +19,142 @@ void hash_desc(StringInfo buf, XLogReaderState *record) { + char *rec = XLogRecGetData(record); + uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK; + + switch (info) + { + case XLOG_HASH_INIT_META_PAGE: + { + xl_hash_init_meta_page *xlrec = (xl_hash_init_meta_page *) rec; + + appendStringInfo(buf, "num_tuples %g, fillfactor %d", + xlrec->num_tuples, xlrec->ffactor); + break; + } + case XLOG_HASH_INIT_BITMAP_PAGE: + { + xl_hash_init_bitmap_page *xlrec = (xl_hash_init_bitmap_page *) rec; + + appendStringInfo(buf, "bmsize %d", xlrec->bmsize); + break; + } + case XLOG_HASH_INSERT: + { + xl_hash_insert *xlrec = (xl_hash_insert *) rec; + + appendStringInfo(buf, "off %u", xlrec->offnum); + break; + } + case XLOG_HASH_ADD_OVFL_PAGE: + { + xl_hash_add_ovfl_page *xlrec = (xl_hash_add_ovfl_page *) rec; + + appendStringInfo(buf, "bmsize %d, bmpage_found %c", + xlrec->bmsize, (xlrec->bmpage_found) ? 'T' : 'F'); + break; + } + case XLOG_HASH_SPLIT_ALLOCATE_PAGE: + { + xl_hash_split_allocate_page *xlrec = (xl_hash_split_allocate_page *) rec; + + appendStringInfo(buf, "new_bucket %u, meta_page_masks_updated %c, issplitpoint_changed %c", + xlrec->new_bucket, + (xlrec->flags & XLH_SPLIT_META_UPDATE_MASKS) ? 'T' : 'F', + (xlrec->flags & XLH_SPLIT_META_UPDATE_SPLITPOINT) ? 
'T' : 'F'); + break; + } + case XLOG_HASH_SPLIT_COMPLETE: + { + xl_hash_split_complete *xlrec = (xl_hash_split_complete *) rec; + + appendStringInfo(buf, "old_bucket_flag %u, new_bucket_flag %u", + xlrec->old_bucket_flag, xlrec->new_bucket_flag); + break; + } + case XLOG_HASH_MOVE_PAGE_CONTENTS: + { + xl_hash_move_page_contents *xlrec = (xl_hash_move_page_contents *) rec; + + appendStringInfo(buf, "ntups %d, is_primary %c", + xlrec->ntups, + xlrec->is_prim_bucket_same_wrt ? 'T' : 'F'); + break; + } + case XLOG_HASH_SQUEEZE_PAGE: + { + xl_hash_squeeze_page *xlrec = (xl_hash_squeeze_page *) rec; + + appendStringInfo(buf, "prevblkno %u, nextblkno %u, ntups %d, is_primary %c", + xlrec->prevblkno, + xlrec->nextblkno, + xlrec->ntups, + xlrec->is_prim_bucket_same_wrt ? 'T' : 'F'); + break; + } + case XLOG_HASH_DELETE: + { + xl_hash_delete *xlrec = (xl_hash_delete *) rec; + + appendStringInfo(buf, "is_primary %c", + xlrec->is_primary_bucket_page ? 'T' : 'F'); + break; + } + case XLOG_HASH_UPDATE_META_PAGE: + { + xl_hash_update_meta_page *xlrec = (xl_hash_update_meta_page *) rec; + + appendStringInfo(buf, "ntuples %g", + xlrec->ntuples); + break; + } + } } const char * hash_identify(uint8 info) { - return NULL; + const char *id = NULL; + + switch (info & ~XLR_INFO_MASK) + { + case XLOG_HASH_INIT_META_PAGE: + id = "INIT_META_PAGE"; + break; + case XLOG_HASH_INIT_BITMAP_PAGE: + id = "INIT_BITMAP_PAGE"; + break; + case XLOG_HASH_INSERT: + id = "INSERT"; + break; + case XLOG_HASH_ADD_OVFL_PAGE: + id = "ADD_OVFL_PAGE"; + break; + case XLOG_HASH_SPLIT_ALLOCATE_PAGE: + id = "SPLIT_ALLOCATE_PAGE"; + break; + case XLOG_HASH_SPLIT_PAGE: + id = "SPLIT_PAGE"; + break; + case XLOG_HASH_SPLIT_COMPLETE: + id = "SPLIT_COMPLETE"; + break; + case XLOG_HASH_MOVE_PAGE_CONTENTS: + id = "MOVE_PAGE_CONTENTS"; + break; + case XLOG_HASH_SQUEEZE_PAGE: + id = "SQUEEZE_PAGE"; + break; + case XLOG_HASH_DELETE: + id = "DELETE"; + break; + case XLOG_HASH_SPLIT_CLEANUP: + id = "SPLIT_CLEANUP"; + break; + case XLOG_HASH_UPDATE_META_PAGE: + id = "UPDATE_META_PAGE"; + break; + } + + return id; } diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 72bb06c7602..9618032356a 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -506,11 +506,6 @@ DefineIndex(Oid relationId, accessMethodForm = (Form_pg_am) GETSTRUCT(tuple); amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler); - if (strcmp(accessMethodName, "hash") == 0 && - RelationNeedsWAL(rel)) - ereport(WARNING, - (errmsg("hash indexes are not WAL-logged and their use is discouraged"))); - if (stmt->unique && !amRoutine->amcanunique) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 9001e202b03..ce55fc52777 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -5880,13 +5880,10 @@ RelationIdIsInInitFile(Oid relationId) /* * Tells whether any index for the relation is unlogged. * - * Any index using the hash AM is implicitly unlogged. - * * Note: There doesn't seem to be any way to have an unlogged index attached - * to a permanent table except to create a hash index, but it seems best to - * keep this general so that it returns sensible results even when they seem - * obvious (like for an unlogged table) and to handle possible future unlogged - * indexes on permanent tables. 
+ to a permanent table, but it seems best to keep this general so that it + returns sensible results even when they seem obvious (like for an unlogged + table) and to handle possible future unlogged indexes on permanent tables. */ bool RelationHasUnloggedIndex(Relation rel) @@ -5908,8 +5905,7 @@ RelationHasUnloggedIndex(Relation rel) elog(ERROR, "cache lookup failed for relation %u", indexoid); reltup = (Form_pg_class) GETSTRUCT(tp); - if (reltup->relpersistence == RELPERSISTENCE_UNLOGGED - || reltup->relam == HASH_AM_OID) + if (reltup->relpersistence == RELPERSISTENCE_UNLOGGED) result = true; ReleaseSysCache(tp); diff --git a/src/include/access/hash_xlog.h b/src/include/access/hash_xlog.h index cc231632e12..2075ab7afad 100644 --- a/src/include/access/hash_xlog.h +++ b/src/include/access/hash_xlog.h @@ -16,7 +16,239 @@ #include "access/xlogreader.h" #include "lib/stringinfo.h" +#include "storage/off.h" /* Number of buffers required for XLOG_HASH_SQUEEZE_PAGE operation */ +#define HASH_XLOG_FREE_OVFL_BUFS 6 + +/* + * XLOG records for hash operations + */ +#define XLOG_HASH_INIT_META_PAGE 0x00 /* initialize the meta page */ +#define XLOG_HASH_INIT_BITMAP_PAGE 0x10 /* initialize the bitmap page */ +#define XLOG_HASH_INSERT 0x20 /* add index tuple without split */ +#define XLOG_HASH_ADD_OVFL_PAGE 0x30 /* add overflow page */ +#define XLOG_HASH_SPLIT_ALLOCATE_PAGE 0x40 /* allocate new page for split */ +#define XLOG_HASH_SPLIT_PAGE 0x50 /* split page */ +#define XLOG_HASH_SPLIT_COMPLETE 0x60 /* completion of split + * operation */ +#define XLOG_HASH_MOVE_PAGE_CONTENTS 0x70 /* remove tuples from one page + * and add to another page */ +#define XLOG_HASH_SQUEEZE_PAGE 0x80 /* add tuples to one of the previous + * pages in chain and free the ovfl + * page */ +#define XLOG_HASH_DELETE 0x90 /* delete index tuples from a page */ +#define XLOG_HASH_SPLIT_CLEANUP 0xA0 /* clear split-cleanup flag in primary + * bucket page after deleting tuples + * that are moved due to split */ +#define XLOG_HASH_UPDATE_META_PAGE 0xB0 /* update meta page after + * vacuum */ + + +/* + * xl_hash_split_allocate_page flag values; 8 bits are available. + */ +#define XLH_SPLIT_META_UPDATE_MASKS (1<<0) +#define XLH_SPLIT_META_UPDATE_SPLITPOINT (1<<1) + +/* + * This is what we need to know about a HASH index creation. + * + * Backup block 0: metapage + */ +typedef struct xl_hash_createidx +{ + double num_tuples; + RegProcedure procid; + uint16 ffactor; +} xl_hash_createidx; +#define SizeOfHashCreateIdx (offsetof(xl_hash_createidx, ffactor) + sizeof(uint16)) + +/* + * This is what we need to know about a simple (without split) insert. + * + * This data record is used for XLOG_HASH_INSERT + * + * Backup Blk 0: original page (data contains the inserted tuple) + * Backup Blk 1: metapage (HashMetaPageData) + */ +typedef struct xl_hash_insert +{ + OffsetNumber offnum; +} xl_hash_insert; + +#define SizeOfHashInsert (offsetof(xl_hash_insert, offnum) + sizeof(OffsetNumber)) + +/* + * This is what we need to know about the addition of an overflow page.
+ * + * This data record is used for XLOG_HASH_ADD_OVFL_PAGE + * + * Backup Blk 0: newly allocated overflow page + * Backup Blk 1: page before new overflow page in the bucket chain + * Backup Blk 2: bitmap page + * Backup Blk 3: new bitmap page + * Backup Blk 4: metapage + */ +typedef struct xl_hash_add_ovfl_page +{ + uint16 bmsize; + bool bmpage_found; +} xl_hash_add_ovfl_page; + +#define SizeOfHashAddOvflPage \ + (offsetof(xl_hash_add_ovfl_page, bmpage_found) + sizeof(bool)) + +/* + * This is what we need to know about allocating a page for a split. + * + * This data record is used for XLOG_HASH_SPLIT_ALLOCATE_PAGE + * + * Backup Blk 0: page for old bucket + * Backup Blk 1: page for new bucket + * Backup Blk 2: metapage + */ +typedef struct xl_hash_split_allocate_page +{ + uint32 new_bucket; + uint16 old_bucket_flag; + uint16 new_bucket_flag; + uint8 flags; +} xl_hash_split_allocate_page; + +#define SizeOfHashSplitAllocPage \ + (offsetof(xl_hash_split_allocate_page, flags) + sizeof(uint8)) + +/* + * This is what we need to know about completing the split operation. + * + * This data record is used for XLOG_HASH_SPLIT_COMPLETE + * + * Backup Blk 0: page for old bucket + * Backup Blk 1: page for new bucket + */ +typedef struct xl_hash_split_complete +{ + uint16 old_bucket_flag; + uint16 new_bucket_flag; +} xl_hash_split_complete; + +#define SizeOfHashSplitComplete \ + (offsetof(xl_hash_split_complete, new_bucket_flag) + sizeof(uint16)) + +/* + * This is what we need to know about moving page contents during a + * squeeze operation. + * + * This data record is used for XLOG_HASH_MOVE_PAGE_CONTENTS + * + * Backup Blk 0: bucket page + * Backup Blk 1: page containing moved tuples + * Backup Blk 2: page from which tuples will be removed + */ +typedef struct xl_hash_move_page_contents +{ + uint16 ntups; + bool is_prim_bucket_same_wrt; /* TRUE if the page to which + * tuples are moved is the same + * as the primary bucket page */ +} xl_hash_move_page_contents; + +#define SizeOfHashMovePageContents \ + (offsetof(xl_hash_move_page_contents, is_prim_bucket_same_wrt) + sizeof(bool)) + +/* + * This is what we need to know about the squeeze page operation. + * + * This data record is used for XLOG_HASH_SQUEEZE_PAGE + * + * Backup Blk 0: page containing tuples moved from freed overflow page + * Backup Blk 1: freed overflow page + * Backup Blk 2: page previous to the freed overflow page + * Backup Blk 3: page next to the freed overflow page + * Backup Blk 4: bitmap page containing info of freed overflow page + * Backup Blk 5: meta page + */ +typedef struct xl_hash_squeeze_page +{ + BlockNumber prevblkno; + BlockNumber nextblkno; + uint16 ntups; + bool is_prim_bucket_same_wrt; /* TRUE if the page to which + * tuples are moved is the same + * as the primary bucket page */ + bool is_prev_bucket_same_wrt; /* TRUE if the page to which + * tuples are moved is the + * page previous to the freed + * overflow page */ +} xl_hash_squeeze_page; + +#define SizeOfHashSqueezePage \ + (offsetof(xl_hash_squeeze_page, is_prev_bucket_same_wrt) + sizeof(bool)) + +/* + * This is what we need to know about the deletion of index tuples from a page.
+ * + * This data record is used for XLOG_HASH_DELETE + * + * Backup Blk 0: primary bucket page + * Backup Blk 1: page from which tuples are deleted + */ +typedef struct xl_hash_delete +{ + bool is_primary_bucket_page; /* TRUE if the operation is for + * the primary bucket page */ +} xl_hash_delete; + +#define SizeOfHashDelete (offsetof(xl_hash_delete, is_primary_bucket_page) + sizeof(bool)) + +/* + * This is what we need for a metapage update operation. + * + * This data record is used for XLOG_HASH_UPDATE_META_PAGE + * + * Backup Blk 0: meta page + */ +typedef struct xl_hash_update_meta_page +{ + double ntuples; +} xl_hash_update_meta_page; + +#define SizeOfHashUpdateMetaPage \ + (offsetof(xl_hash_update_meta_page, ntuples) + sizeof(double)) + +/* + * This is what we need to initialize the metapage. + * + * This data record is used for XLOG_HASH_INIT_META_PAGE + * + * Backup Blk 0: meta page + */ +typedef struct xl_hash_init_meta_page +{ + double num_tuples; + RegProcedure procid; + uint16 ffactor; +} xl_hash_init_meta_page; + +#define SizeOfHashInitMetaPage \ + (offsetof(xl_hash_init_meta_page, ffactor) + sizeof(uint16)) + +/* + * This is what we need to initialize a bitmap page. + * + * This data record is used for XLOG_HASH_INIT_BITMAP_PAGE + * + * Backup Blk 0: bitmap page + * Backup Blk 1: meta page + */ +typedef struct xl_hash_init_bitmap_page +{ + uint16 bmsize; +} xl_hash_init_bitmap_page; + +#define SizeOfHashInitBitmapPage \ + (offsetof(xl_hash_init_bitmap_page, bmsize) + sizeof(uint16)) extern void hash_redo(XLogReaderState *record); extern void hash_desc(StringInfo buf, XLogReaderState *record); diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index e519fdb0f69..26cd05933ca 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -2335,13 +2335,9 @@ Options: fastupdate=on, gin_pending_list_limit=128 -- HASH -- CREATE INDEX hash_i4_index ON hash_i4_heap USING hash (random int4_ops); -WARNING: hash indexes are not WAL-logged and their use is discouraged CREATE INDEX hash_name_index ON hash_name_heap USING hash (random name_ops); -WARNING: hash indexes are not WAL-logged and their use is discouraged CREATE INDEX hash_txt_index ON hash_txt_heap USING hash (random text_ops); -WARNING: hash indexes are not WAL-logged and their use is discouraged CREATE INDEX hash_f8_index ON hash_f8_heap USING hash (random float8_ops); -WARNING: hash indexes are not WAL-logged and their use is discouraged CREATE UNLOGGED TABLE unlogged_hash_table (id int4); CREATE INDEX unlogged_hash_index ON unlogged_hash_table USING hash (id int4_ops); DROP TABLE unlogged_hash_table; @@ -2350,7 +2346,6 @@ DROP TABLE unlogged_hash_table; -- maintenance_work_mem setting and fillfactor: SET maintenance_work_mem = '1MB'; CREATE INDEX hash_tuplesort_idx ON tenk1 USING hash (stringu1 name_ops) WITH (fillfactor = 10); -WARNING: hash indexes are not WAL-logged and their use is discouraged EXPLAIN (COSTS OFF) SELECT count(*) FROM tenk1 WHERE stringu1 = 'TVAAAA'; QUERY PLAN diff --git a/src/test/regress/expected/enum.out b/src/test/regress/expected/enum.out index 514d1d01a10..0e6030443f9 100644 --- a/src/test/regress/expected/enum.out +++ b/src/test/regress/expected/enum.out @@ -383,7 +383,6 @@ DROP INDEX enumtest_btree; -- Hash index / opclass with the = operator -- CREATE INDEX enumtest_hash ON enumtest USING hash (col); -WARNING: hash indexes are not WAL-logged and their use is discouraged SELECT * FROM enumtest WHERE col = 
'orange'; col -------- diff --git a/src/test/regress/expected/hash_index.out b/src/test/regress/expected/hash_index.out index f8b9f029b21..0a18efacfc4 100644 --- a/src/test/regress/expected/hash_index.out +++ b/src/test/regress/expected/hash_index.out @@ -201,7 +201,6 @@ SELECT h.seqno AS f20000 -- CREATE TABLE hash_split_heap (keycol INT); CREATE INDEX hash_split_index on hash_split_heap USING HASH (keycol); -WARNING: hash indexes are not WAL-logged and their use is discouraged INSERT INTO hash_split_heap SELECT 1 FROM generate_series(1, 70000) a; VACUUM FULL hash_split_heap; -- Let's do a backward scan. @@ -230,5 +229,4 @@ DROP TABLE hash_temp_heap CASCADE; CREATE TABLE hash_heap_float4 (x float4, y int); INSERT INTO hash_heap_float4 VALUES (1.1,1); CREATE INDEX hash_idx ON hash_heap_float4 USING hash (x); -WARNING: hash indexes are not WAL-logged and their use is discouraged DROP TABLE hash_heap_float4 CASCADE; diff --git a/src/test/regress/expected/macaddr.out b/src/test/regress/expected/macaddr.out index e84ff5f8c0a..151f9ce59bb 100644 --- a/src/test/regress/expected/macaddr.out +++ b/src/test/regress/expected/macaddr.out @@ -41,7 +41,6 @@ SELECT * FROM macaddr_data; CREATE INDEX macaddr_data_btree ON macaddr_data USING btree (b); CREATE INDEX macaddr_data_hash ON macaddr_data USING hash (b); -WARNING: hash indexes are not WAL-logged and their use is discouraged SELECT a, b, trunc(b) FROM macaddr_data ORDER BY 2, 1; a | b | trunc ----+-------------------+------------------- diff --git a/src/test/regress/expected/replica_identity.out b/src/test/regress/expected/replica_identity.out index fa63235fc9d..67c34a92a4e 100644 --- a/src/test/regress/expected/replica_identity.out +++ b/src/test/regress/expected/replica_identity.out @@ -12,7 +12,6 @@ CREATE UNIQUE INDEX test_replica_identity_keyab_key ON test_replica_identity (ke CREATE UNIQUE INDEX test_replica_identity_oid_idx ON test_replica_identity (oid); CREATE UNIQUE INDEX test_replica_identity_nonkey ON test_replica_identity (keya, nonkey); CREATE INDEX test_replica_identity_hash ON test_replica_identity USING hash (nonkey); -WARNING: hash indexes are not WAL-logged and their use is discouraged CREATE UNIQUE INDEX test_replica_identity_expr ON test_replica_identity (keya, keyb, (3)); CREATE UNIQUE INDEX test_replica_identity_partial ON test_replica_identity (keya, keyb) WHERE keyb != '3'; -- default is 'd'/DEFAULT for user created tables diff --git a/src/test/regress/expected/uuid.out b/src/test/regress/expected/uuid.out index 423f27787f3..db66dc723ef 100644 --- a/src/test/regress/expected/uuid.out +++ b/src/test/regress/expected/uuid.out @@ -114,7 +114,6 @@ SELECT COUNT(*) FROM guid1 WHERE guid_field >= '22222222-2222-2222-2222-22222222 -- btree and hash index creation test CREATE INDEX guid1_btree ON guid1 USING BTREE (guid_field); CREATE INDEX guid1_hash ON guid1 USING HASH (guid_field); -WARNING: hash indexes are not WAL-logged and their use is discouraged -- unique index test CREATE UNIQUE INDEX guid1_unique_BTREE ON guid1 USING BTREE (guid_field); -- should fail
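
The write-side hunks above all follow a single WAL-logging pattern: enter a critical section, modify and dirty the buffers, then, when RelationNeedsWAL, build a record with XLogBeginInsert/XLogRegisterBuffer/XLogRegisterData, call XLogInsert, and stamp each modified registered page with the returned LSN. The condensed C sketch below illustrates that pattern for the simple-insert case. It is an illustration only, not the hashinsert.c hunk (which is not reproduced on this page); the locals rel, buf, metabuf, itup, itup_off, and itemsz are assumed to be set up as in _hash_doinsert.

    /*
     * Sketch of the recurring WAL-logging pattern (illustrative only;
     * variable setup is assumed, not taken from this diff).
     */
    START_CRIT_SECTION();

    /* modify the page(s) while holding exclusive content locks */
    (void) _hash_pgaddtup(rel, buf, itemsz, itup);
    MarkBufferDirty(buf);
    MarkBufferDirty(metabuf);   /* the tuple count on the metapage was bumped */

    if (RelationNeedsWAL(rel))
    {
        xl_hash_insert xlrec;
        XLogRecPtr  recptr;

        xlrec.offnum = itup_off;    /* offset at which the tuple was added */

        XLogBeginInsert();
        XLogRegisterData((char *) &xlrec, SizeOfHashInsert);

        /* register every buffer the redo routine will need to touch */
        XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
        XLogRegisterBufData(0, (char *) itup, IndexTupleDSize(*itup));
        XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);

        recptr = XLogInsert(RM_HASH_ID, XLOG_HASH_INSERT);

        /* stamp every modified page with the record's LSN */
        PageSetLSN(BufferGetPage(buf), recptr);
        PageSetLSN(BufferGetPage(metabuf), recptr);
    }

    END_CRIT_SECTION();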
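
hash_xlog.c, which consumes these records (963 lines in the diffstat), is likewise not reproduced on this page. Purely to show the shape of the replay side, here is a hedged sketch of redoing XLOG_HASH_INSERT with the standard XLogReadBufferForRedo machinery. This is not the commit's actual code: the function name is made up, and the sketch omits the metapage (registered as block 1), which the real record must also replay.

    /*
     * Illustrative sketch only -- NOT the hash_xlog.c from this commit.
     * Shows the usual shape of a redo routine for XLOG_HASH_INSERT; the
     * metapage (block 1) would be handled with a similar block.
     */
    static void
    hash_xlog_insert_sketch(XLogReaderState *record)
    {
        XLogRecPtr  lsn = record->EndRecPtr;
        xl_hash_insert *xlrec = (xl_hash_insert *) XLogRecGetData(record);
        Buffer      buffer;

        /* block 0 is the bucket or overflow page that received the tuple */
        if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
        {
            Size        datalen;
            char       *datapos = XLogRecGetBlockData(record, 0, &datalen);
            Page        page = BufferGetPage(buffer);

            /* re-insert the logged tuple at the recorded offset */
            if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
                            false, false) == InvalidOffsetNumber)
                elog(PANIC, "failed to add item to hash index page");

            PageSetLSN(page, lsn);
            MarkBufferDirty(buffer);
        }
        if (BufferIsValid(buffer))
            UnlockReleaseBuffer(buffer);
    }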
