static Buffer brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
- bool *was_extended);
+ bool *extended);
static Size br_page_get_freespace(Page page);
+static void brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer);
/*
BrinTuple *oldtup;
Size oldsz;
Buffer newbuf;
- bool extended = false;
+ bool extended;
Assert(newsz == MAXALIGN(newsz));
{
/* need a page on which to put the item */
newbuf = brin_getinsertbuffer(idxrel, oldbuf, newsz, &extended);
- /* XXX delay vacuuming FSM until locks are released? */
- if (extended)
- FreeSpaceMapVacuum(idxrel);
if (!BufferIsValid(newbuf))
+ {
+ Assert(!extended);
return false;
+ }
/*
* Note: it's possible (though unlikely) that the returned newbuf is
* buffer does in fact have enough space.
*/
if (newbuf == oldbuf)
+ {
+ Assert(!extended);
newbuf = InvalidBuffer;
+ }
}
else
{
if (!ItemIdIsNormal(oldlp))
{
LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+
+ /*
+ * If this happens, and the new buffer was obtained by extending the
+ * relation, then we need to ensure we don't leave it uninitialized or
+ * forget about it.
+ */
if (BufferIsValid(newbuf))
+ {
+ if (extended)
+ brin_initialize_empty_new_buffer(idxrel, newbuf);
UnlockReleaseBuffer(newbuf);
+ if (extended)
+ FreeSpaceMapVacuum(idxrel);
+ }
return false;
}
{
LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
if (BufferIsValid(newbuf))
+ {
+ if (extended)
+ brin_initialize_empty_new_buffer(idxrel, newbuf);
UnlockReleaseBuffer(newbuf);
+ if (extended)
+ FreeSpaceMapVacuum(idxrel);
+ }
return false;
}
brin_can_do_samepage_update(oldbuf, origsz, newsz))
{
if (BufferIsValid(newbuf))
+ {
+ /* as above */
+ if (extended)
+ brin_initialize_empty_new_buffer(idxrel, newbuf);
UnlockReleaseBuffer(newbuf);
+ }
START_CRIT_SECTION();
PageIndexDeleteNoCompact(oldpage, &oldoff, 1);
END_CRIT_SECTION();
LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+
+ if (extended)
+ FreeSpaceMapVacuum(idxrel);
+
return true;
}
else if (newbuf == InvalidBuffer)
Buffer revmapbuf;
ItemPointerData newtid;
OffsetNumber newoff;
+ BlockNumber newblk = InvalidBlockNumber;
+ Size freespace = 0;
revmapbuf = brinLockRevmapPageForUpdate(revmap, heapBlk);
START_CRIT_SECTION();
+ /*
+ * We need to initialize the page if it's newly obtained. Note we
+ * will WAL-log the initialization as part of the update, so we don't
+ * need to do that here.
+ */
+ if (extended)
+ brin_page_init(BufferGetPage(newbuf), BRIN_PAGETYPE_REGULAR);
+
PageIndexDeleteNoCompact(oldpage, &oldoff, 1);
newoff = PageAddItem(newpage, (Item) newtup, newsz,
InvalidOffsetNumber, false, false);
MarkBufferDirty(oldbuf);
MarkBufferDirty(newbuf);
+ /* needed to update FSM below */
+ if (extended)
+ {
+ newblk = BufferGetBlockNumber(newbuf);
+ freespace = br_page_get_freespace(newpage);
+ }
+
ItemPointerSet(&newtid, BufferGetBlockNumber(newbuf), newoff);
brinSetHeapBlockItemptr(revmapbuf, pagesPerRange, heapBlk, newtid);
MarkBufferDirty(revmapbuf);
LockBuffer(revmapbuf, BUFFER_LOCK_UNLOCK);
LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
UnlockReleaseBuffer(newbuf);
+
+ if (extended)
+ {
+ Assert(BlockNumberIsValid(newblk));
+ RecordPageWithFreeSpace(idxrel, newblk, freespace);
+ FreeSpaceMapVacuum(idxrel);
+ }
+
return true;
}
}
OffsetNumber off;
Buffer revmapbuf;
ItemPointerData tid;
- bool extended = false;
+ bool extended;
Assert(itemsz == MAXALIGN(itemsz));
{
*buffer = brin_getinsertbuffer(idxrel, InvalidBuffer, itemsz, &extended);
Assert(BufferIsValid(*buffer));
- Assert(br_page_get_freespace(BufferGetPage(*buffer)) >= itemsz);
+ Assert(extended || br_page_get_freespace(BufferGetPage(*buffer)) >= itemsz);
}
/* Now obtain lock on revmap buffer */
blk = BufferGetBlockNumber(*buffer);
START_CRIT_SECTION();
+ if (extended)
+ brin_page_init(BufferGetPage(*buffer), BRIN_PAGETYPE_REGULAR);
off = PageAddItem(page, (Item) tup, itemsz, InvalidOffsetNumber,
false, false);
if (off == InvalidOffsetNumber)
UnlockReleaseBuffer(buf);
}
+/*
+ * Given a BRIN index page, initialize it if necessary, and record it into the
+ * FSM if necessary. Return value is true if the FSM itself needs "vacuuming".
+ * The main use for this is when, during vacuuming, an uninitialized page is
+ * found, which could be the result of relation extension followed by a crash
+ * before the page can be used.
+ */
+bool
+brin_page_cleanup(Relation idxrel, Buffer buf)
+{
+ Page page = BufferGetPage(buf);
+ Size freespace;
+
+ /*
+ * If a page was left uninitialized, initialize it now; also record it in
+ * FSM.
+ *
+ * Somebody else might be extending the relation concurrently. To avoid
+ * re-initializing the page before they can grab the buffer lock, we
+ * acquire the extension lock momentarily. Since they hold the extension
+ * lock from before getting the page and after its been initialized, we're
+ * sure to see their initialization.
+ */
+ if (PageIsNew(page))
+ {
+ LockRelationForExtension(idxrel, ShareLock);
+ UnlockRelationForExtension(idxrel, ShareLock);
+
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ if (PageIsNew(page))
+ {
+ brin_initialize_empty_new_buffer(idxrel, buf);
+ LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+ return true;
+ }
+ LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+ }
+
+ /* Nothing to be done for non-regular index pages */
+ if (BRIN_IS_META_PAGE(BufferGetPage(buf)) ||
+ BRIN_IS_REVMAP_PAGE(BufferGetPage(buf)))
+ return false;
+
+ /* Measure free space and record it */
+ freespace = br_page_get_freespace(page);
+ if (freespace > GetRecordedFreeSpace(idxrel, BufferGetBlockNumber(buf)))
+ {
+ RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buf), freespace);
+ return true;
+ }
+
+ return false;
+}
+
/*
* Return a pinned and exclusively locked buffer which can be used to insert an
* index item of size itemsz. If oldbuf is a valid buffer, it is also locked
* (in an order determined to avoid deadlocks.)
*
- * If there's no existing page with enough free space to accommodate the new
- * item, the relation is extended. If this happens, *extended is set to true.
- *
* If we find that the old page is no longer a regular index page (because
* of a revmap extension), the old buffer is unlocked and we return
* InvalidBuffer.
+ *
+ * If there's no existing page with enough free space to accommodate the new
+ * item, the relation is extended. If this happens, *extended is set to true,
+ * and it is the caller's responsibility to initialize the page (and WAL-log
+ * that fact) prior to use.
+ *
+ * Note that in some corner cases it is possible for this routine to extend the
+ * relation and then not return the buffer. It is this routine's
+ * responsibility to WAL-log the page initialization and to record the page in
+ * FSM if that happens. Such a buffer may later be reused by this routine.
*/
static Buffer
brin_getinsertbuffer(Relation irel, Buffer oldbuf, Size itemsz,
- bool *was_extended)
+ bool *extended)
{
BlockNumber oldblk;
BlockNumber newblk;
Page page;
int freespace;
+ /*
+ * If the item is oversized, don't bother. We have another, more precise
+ * check below.
+ */
+ if (itemsz > BLCKSZ - sizeof(BrinSpecialSpace))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("index row size %lu exceeds maximum %lu for index \"%s\"",
+ (unsigned long) itemsz,
+ (unsigned long) BLCKSZ - sizeof(BrinSpecialSpace),
+ RelationGetRelationName(irel))));
+ return InvalidBuffer; /* keep compiler quiet */
+ }
+
+ *extended = false;
+
if (BufferIsValid(oldbuf))
oldblk = BufferGetBlockNumber(oldbuf);
else
{
Buffer buf;
bool extensionLockHeld = false;
- bool extended = false;
CHECK_FOR_INTERRUPTS();
}
buf = ReadBuffer(irel, P_NEW);
newblk = BufferGetBlockNumber(buf);
- *was_extended = extended = true;
+ *extended = true;
BRIN_elog((DEBUG2, "brin_getinsertbuffer: extending to page %u",
BufferGetBlockNumber(buf)));
if (!BRIN_IS_REGULAR_PAGE(BufferGetPage(oldbuf)))
{
LockBuffer(oldbuf, BUFFER_LOCK_UNLOCK);
+
+ /*
+ * It is possible that the new page was obtained from
+ * extending the relation. In that case, we must be sure to
+ * record it in the FSM before leaving, because otherwise the
+ * space would be lost forever. However, we cannot let an
+ * uninitialized page get in the FSM, so we need to initialize
+ * it first.
+ */
+ if (*extended)
+ {
+ brin_initialize_empty_new_buffer(irel, buf);
+ /* shouldn't matter, but don't confuse caller */
+ *extended = false;
+ }
+
+ if (extensionLockHeld)
+ UnlockRelationForExtension(irel, ExclusiveLock);
+
ReleaseBuffer(buf);
return InvalidBuffer;
}
page = BufferGetPage(buf);
- if (extended)
- brin_page_init(page, BRIN_PAGETYPE_REGULAR);
-
/*
* We have a new buffer to insert into. Check that the new page has
* enough free space, and return it if it does; otherwise start over.
* (br_page_get_freespace also checks that the FSM didn't hand us a
* page that has since been repurposed for the revmap.)
*/
- freespace = br_page_get_freespace(page);
+ freespace = *extended ?
+ BLCKSZ - sizeof(BrinSpecialSpace) : br_page_get_freespace(page);
if (freespace >= itemsz)
{
RelationSetTargetBlock(irel, BufferGetBlockNumber(buf));
* invalidations, make sure we update the more permanent FSM with
* data about it before going away.
*/
- if (extended)
+ if (*extended)
RecordPageWithFreeSpace(irel, BufferGetBlockNumber(buf),
freespace);
/*
* If an entirely new page does not contain enough free space for the
* new item, then surely that item is oversized. Complain loudly; but
- * first make sure we record the page as free, for next time.
+ * first make sure we initialize the page and record it as free, for
+ * next time.
*/
- if (extended)
+ if (*extended)
{
- RecordPageWithFreeSpace(irel, BufferGetBlockNumber(buf),
- freespace);
+ brin_initialize_empty_new_buffer(irel, buf);
+
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("index row size %lu exceeds maximum %lu for index \"%s\"",
}
}
+/*
+ * Initialize a page as an empty regular BRIN page, WAL-log this, and record
+ * the page in FSM.
+ *
+ * There are several corner situations in which we extend the relation to
+ * obtain a new page and later find that we cannot use it immediately. When
+ * that happens, we don't want to leave the page go unrecorded in FSM, because
+ * there is no mechanism to get the space back and the index would bloat.
+ * Also, because we would not WAL-log the action that would initialize the
+ * page, the page would go uninitialized in a standby (or after recovery).
+ */
+static void
+brin_initialize_empty_new_buffer(Relation idxrel, Buffer buffer)
+{
+ Page page;
+
+ BRIN_elog((DEBUG2,
+ "brin_initialize_empty_new_buffer: initializing blank page %u",
+ BufferGetBlockNumber(buffer)));
+
+ START_CRIT_SECTION();
+ page = BufferGetPage(buffer);
+ brin_page_init(page, BRIN_PAGETYPE_REGULAR);
+ MarkBufferDirty(buffer);
+ log_newpage_buffer(buffer, true);
+ END_CRIT_SECTION();
+
+ /*
+ * We update the FSM for this page, but this is not WAL-logged. This is
+ * acceptable because VACUUM will scan the index and update the FSM with
+ * pages whose FSM records were forgotten in a crash.
+ */
+ RecordPageWithFreeSpace(idxrel, BufferGetBlockNumber(buffer),
+ br_page_get_freespace(page));
+}
+
+
/*
* Return the amount of free space on a regular BRIN index page.
*