From 94979ef4d5dbb218afd099aedb3ae26cd5ba7350 Mon Sep 17 00:00:00 2001 From: ab2020c Date: Thu, 2 Mar 2023 16:36:12 +0800 Subject: [PATCH] =?UTF-8?q?=E5=90=88=E5=B9=B6pg=20pr=EF=BC=9APrivateRefCou?= =?UTF-8?q?nt=E7=9B=B8=E5=85=B3=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../process/threadpool/knl_thread.cpp | 1 + .../storage/access/transam/double_write.cpp | 2 + src/gausskernel/storage/buffer/bufmgr.cpp | 409 +++++++++--------- src/gausskernel/storage/nvm/nvmbuffer.cpp | 17 +- .../storage/smgr/segment/segbuffer.cpp | 38 +- src/include/knl/knl_thread.h | 12 +- src/include/storage/buf/bufmgr.h | 2 + 7 files changed, 250 insertions(+), 231 deletions(-) diff --git a/src/gausskernel/process/threadpool/knl_thread.cpp b/src/gausskernel/process/threadpool/knl_thread.cpp index f285832077..473a2efebf 100755 --- a/src/gausskernel/process/threadpool/knl_thread.cpp +++ b/src/gausskernel/process/threadpool/knl_thread.cpp @@ -1326,6 +1326,7 @@ static void knl_t_storage_init(knl_t_storage_context* storage_cxt) storage_cxt->PrivateRefCountHash = NULL; storage_cxt->PrivateRefCountOverflowed = 0; storage_cxt->PrivateRefCountClock = 0; + storage_cxt->ReservedRefCountEntry = NULL; storage_cxt->saved_info_valid = false; storage_cxt->prev_strategy_buf_id = 0; storage_cxt->prev_strategy_passes = 0; diff --git a/src/gausskernel/storage/access/transam/double_write.cpp b/src/gausskernel/storage/access/transam/double_write.cpp index a6119cbf80..4cc88c68a1 100644 --- a/src/gausskernel/storage/access/transam/double_write.cpp +++ b/src/gausskernel/storage/access/transam/double_write.cpp @@ -2106,6 +2106,8 @@ static XLogRecPtr dw_copy_page(ThrdDwCxt* thrd_dw_cxt, int buf_desc_id, bool* is errno_t rc; *is_skipped = true; + ReservePrivateRefCountEntry(); + buf_desc = GetBufferDescriptor(buf_desc_id); buf_state = LockBufHdr(buf_desc); if (!dw_buf_ckpt_needed(buf_state)) { diff --git 
a/src/gausskernel/storage/buffer/bufmgr.cpp b/src/gausskernel/storage/buffer/bufmgr.cpp index ecdbc2ade6..0501d95ac1 100644 --- a/src/gausskernel/storage/buffer/bufmgr.cpp +++ b/src/gausskernel/storage/buffer/bufmgr.cpp @@ -133,183 +133,167 @@ static Buffer ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumb static void TerminateBufferIO_common(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits); /* - * Return the PrivateRefCount entry for the passed buffer. It is searched - * only in PrivateRefCountArray which makes this function very short and - * suitable to be inline. - * For complete search, GetPrivateRefCountEntrySlow should be invoked after. - * - * Only works for shared buffers. + * Ensure that the PrivateRefCountArray has sufficient space to store one + * more entry. This has to be called before using NewPrivateRefCountEntry() to + * fill a new entry - but it's perfectly fine to not use a reserved entry. */ -static PrivateRefCountEntry* GetPrivateRefCountEntryFast(Buffer buffer, PrivateRefCountEntry* &free_entry) +void ReservePrivateRefCountEntry(void) { - PrivateRefCountEntry* res = NULL; - int i; - - Assert(BufferIsValid(buffer)); - Assert(!BufferIsLocal(buffer)); + /* Already reserved (or freed), nothing to do */ + if (t_thrd.storage_cxt.ReservedRefCountEntry != NULL) + return; /* - * First search for references in the array, that'll be sufficient in the - * majority of cases. - */ - for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++) { - res = &t_thrd.storage_cxt.PrivateRefCountArray[i]; + * First search for a free entry in the array, that'll be sufficient in the + * majority of cases. + */ + { + int i; - if (res->buffer == buffer) { - return res; - } + for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++) { + PrivateRefCountEntry *res; + + res = &t_thrd.storage_cxt.PrivateRefCountArray[i]; - /* Remember where to put a new refcount, should it become necessary.
*/ - if (free_entry == NULL && res->buffer == InvalidBuffer) { - free_entry = res; + if (res->buffer == InvalidBuffer) { + t_thrd.storage_cxt.ReservedRefCountEntry = res; + return; + } } } - return NULL; -} - -/* - * Return the PrivateRefCount entry for the passed buffer. - * - * This function will be based on the result of GetPrivateRefCountEntryFast - * to provide complete search, which would be slow. - * - * Returns NULL if create = false is passed and the buffer doesn't have a - * PrivateRefCount entry; allocates a new PrivateRefCountEntry if currently - * none exists and create = true is passed. - * - * If do_move is true - only allowed for create = false - the entry is - * optimized for frequent access. - * - * When a returned refcount entry isn't used anymore it has to be forgotten, - * using ForgetPrivateRefCountEntry(). - * - * Only works for shared buffers. - */ -static PrivateRefCountEntry* GetPrivateRefCountEntrySlow(Buffer buffer, - bool create, bool do_move, PrivateRefCountEntry* free_entry) -{ - Assert(!create || do_move); - Assert(BufferIsValid(buffer)); - Assert(!BufferIsLocal(buffer)); - - /* - * By here we know that the buffer, if already pinned, isn't residing in - * the array. - */ - PrivateRefCountEntry* res = NULL; - bool found = false; /* - * Look up the buffer in the hashtable if we've previously overflowed into - * it. - */ - if (t_thrd.storage_cxt.PrivateRefCountOverflowed > 0) { - res = (PrivateRefCountEntry *)hash_search(t_thrd.storage_cxt.PrivateRefCountHash, (void *)&buffer, HASH_FIND, - &found); - } + * No luck. All array entries are full. Move one array entry into the hash + * table. + */ + { + /* + * Move entry from the current clock position in the array into the + * hashtable. Use that slot. 
+ */ + PrivateRefCountEntry *hashent; + bool found; - if (!found) { - if (!create) { - /* Neither array nor hash have an entry and no new entry is needed */ - return NULL; - } else if (free_entry != NULL) { - /* add entry into the free array slot */ - free_entry->buffer = buffer; - free_entry->refcount = 0; + /* select victim slot */ + t_thrd.storage_cxt.ReservedRefCountEntry = + &t_thrd.storage_cxt.PrivateRefCountArray[t_thrd.storage_cxt.PrivateRefCountClock++ % REFCOUNT_ARRAY_ENTRIES]; - return free_entry; - } else { - /* - * Move entry from the current clock position in the array into the - * hashtable. Use that slot. - */ - PrivateRefCountEntry *array_ent = NULL; - PrivateRefCountEntry *hash_ent = NULL; + /* Better be used, otherwise we shouldn't get here. */ + Assert(t_thrd.storage_cxt.ReservedRefCountEntry->buffer != InvalidBuffer); - /* select victim slot */ - array_ent = &t_thrd.storage_cxt - .PrivateRefCountArray[t_thrd.storage_cxt.PrivateRefCountClock++ % REFCOUNT_ARRAY_ENTRIES]; - Assert(array_ent->buffer != InvalidBuffer); + /* enter victim array entry into hashtable */ + hashent = (PrivateRefCountEntry*) hash_search(t_thrd.storage_cxt.PrivateRefCountHash, + (void *) &(t_thrd.storage_cxt.ReservedRefCountEntry->buffer), + HASH_ENTER, + &found); + Assert(!found); + hashent->refcount = t_thrd.storage_cxt.ReservedRefCountEntry->refcount; - /* enter victim array entry into hashtable */ - hash_ent = (PrivateRefCountEntry *)hash_search(t_thrd.storage_cxt.PrivateRefCountHash, - (void *)&array_ent->buffer, HASH_ENTER, &found); - Assert(!found); - hash_ent->refcount = array_ent->refcount; + /* clear the now free array slot */ + t_thrd.storage_cxt.ReservedRefCountEntry->buffer = InvalidBuffer; + t_thrd.storage_cxt.ReservedRefCountEntry->refcount = 0; - /* fill the now free array slot */ - array_ent->buffer = buffer; - array_ent->refcount = 0; + t_thrd.storage_cxt.PrivateRefCountOverflowed++; + } +} - t_thrd.storage_cxt.PrivateRefCountOverflowed++; +/* + * Fill a 
previously reserved refcount entry. + */ +PrivateRefCountEntry* NewPrivateRefCountEntry(Buffer buffer) +{ + PrivateRefCountEntry *res; - return array_ent; - } - } else { - if (!do_move) { - return res; - } else if (found && free_entry != NULL) { - /* move buffer from hashtable into the free array slot - * - * fill array slot - */ - free_entry->buffer = buffer; - free_entry->refcount = res->refcount; + /* only allowed to be called when a reservation has been made */ + Assert(t_thrd.storage_cxt.ReservedRefCountEntry != NULL); - /* delete from hashtable */ - (void)hash_search(t_thrd.storage_cxt.PrivateRefCountHash, (void *)&buffer, HASH_REMOVE, &found); - Assert(found); - Assert(t_thrd.storage_cxt.PrivateRefCountOverflowed > 0); - t_thrd.storage_cxt.PrivateRefCountOverflowed--; + /* use up the reserved entry */ + res = t_thrd.storage_cxt.ReservedRefCountEntry; + t_thrd.storage_cxt.ReservedRefCountEntry = NULL; - return free_entry; - } else { - /* - * Swap the entry in the hash table with the one in the array at the - * current clock position. - */ - PrivateRefCountEntry *array_ent = NULL; - PrivateRefCountEntry *hash_ent = NULL; + /* and fill it */ + res->buffer = buffer; + res->refcount = 0; - /* select victim slot */ - array_ent = &t_thrd.storage_cxt - .PrivateRefCountArray[t_thrd.storage_cxt.PrivateRefCountClock++ % REFCOUNT_ARRAY_ENTRIES]; - Assert(array_ent->buffer != InvalidBuffer); + return res; +} - /* enter victim entry into the hashtable */ - hash_ent = (PrivateRefCountEntry *)hash_search(t_thrd.storage_cxt.PrivateRefCountHash, - (void *)&array_ent->buffer, HASH_ENTER, &found); - Assert(!found); - hash_ent->refcount = array_ent->refcount; +/* + * Return the PrivateRefCount entry for the passed buffer. + * + * Returns NULL if a buffer doesn't have a refcount entry. Otherwise, if + * do_move is true, and the entry resides in the hashtable the entry is + * optimized for frequent access by moving it to the array. 
+ */ +PrivateRefCountEntry *GetPrivateRefCountEntry(Buffer buffer, bool do_move) +{ + PrivateRefCountEntry* res = NULL; + int i; - /* fill now free array entry with previously searched entry */ - array_ent->buffer = res->buffer; - array_ent->refcount = res->refcount; + Assert(BufferIsValid(buffer)); + Assert(!BufferIsLocal(buffer)); - /* and remove the old entry */ - (void)hash_search(t_thrd.storage_cxt.PrivateRefCountHash, (void *)&array_ent->buffer, HASH_REMOVE, &found); - Assert(found); + /* + * First search for references in the array, that'll be sufficient in the + * majority of cases. + */ + for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++) { + res = &t_thrd.storage_cxt.PrivateRefCountArray[i]; - /* PrivateRefCountOverflowed stays the same -1 + +1 = 0 */ - return array_ent; + if (res->buffer == buffer) { + return res; } } - return NULL; -} + /* + * By here we know that the buffer, if already pinned, isn't residing in + * the array. + * + * Only look up the buffer in the hashtable if we've previously overflowed + * into it. + */ + if (t_thrd.storage_cxt.PrivateRefCountOverflowed == 0) + return NULL; -/* A combination of GetPrivateRefCountEntryFast & GetPrivateRefCountEntrySlow. 
*/ -PrivateRefCountEntry *GetPrivateRefCountEntry(Buffer buffer, bool create, bool do_move) -{ - PrivateRefCountEntry *free_entry = NULL; - PrivateRefCountEntry *ref = NULL; + res = (PrivateRefCountEntry*) hash_search(t_thrd.storage_cxt.PrivateRefCountHash, + (void *) &buffer, + HASH_FIND, + NULL); - ref = GetPrivateRefCountEntryFast(buffer, free_entry); - if (ref == NULL) { - ref = GetPrivateRefCountEntrySlow(buffer, create, do_move, free_entry); - } + if (res == NULL) { + return NULL; + } else if (!do_move) { + /* caller doesn't want us to move the hash entry into the array */ + return res; + } else { + /* move buffer from hashtable into the free array slot */ + bool found; + PrivateRefCountEntry *free; + + /* Ensure there's a free array slot */ + ReservePrivateRefCountEntry(); + + /* Use up the reserved slot */ + Assert(t_thrd.storage_cxt.ReservedRefCountEntry != NULL); + free = t_thrd.storage_cxt.ReservedRefCountEntry; + t_thrd.storage_cxt.ReservedRefCountEntry = NULL; + Assert(free->buffer == InvalidBuffer); + + /* and fill it */ + free->buffer = buffer; + free->refcount = res->refcount; + + /* delete from hashtable */ + (void)hash_search(t_thrd.storage_cxt.PrivateRefCountHash, + (void *) &buffer, + HASH_REMOVE, + &found); + Assert(found); + Assert(t_thrd.storage_cxt.PrivateRefCountOverflowed > 0); + t_thrd.storage_cxt.PrivateRefCountOverflowed--; - return ref; + return free; + } } /* @@ -324,11 +308,12 @@ static int32 GetPrivateRefCount(Buffer buffer) Assert(BufferIsValid(buffer)); Assert(!BufferIsLocal(buffer)); - PrivateRefCountEntry *free_entry = NULL; - ref = GetPrivateRefCountEntryFast(buffer, free_entry); - if (ref == NULL) { - ref = GetPrivateRefCountEntrySlow(buffer, false, false, free_entry); - } + /* + * Not moving the entry - that's ok for the current users, but we might + * want to change this one day. 
+ */ + ref = GetPrivateRefCountEntry(buffer, false); + if (ref == NULL) { return 0; } @@ -346,6 +331,12 @@ void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref) if (ref >= &t_thrd.storage_cxt.PrivateRefCountArray[0] && ref < &t_thrd.storage_cxt.PrivateRefCountArray[REFCOUNT_ARRAY_ENTRIES]) { ref->buffer = InvalidBuffer; + /* + * Mark the just used entry as reserved - in many scenarios that + * allows us to avoid ever having to search the array/hash for free + * entries. + */ + t_thrd.storage_cxt.ReservedRefCountEntry = ref; } else { bool found = false; Buffer buffer = ref->buffer; @@ -604,6 +595,7 @@ static volatile BufferDesc *PageListBufferAlloc(SMgrRelation smgr, char relpersi * Loop here in case we have to try another victim buffer */ for (;;) { + ReservePrivateRefCountEntry(); /* * Select a victim buffer. * The buffer is returned with its header spinlock still held! @@ -1191,6 +1183,7 @@ void PageListBackWrite(uint32 *buf_list, int32 nbufs, uint32 flags = 0, SMgrRela /* Make sure we will have room to remember the buffer pin */ ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner); + ReservePrivateRefCountEntry(); /* * Check whether buffer needs writing. @@ -2730,6 +2723,11 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumbe /* Loop here in case we have to try another victim buffer */ for (;;) { bool needGetLock = false; + /* + * Ensure, while the spinlock's not yet held, that there's a free refcount + * entry. + */ + ReservePrivateRefCountEntry(); /* * Select a victim buffer. The buffer is returned with its header * spinlock still held! 
@@ -3104,7 +3102,7 @@ retry: UnlockBufHdr(buf, buf_state); LWLockRelease(old_partition_lock); /* safety check: should definitely not be our *own* pin */ - if (GetPrivateRefCount(buf->buf_id + 1) > 0) { + if (GetPrivateRefCount(BufferDescriptorGetBuffer(buf)) > 0) { ereport(ERROR, (errcode(ERRCODE_INVALID_BUFFER), (errmsg("buffer is pinned in InvalidateBuffer %d", buf->buf_id)))); } @@ -3346,22 +3344,19 @@ Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation, BlockNumber block_ */ bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy) { - int b = buf->buf_id; + Buffer b = BufferDescriptorGetBuffer(buf); bool result = false; PrivateRefCountEntry *ref = NULL; - /* When the secondly and thirdly parameter all both true, the ret value must not be NULL. */ - PrivateRefCountEntry *free_entry = NULL; - ref = GetPrivateRefCountEntryFast(b + 1, free_entry); - if (ref == NULL) { - ref = GetPrivateRefCountEntrySlow(b + 1, true, true, free_entry); - } - Assert(ref != NULL); + ref = GetPrivateRefCountEntry(b, true); - if (ref->refcount == 0) { + if (ref == NULL) { uint32 buf_state; uint32 old_buf_state; + ReservePrivateRefCountEntry(); + ref = NewPrivateRefCountEntry(b); + old_buf_state = pg_atomic_read_u32(&buf->state); for (;;) { if (old_buf_state & BM_LOCKED) { @@ -3389,7 +3384,7 @@ bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy) } ref->refcount++; Assert(ref->refcount > 0); - ResourceOwnerRememberBuffer(t_thrd.utils_cxt.CurrentResourceOwner, BufferDescriptorGetBuffer(buf)); + ResourceOwnerRememberBuffer(t_thrd.utils_cxt.CurrentResourceOwner, b); return result; } @@ -3397,27 +3392,34 @@ bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy) * PinBuffer_Locked -- as above, but caller already locked the buffer header. * The spinlock is released before return. * + * As this function is called with the spinlock held, the caller has to + * previously call ReservePrivateRefCountEntry(). 
+ * * Currently, no callers of this function want to modify the buffer's * usage_count at all, so there's no need for a strategy parameter. * Also we don't bother with a BM_VALID test (the caller could check that for * itself). * + * Also all callers only ever use this function when it's known that the + * buffer can't have a preexisting pin by this backend. That allows us to skip + * searching the private refcount array & hash, which is a boon, because the + * spinlock is still held. + * * Note: use of this routine is frequently mandatory, not just an optimization * to save a spin lock/unlock cycle, because we need to pin a buffer before * its state can change under us. */ void PinBuffer_Locked(volatile BufferDesc *buf) { - int b = buf->buf_id; + Buffer b; PrivateRefCountEntry *ref = NULL; uint32 buf_state; - /* if error happend in GetPrivateRefCountEntry , can not do UnlockBufHdr */ - PrivateRefCountEntry *free_entry = NULL; - ref = GetPrivateRefCountEntryFast(b + 1, free_entry); - if (ref == NULL) { - ref = GetPrivateRefCountEntrySlow(b + 1, true, true, free_entry); - } + /* + * As explained, We don't expect any preexisting pins. 
That allows us to + * manipulate the PrivateRefCount after releasing the spinlock + */ + Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL); /* * Since we hold the buffer spinlock, we can update the buffer state and @@ -3425,14 +3427,15 @@ void PinBuffer_Locked(volatile BufferDesc *buf) */ buf_state = pg_atomic_read_u32(&buf->state); Assert(buf_state & BM_LOCKED); - - if (ref->refcount == 0) { - buf_state += BUF_REFCOUNT_ONE; - } + buf_state += BUF_REFCOUNT_ONE; UnlockBufHdr(buf, buf_state); + + b = BufferDescriptorGetBuffer(buf); + + ref = NewPrivateRefCountEntry(b); ref->refcount++; - Assert(ref->refcount > 0); - ResourceOwnerRememberBuffer(t_thrd.utils_cxt.CurrentResourceOwner, BufferDescriptorGetBuffer(buf)); + + ResourceOwnerRememberBuffer(t_thrd.utils_cxt.CurrentResourceOwner, b); } /* @@ -3446,18 +3449,14 @@ void PinBuffer_Locked(volatile BufferDesc *buf) void UnpinBuffer(BufferDesc *buf, bool fixOwner) { PrivateRefCountEntry *ref = NULL; - int b = buf->buf_id; + Buffer b = BufferDescriptorGetBuffer(buf); - /* if error happend in GetPrivateRefCountEntry , can not do UnlockBufHdr */ - PrivateRefCountEntry *free_entry = NULL; - ref = GetPrivateRefCountEntryFast(b + 1, free_entry); - if (ref == NULL) { - ref = GetPrivateRefCountEntrySlow(b + 1, false, false, free_entry); - } + /* not moving as we're likely deleting it soon anyway */ + ref = GetPrivateRefCountEntry(b, false); Assert(ref != NULL); if (fixOwner) { - ResourceOwnerForgetBuffer(t_thrd.utils_cxt.CurrentResourceOwner, BufferDescriptorGetBuffer(buf)); + ResourceOwnerForgetBuffer(t_thrd.utils_cxt.CurrentResourceOwner, b); } if (ref->refcount <= 0) { ereport(PANIC, (errmsg("[exception] private ref->refcount is %d in UnpinBuffer", ref->refcount))); @@ -4187,6 +4186,9 @@ uint32 SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext* wb_c uint32 result = 0; BufferTag tag; uint32 buf_state; + + ReservePrivateRefCountEntry(); + /* * Check whether buffer needs writing. 
* @@ -5397,13 +5399,14 @@ void PrintBufferDescs(void) { int i; volatile BufferDesc *buf = t_thrd.storage_cxt.BufferDescriptors; for (i = 0; i < TOTAL_BUFFER_NUM; ++i, ++buf) { + Buffer b = BufferDescriptorGetBuffer(buf); /* theoretically we should lock the bufhdr here */ ereport(LOG, (errmsg("[%02d] (rel=%s, " "blockNum=%u, flags=0x%x, refcount=%u %d)", i, relpathbackend(buf->tag.rnode, InvalidBackendId, buf->tag.forkNum), buf->tag.blockNum, - buf->flags, buf->refcount, GetPrivateRefCount(i + 1)))); + buf->flags, buf->refcount, GetPrivateRefCount(b)))); } } #endif @@ -5413,14 +5416,15 @@ void PrintPinnedBufs(void) { int i; volatile BufferDesc *buf = t_thrd.storage_cxt.BufferDescriptors; for (i = 0; i < TOTAL_BUFFER_NUM; ++i, ++buf) { + Buffer b = BufferDescriptorGetBuffer(buf); - if (GetPrivateRefCount(i + 1) > 0) { + if (GetPrivateRefCount(b) > 0) { /* theoretically we should lock the bufhdr here */ ereport(LOG, (errmsg("[%02d] (rel=%s, " "blockNum=%u, flags=0x%x, refcount=%u %d)", i, relpath(buf->tag.rnode, buf->tag.forkNum), buf->tag.blockNum, buf->flags, - buf->refcount, GetPrivateRefCount(i + 1)))); + buf->refcount, GetPrivateRefCount(b)))); } } } @@ -5483,6 +5487,8 @@ void flush_all_buffers(Relation rel, Oid db_id, HTAB *hashtbl) continue; } + ReservePrivateRefCountEntry(); + buf_state = LockBufHdr(buf_desc); if (!flush_buffer_match(buf_desc, rel, db_id) || !dw_buf_valid_dirty(buf_state)) { UnlockBufHdr(buf_desc, buf_state); @@ -5564,36 +5570,19 @@ void FlushDatabaseBuffers(Oid dbid) */ void ReleaseBuffer(Buffer buffer) { - BufferDesc *buf_desc = NULL; - PrivateRefCountEntry *ref = NULL; - if (!BufferIsValid(buffer)) { ereport(ERROR, (errcode(ERRCODE_INVALID_BUFFER), (errmsg("bad buffer ID: %d", buffer)))); } - ResourceOwnerForgetBuffer(t_thrd.utils_cxt.CurrentResourceOwner, buffer); - if (BufferIsLocal(buffer)) { + ResourceOwnerForgetBuffer(t_thrd.utils_cxt.CurrentResourceOwner, buffer); + Assert(u_sess->storage_cxt.LocalRefCount[-buffer - 1] > 0);
u_sess->storage_cxt.LocalRefCount[-buffer - 1]--; return; } - buf_desc = GetBufferDescriptor(buffer - 1); - - PrivateRefCountEntry *free_entry = NULL; - ref = GetPrivateRefCountEntryFast(buffer, free_entry); - if (ref == NULL) { - ref = GetPrivateRefCountEntrySlow(buffer, false, false, free_entry); - } - Assert(ref != NULL); - Assert(ref->refcount > 0); - - if (ref->refcount > 1) { - ref->refcount--; - } else { - UnpinBuffer(buf_desc, false); - } + UnpinBuffer(GetBufferDescriptor(buffer - 1), true); } /* @@ -5624,11 +5613,7 @@ void IncrBufferRefCount(Buffer buffer) u_sess->storage_cxt.LocalRefCount[-buffer - 1]++; } else { PrivateRefCountEntry* ref = NULL; - PrivateRefCountEntry *free_entry = NULL; - ref = GetPrivateRefCountEntryFast(buffer, free_entry); - if (ref == NULL) { - ref = GetPrivateRefCountEntrySlow(buffer, false, true, free_entry); - } + ref = GetPrivateRefCountEntry(buffer, true); Assert(ref != NULL); ref->refcount++; } diff --git a/src/gausskernel/storage/nvm/nvmbuffer.cpp b/src/gausskernel/storage/nvm/nvmbuffer.cpp index a7584b1929..5ade48cab9 100644 --- a/src/gausskernel/storage/nvm/nvmbuffer.cpp +++ b/src/gausskernel/storage/nvm/nvmbuffer.cpp @@ -33,7 +33,7 @@ #include "pgstat.h" static BufferDesc *NvmStrategyGetBuffer(uint32 *buf_state); -extern PrivateRefCountEntry *GetPrivateRefCountEntry(Buffer buffer, bool create, bool do_move); +extern PrivateRefCountEntry *GetPrivateRefCountEntry(Buffer buffer, bool do_move); static const int MILLISECOND_TO_MICROSECOND = 1000; static const int TEN_MILLISECOND = 10; @@ -89,8 +89,11 @@ static bool NvmPinBuffer(BufferDesc *buf, bool *migrate) } } - PrivateRefCountEntry *ref = GetPrivateRefCountEntry(b, true, true); - Assert(ref != NULL); + PrivateRefCountEntry *ref = GetPrivateRefCountEntry(b, true); + if (ref == NULL) { + ReservePrivateRefCountEntry(); + ref = NewPrivateRefCountEntry(b); + } ref->refcount++; Assert(ref->refcount > 0); ResourceOwnerRememberBuffer(t_thrd.utils_cxt.CurrentResourceOwner, b); @@ 
-103,7 +106,7 @@ static bool NvmPinBufferFast(BufferDesc *buf) PrivateRefCountEntry *ref = NULL; /* When the secondly and thirdly parameter all both true, the ret value must not be NULL. */ - ref = GetPrivateRefCountEntry(b, false, false); + ref = GetPrivateRefCountEntry(b, false); if (ref == NULL) { return false; @@ -397,6 +400,7 @@ restart: return nvmBuf; } else { for (;;) { + ReservePrivateRefCountEntry(); buf = (BufferDesc *)StrategyGetBuffer(strategy, &buf_state); old_flags = buf_state & BUF_FLAG_MASK; @@ -508,6 +512,11 @@ restart: /* Loop here in case we have to try another victim buffer */ for (;;) { bool needGetLock = false; + /* + * Ensure, while the spinlock's not yet held, that there's a free refcount + * entry. + */ + ReservePrivateRefCountEntry(); /* * Select a victim buffer. The buffer is returned with its header * spinlock still held! diff --git a/src/gausskernel/storage/smgr/segment/segbuffer.cpp b/src/gausskernel/storage/smgr/segment/segbuffer.cpp index 2de3fe8396..6f3b191f5a 100644 --- a/src/gausskernel/storage/smgr/segment/segbuffer.cpp +++ b/src/gausskernel/storage/smgr/segment/segbuffer.cpp @@ -50,7 +50,7 @@ static const int TEN_MICROSECOND = 10; static BufferDesc *SegStrategyGetBuffer(uint32 *buf_state); -extern PrivateRefCountEntry *GetPrivateRefCountEntry(Buffer buffer, bool create, bool do_move); +extern PrivateRefCountEntry *GetPrivateRefCountEntry(Buffer buffer, bool do_move); extern void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref); void SetInProgressFlags(BufferDesc *bufDesc, bool input) @@ -163,14 +163,17 @@ bool SegPinBuffer(BufferDesc *buf) ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner); + Buffer b = BufferDescriptorGetBuffer(buf); bool result; - PrivateRefCountEntry * ref = GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), true, true); - SegmentCheck(ref != NULL); + PrivateRefCountEntry * ref = GetPrivateRefCountEntry(b, true); - if (ref->refcount == 0) { + if (ref == NULL) { uint32 buf_state; 
uint32 old_buf_state = pg_atomic_read_u32(&buf->state); + ReservePrivateRefCountEntry(); + ref = NewPrivateRefCountEntry(b); + for (;;) { if (old_buf_state & BM_LOCKED) { old_buf_state = WaitBufHdrUnlocked(buf); @@ -188,7 +191,7 @@ bool SegPinBuffer(BufferDesc *buf) } ref->refcount++; - ResourceOwnerRememberBuffer(t_thrd.utils_cxt.CurrentResourceOwner, BufferDescriptorGetBuffer(buf)); + ResourceOwnerRememberBuffer(t_thrd.utils_cxt.CurrentResourceOwner, b); return result; } @@ -199,20 +202,27 @@ static bool SegPinBufferLocked(BufferDesc *buf, const BufferTag *tag) errmsg("[SegPinBufferLocked] (%u %u %u %d) %d %u ", tag->rnode.spcNode, tag->rnode.dbNode, tag->rnode.relNode, tag->rnode.bucketNode, tag->forkNum, tag->blockNum))); SegmentCheck(BufHdrLocked(buf)); - PrivateRefCountEntry * ref = GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), true, true); - SegmentCheck(ref != NULL); + Buffer b; + PrivateRefCountEntry *ref = NULL; - uint32 buf_state = pg_atomic_read_u32(&buf->state); + /* + * As explained, We don't expect any preexisting pins. 
That allows us to + * manipulate the PrivateRefCount after releasing the spinlock + */ + Assert(GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), false) == NULL); - if (ref->refcount == 0) { - buf_state += BUF_REFCOUNT_ONE; - } + uint32 buf_state = pg_atomic_read_u32(&buf->state); + Assert(buf_state & BM_LOCKED); + buf_state += BUF_REFCOUNT_ONE; UnlockBufHdr(buf, buf_state); + b = BufferDescriptorGetBuffer(buf); + + ref = NewPrivateRefCountEntry(b); ref->refcount++; ResourceOwnerEnlargeBuffers(t_thrd.utils_cxt.CurrentResourceOwner); - ResourceOwnerRememberBuffer(t_thrd.utils_cxt.CurrentResourceOwner, BufferDescriptorGetBuffer(buf)); + ResourceOwnerRememberBuffer(t_thrd.utils_cxt.CurrentResourceOwner, b); return buf_state & BM_VALID; } @@ -222,7 +232,7 @@ void SegUnpinBuffer(BufferDesc *buf) ereport(DEBUG5, (errmodule(MOD_SEGMENT_PAGE), errmsg("[SegUnpinBuffer] (%u %u %u %d) %d %u ", buf->tag.rnode.spcNode, buf->tag.rnode.dbNode, buf->tag.rnode.relNode, buf->tag.rnode.bucketNode, buf->tag.forkNum, buf->tag.blockNum))); - PrivateRefCountEntry * ref = GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), true, true); + PrivateRefCountEntry * ref = GetPrivateRefCountEntry(BufferDescriptorGetBuffer(buf), true); SegmentCheck(ref != NULL); ResourceOwnerForgetBuffer(t_thrd.utils_cxt.CurrentResourceOwner, BufferDescriptorGetBuffer(buf)); @@ -697,6 +707,8 @@ BufferDesc *SegBufferAlloc(SegSpace *spc, RelFileNode rnode, ForkNumber forkNum, LWLockRelease(new_partition_lock); for (;;) { + ReservePrivateRefCountEntry(); + buf = SegStrategyGetBuffer(&buf_state); SegmentCheck(BUF_STATE_GET_REFCOUNT(buf_state) == 0); diff --git a/src/include/knl/knl_thread.h b/src/include/knl/knl_thread.h index 91053cb90a..d6424ca991 100755 --- a/src/include/knl/knl_thread.h +++ b/src/include/knl/knl_thread.h @@ -2589,8 +2589,8 @@ typedef struct knl_t_storage_context { * are still pinned at the end of transactions and when exiting. 
* * - * To avoid - as we used to - requiring an array with g_instance.attr.attr_storage.NBuffers entries to keep - * track of local buffers we use a small sequentially searched array + * To avoid - as we used to - requiring an array with NBuffers entries to keep + * track of local buffers, we use a small sequentially searched array * (PrivateRefCountArray) and a overflow hash table (PrivateRefCountHash) to * keep track of backend local pins. * @@ -2601,11 +2601,19 @@ typedef struct knl_t_storage_context { * * Note that in most scenarios the number of pinned buffers will not exceed * REFCOUNT_ARRAY_ENTRIES. + * + * + * To enter a buffer into the refcount tracking mechanism first reserve a free + * entry using ReservePrivateRefCountEntry() and then later, if necessary, + * fill it with NewPrivateRefCountEntry(). That split lets us avoid doing + * memory allocations in NewPrivateRefCountEntry() which can be important + * because in some scenarios it's called with a spinlock held... */ struct PrivateRefCountEntry* PrivateRefCountArray; struct HTAB* PrivateRefCountHash; int32 PrivateRefCountOverflowed; uint32 PrivateRefCountClock; + PrivateRefCountEntry* ReservedRefCountEntry; /* * Information saved between calls so we can determine the strategy * point's advance rate and avoid scanning already-cleaned buffers. diff --git a/src/include/storage/buf/bufmgr.h b/src/include/storage/buf/bufmgr.h index 2f8c01ce39..255f3d5faa 100644 --- a/src/include/storage/buf/bufmgr.h +++ b/src/include/storage/buf/bufmgr.h @@ -290,6 +290,8 @@ void PageCheckWhenChosedElimination(const BufferDesc *buf, uint32 oldFlags); uint32 WaitBufHdrUnlocked(BufferDesc* buf); void WaitIO(BufferDesc *buf); void InvalidateBuffer(BufferDesc *buf); +extern void ReservePrivateRefCountEntry(void); +extern PrivateRefCountEntry* NewPrivateRefCountEntry(Buffer buffer); void LockTwoLWLock(LWLock *new_partition_lock, LWLock *old_partition_lock); extern void InitBufferPool(void); -- Gitee