Skip to content

Commit 2645293

Browse files
AliSQLAliSQL
authored and committed
[Feature] Issue#19 BUFFER POOL LIST SCAN OPTIMIZATION
Description:
------------
Backport from WebScaleSQL.

This patch includes:

--- Backport of upstream work on buffer pool list scans.

WL#7047 - Optimize buffer pool list scans and related batch processing code.

Reduce excessive scanning of pages when doing flush list batches. The fix is to introduce the concept of a "Hazard Pointer", which reduces the time complexity of the scan from O(n*n) to O(n).

The concept of a hazard pointer is reversed in this work. Academically, a hazard pointer is a pointer that the thread working on it declares as such, and as long as that thread is not done, no other thread is allowed to do anything with it. In this WL we declare the pointer as a hazard pointer, and if any other thread attempts to work on it, it is allowed to do so, but it has to adjust the hazard pointer to the next valid value. We use hazard pointers solely for reverse traversal of lists within a buffer pool instance.

Add an event to control the background flush thread. The background flush thread's wait has been converted to an OS event timed wait so that it can be signalled by threads that want to kick-start a background flush when the buffer pool is running low on free/dirty pages.

--- Fix for MySQL bug#71411.

buf_flush_LRU() returns the number of pages processed. There are two types of processing that can happen: a page can get evicted, or a page can get flushed. These two numbers are quite distinct and should not be mixed.
1 parent 0a4817f commit 2645293

12 files changed

Lines changed: 565 additions & 406 deletions

mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ buffer_data_written disabled
4343
buffer_flush_batch_scanned disabled
4444
buffer_flush_batch_num_scan disabled
4545
buffer_flush_batch_scanned_per_call disabled
46-
buffer_flush_batch_rescan disabled
4746
buffer_flush_batch_total_pages disabled
4847
buffer_flush_batches disabled
4948
buffer_flush_batch_pages disabled
@@ -68,9 +67,12 @@ buffer_flush_background_pages disabled
6867
buffer_LRU_batch_scanned disabled
6968
buffer_LRU_batch_num_scan disabled
7069
buffer_LRU_batch_scanned_per_call disabled
71-
buffer_LRU_batch_total_pages disabled
72-
buffer_LRU_batches disabled
73-
buffer_LRU_batch_pages disabled
70+
buffer_LRU_batch_flush_total_pages disabled
71+
buffer_LRU_batches_flush disabled
72+
buffer_LRU_batch_flush_pages disabled
73+
buffer_LRU_batch_evict_total_pages disabled
74+
buffer_LRU_batches_evict disabled
75+
buffer_LRU_batch_evict_pages disabled
7476
buffer_LRU_single_flush_scanned disabled
7577
buffer_LRU_single_flush_num_scan disabled
7678
buffer_LRU_single_flush_scanned_per_call disabled

mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ buffer_data_written disabled
4343
buffer_flush_batch_scanned disabled
4444
buffer_flush_batch_num_scan disabled
4545
buffer_flush_batch_scanned_per_call disabled
46-
buffer_flush_batch_rescan disabled
4746
buffer_flush_batch_total_pages disabled
4847
buffer_flush_batches disabled
4948
buffer_flush_batch_pages disabled
@@ -68,9 +67,12 @@ buffer_flush_background_pages disabled
6867
buffer_LRU_batch_scanned disabled
6968
buffer_LRU_batch_num_scan disabled
7069
buffer_LRU_batch_scanned_per_call disabled
71-
buffer_LRU_batch_total_pages disabled
72-
buffer_LRU_batches disabled
73-
buffer_LRU_batch_pages disabled
70+
buffer_LRU_batch_flush_total_pages disabled
71+
buffer_LRU_batches_flush disabled
72+
buffer_LRU_batch_flush_pages disabled
73+
buffer_LRU_batch_evict_total_pages disabled
74+
buffer_LRU_batches_evict disabled
75+
buffer_LRU_batch_evict_pages disabled
7476
buffer_LRU_single_flush_scanned disabled
7577
buffer_LRU_single_flush_num_scan disabled
7678
buffer_LRU_single_flush_scanned_per_call disabled

mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ buffer_data_written disabled
4343
buffer_flush_batch_scanned disabled
4444
buffer_flush_batch_num_scan disabled
4545
buffer_flush_batch_scanned_per_call disabled
46-
buffer_flush_batch_rescan disabled
4746
buffer_flush_batch_total_pages disabled
4847
buffer_flush_batches disabled
4948
buffer_flush_batch_pages disabled
@@ -68,9 +67,12 @@ buffer_flush_background_pages disabled
6867
buffer_LRU_batch_scanned disabled
6968
buffer_LRU_batch_num_scan disabled
7069
buffer_LRU_batch_scanned_per_call disabled
71-
buffer_LRU_batch_total_pages disabled
72-
buffer_LRU_batches disabled
73-
buffer_LRU_batch_pages disabled
70+
buffer_LRU_batch_flush_total_pages disabled
71+
buffer_LRU_batches_flush disabled
72+
buffer_LRU_batch_flush_pages disabled
73+
buffer_LRU_batch_evict_total_pages disabled
74+
buffer_LRU_batches_evict disabled
75+
buffer_LRU_batch_evict_pages disabled
7476
buffer_LRU_single_flush_scanned disabled
7577
buffer_LRU_single_flush_num_scan disabled
7678
buffer_LRU_single_flush_scanned_per_call disabled

mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ buffer_data_written disabled
4343
buffer_flush_batch_scanned disabled
4444
buffer_flush_batch_num_scan disabled
4545
buffer_flush_batch_scanned_per_call disabled
46-
buffer_flush_batch_rescan disabled
4746
buffer_flush_batch_total_pages disabled
4847
buffer_flush_batches disabled
4948
buffer_flush_batch_pages disabled
@@ -68,9 +67,12 @@ buffer_flush_background_pages disabled
6867
buffer_LRU_batch_scanned disabled
6968
buffer_LRU_batch_num_scan disabled
7069
buffer_LRU_batch_scanned_per_call disabled
71-
buffer_LRU_batch_total_pages disabled
72-
buffer_LRU_batches disabled
73-
buffer_LRU_batch_pages disabled
70+
buffer_LRU_batch_flush_total_pages disabled
71+
buffer_LRU_batches_flush disabled
72+
buffer_LRU_batch_flush_pages disabled
73+
buffer_LRU_batch_evict_total_pages disabled
74+
buffer_LRU_batches_evict disabled
75+
buffer_LRU_batch_evict_pages disabled
7476
buffer_LRU_single_flush_scanned disabled
7577
buffer_LRU_single_flush_num_scan disabled
7678
buffer_LRU_single_flush_scanned_per_call disabled

storage/innobase/buf/buf0buf.cc

Lines changed: 120 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1392,6 +1392,19 @@ buf_pool_init_instance(
13921392

13931393
buf_pool->try_LRU_scan = TRUE;
13941394

1395+
/* Initialize the hazard pointer for flush_list batches */
1396+
new(&buf_pool->flush_hp)
1397+
FlushHp(buf_pool, &buf_pool->flush_list_mutex);
1398+
1399+
/* Initialize the hazard pointer for LRU batches */
1400+
new(&buf_pool->lru_hp) LRUHp(buf_pool, &buf_pool->mutex);
1401+
1402+
/* Initialize the iterator for LRU scan search */
1403+
new(&buf_pool->lru_scan_itr) LRUItr(buf_pool, &buf_pool->mutex);
1404+
1405+
/* Initialize the iterator for single page scan search */
1406+
new(&buf_pool->single_scan_itr) LRUItr(buf_pool, &buf_pool->mutex);
1407+
13951408
buf_pool_mutex_exit(buf_pool);
13961409

13971410
return(DB_SUCCESS);
@@ -1625,6 +1638,10 @@ buf_relocate(
16251638

16261639
memcpy(dpage, bpage, sizeof *dpage);
16271640

1641+
/* Important that we adjust the hazard pointer before
1642+
removing bpage from LRU list. */
1643+
buf_LRU_adjust_hp(buf_pool, bpage);
1644+
16281645
ut_d(bpage->in_LRU_list = FALSE);
16291646
ut_d(bpage->in_page_hash = FALSE);
16301647

@@ -1663,6 +1680,84 @@ buf_relocate(
16631680
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
16641681
}
16651682

1683+
/** Hazard Pointer implementation. */
1684+
1685+
/** Set the current value of the hazard pointer. The caller is expected to own m_mutex, and a non-NULL bpage is expected to belong to m_buf_pool and to satisfy buf_page_in_file() (all three conditions are checked with ut_ad).
1686+
@param bpage buffer block to be set as hp, or NULL */
1687+
void
1688+
HazardPointer::set(buf_page_t* bpage)
1689+
{
1690+
ut_ad(mutex_own(m_mutex));
1691+
ut_ad(!bpage || buf_pool_from_bpage(bpage) == m_buf_pool);
1692+
ut_ad(!bpage || buf_page_in_file(bpage));
1693+
1694+
m_hp = bpage;
1695+
}
1696+
1697+
/** Checks if a bpage is the hp. The caller is expected to own m_mutex, and both the stored hp and a non-NULL bpage are expected to belong to m_buf_pool (checked with ut_ad).
1698+
@param bpage buffer block to be compared
1699+
@return true if bpage is the same pointer as the current hp */
1700+
1701+
bool
1702+
HazardPointer::is_hp(const buf_page_t* bpage)
1703+
{
1704+
ut_ad(mutex_own(m_mutex));
1705+
ut_ad(!m_hp || buf_pool_from_bpage(m_hp) == m_buf_pool);
1706+
ut_ad(!bpage || buf_pool_from_bpage(bpage) == m_buf_pool);
1707+
1708+
return(bpage == m_hp);
1709+
}
1710+
1711+
/** Adjust the value of hp. This happens when some other thread working
1712+
on the same list attempts to remove the hp from the list. If bpage is the current hp, the hp is moved to the element returned by UT_LIST_GET_PREV(list, m_hp); otherwise the hp is left unchanged.
1713+
@param bpage buffer block to be compared; must not be NULL (checked with ut_ad) */
1714+
1715+
void
1716+
FlushHp::adjust(const buf_page_t* bpage)
1717+
{
1718+
ut_ad(bpage != NULL);
1719+
1720+
/** We only support reverse traversal for now, so the replacement hp is always the previous element. */
1721+
if (is_hp(bpage)) {
1722+
m_hp = UT_LIST_GET_PREV(list, m_hp);
1723+
}
1724+
1725+
ut_ad(!m_hp || m_hp->in_flush_list);
1726+
}
1727+
1728+
/** Adjust the value of hp. This happens when some other thread working
1729+
on the same list attempts to remove the hp from the list. If bpage is the current hp, the hp is moved to the element returned by UT_LIST_GET_PREV(LRU, m_hp); otherwise the hp is left unchanged.
1730+
@param bpage buffer block to be compared; must not be NULL (checked with ut_ad) */
1731+
1732+
void
1733+
LRUHp::adjust(const buf_page_t* bpage)
1734+
{
1735+
ut_ad(bpage);
1736+
1737+
/** We only support reverse traversal for now, so the replacement hp is always the previous element. */
1738+
if (is_hp(bpage)) {
1739+
m_hp = UT_LIST_GET_PREV(LRU, m_hp);
1740+
}
1741+
1742+
ut_ad(!m_hp || m_hp->in_LRU_list);
1743+
}
1744+
1745+
/** Selects from where to start a scan. If we have scanned too deep into
1746+
the LRU list it resets the value to the tail of the LRU list. In the code the reset happens when the stored position is NULL or its "old" flag is set. NOTE(review): confirm that the "old" test matches the "scanned too deep" intent. The caller is expected to own m_mutex (checked with ut_ad).
1747+
@return buf_page_t from where to start scan. */
1748+
1749+
buf_page_t*
1750+
LRUItr::start()
1751+
{
1752+
ut_ad(mutex_own(m_mutex));
1753+
1754+
if (!m_hp || m_hp->old) {
1755+
m_hp = UT_LIST_GET_LAST(m_buf_pool->LRU);
1756+
}
1757+
1758+
return(m_hp);
1759+
}
1760+
16661761
/********************************************************************//**
16671762
Determine if a block is a sentinel for a buffer pool watch.
16681763
@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
@@ -4149,7 +4244,10 @@ UNIV_INTERN
41494244
bool
41504245
buf_page_io_complete(
41514246
/*=================*/
4152-
buf_page_t* bpage) /*!< in: pointer to the block in question */
4247+
buf_page_t* bpage, /*!< in: pointer to the block in question */
4248+
bool evict) /*!< in: whether or not to evict the page
4249+
from LRU list. */
4250+
41534251
{
41544252
enum buf_io_fix io_type;
41554253
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
@@ -4331,6 +4429,7 @@ buf_page_io_complete(
43314429
id. */
43324430

43334431
buf_page_set_io_fix(bpage, BUF_IO_NONE);
4432+
buf_page_monitor(bpage, io_type);
43344433

43354434
switch (io_type) {
43364435
case BUF_IO_READ:
@@ -4347,6 +4446,8 @@ buf_page_io_complete(
43474446
BUF_IO_READ);
43484447
}
43494448

4449+
mutex_exit(buf_page_get_mutex(bpage));
4450+
43504451
break;
43514452

43524453
case BUF_IO_WRITE:
@@ -4362,14 +4463,30 @@ buf_page_io_complete(
43624463

43634464
buf_pool->stat.n_pages_written++;
43644465

4466+
/* In case of flush batches i.e.: BUF_FLUSH_LIST and
4467+
BUF_FLUSH_LRU this function is always called from IO
4468+
helper thread. In this case, we decide whether or not
4469+
to evict the page based on flush type. The value
4470+
passed as evict is the default value in function
4471+
definition which is false.
4472+
We always evict in case of LRU batch and never evict
4473+
in case of flush list batch. For single page flush
4474+
the caller sets the appropriate value. */
4475+
if (buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU) {
4476+
evict = true;
4477+
}
4478+
4479+
mutex_exit(buf_page_get_mutex(bpage));
4480+
if (evict) {
4481+
buf_LRU_free_page(bpage, true);
4482+
}
4483+
43654484
break;
43664485

43674486
default:
43684487
ut_error;
43694488
}
43704489

4371-
buf_page_monitor(bpage, io_type);
4372-
43734490
#ifdef UNIV_DEBUG
43744491
if (buf_debug_prints) {
43754492
fprintf(stderr, "Has %s page space %lu page no %lu\n",
@@ -4379,7 +4496,6 @@ buf_page_io_complete(
43794496
}
43804497
#endif /* UNIV_DEBUG */
43814498

4382-
mutex_exit(buf_page_get_mutex(bpage));
43834499
buf_pool_mutex_exit(buf_pool);
43844500

43854501
return(true);

0 commit comments

Comments
 (0)