diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 859b9132067..95b43a0aade 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -47,6 +47,7 @@ add_executable(test_bitcoin blockmanager_tests.cpp bloom_tests.cpp bswap_tests.cpp + chainstate_write_tests.cpp checkqueue_tests.cpp cluster_linearize_tests.cpp coins_tests.cpp diff --git a/src/test/chainstate_write_tests.cpp b/src/test/chainstate_write_tests.cpp new file mode 100644 index 00000000000..ccca2f9be10 --- /dev/null +++ b/src/test/chainstate_write_tests.cpp @@ -0,0 +1,45 @@ +// Copyright (c) The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include +#include +#include + +#include + +BOOST_AUTO_TEST_SUITE(chainstate_write_tests) + +BOOST_FIXTURE_TEST_CASE(chainstate_write_interval, TestingSetup) +{ + struct TestSubscriber final : CValidationInterface { + bool m_did_flush{false}; + void ChainStateFlushed(ChainstateRole, const CBlockLocator&) override + { + m_did_flush = true; + } + }; + + const auto sub{std::make_shared()}; + m_node.validation_signals->RegisterSharedValidationInterface(sub); + auto& chainstate{Assert(m_node.chainman)->ActiveChainstate()}; + BlockValidationState state_dummy{}; + + // The first periodic flush sets m_next_write and does not flush + chainstate.FlushStateToDisk(state_dummy, FlushStateMode::PERIODIC); + m_node.validation_signals->SyncWithValidationInterfaceQueue(); + BOOST_CHECK(!sub->m_did_flush); + + // The periodic flush interval is between 50 and 70 minutes (inclusive) + SetMockTime(GetTime() + 49min); + chainstate.FlushStateToDisk(state_dummy, FlushStateMode::PERIODIC); + m_node.validation_signals->SyncWithValidationInterfaceQueue(); + BOOST_CHECK(!sub->m_did_flush); + + SetMockTime(GetTime() + 70min); + chainstate.FlushStateToDisk(state_dummy, FlushStateMode::PERIODIC); + m_node.validation_signals->SyncWithValidationInterfaceQueue(); + BOOST_CHECK(sub->m_did_flush); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/validation.cpp b/src/validation.cpp index 0384018bc36..b4fbff2afc3 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -90,10 +90,12 @@ using node::SnapshotMetadata; /** Size threshold for warning about slow UTXO set flush to disk. */ static constexpr size_t WARN_FLUSH_COINS_SIZE = 1 << 30; // 1 GiB -/** Time to wait between writing blocks/block index to disk. */ -static constexpr std::chrono::hours DATABASE_WRITE_INTERVAL{1}; -/** Time to wait between flushing chainstate to disk. */ -static constexpr std::chrono::hours DATABASE_FLUSH_INTERVAL{24}; +/** Time window to wait between writing blocks/block index and chainstate to disk. + * Randomize writing time inside the window to prevent a situation where the + * network over time settles into a few cohorts of synchronized writers. +*/ +static constexpr auto DATABASE_WRITE_INTERVAL_MIN{50min}; +static constexpr auto DATABASE_WRITE_INTERVAL_MAX{70min}; /** Maximum age of our tip for us to be considered current for fee estimation */ static constexpr std::chrono::hours MAX_FEE_ESTIMATION_TIP_AGE{3}; const std::vector CHECKLEVEL_DOC { @@ -2833,7 +2835,6 @@ bool Chainstate::FlushStateToDisk( try { { bool fFlushForPrune = false; - bool fDoFullFlush = false; CoinsCacheSizeState cache_state = GetCoinsCacheSizeState(); LOCK(m_blockman.cs_LastBlockFile); @@ -2878,26 +2879,23 @@ bool Chainstate::FlushStateToDisk( } } } - const auto nNow{SteadyClock::now()}; - // Avoid writing/flushing immediately after startup. - if (m_last_write == decltype(m_last_write){}) { - m_last_write = nNow; - } - if (m_last_flush == decltype(m_last_flush){}) { - m_last_flush = nNow; - } + const auto nNow{NodeClock::now()}; // The cache is large and we're within 10% and 10 MiB of the limit, but we have time now (not in the middle of a block processing). bool fCacheLarge = mode == FlushStateMode::PERIODIC && cache_state >= CoinsCacheSizeState::LARGE; // The cache is over the limit, we have to write now. bool fCacheCritical = mode == FlushStateMode::IF_NEEDED && cache_state >= CoinsCacheSizeState::CRITICAL; - // It's been a while since we wrote the block index to disk. Do this frequently, so we don't need to redownload after a crash. - bool fPeriodicWrite = mode == FlushStateMode::PERIODIC && nNow > m_last_write + DATABASE_WRITE_INTERVAL; - // It's been very long since we flushed the cache. Do this infrequently, to optimize cache usage. - bool fPeriodicFlush = mode == FlushStateMode::PERIODIC && nNow > m_last_flush + DATABASE_FLUSH_INTERVAL; - // Combine all conditions that result in a full cache flush. - fDoFullFlush = (mode == FlushStateMode::ALWAYS) || fCacheLarge || fCacheCritical || fPeriodicFlush || fFlushForPrune; - // Write blocks and block index to disk. - if (fDoFullFlush || fPeriodicWrite) { + // It's been a while since we wrote the block index and chain state to disk. Do this frequently, so we don't need to redownload or reindex after a crash. + bool fPeriodicWrite = mode == FlushStateMode::PERIODIC && nNow >= m_next_write; + // Combine all conditions that result in a write to disk. + bool should_write = (mode == FlushStateMode::ALWAYS) || fCacheLarge || fCacheCritical || fPeriodicWrite || fFlushForPrune; + + if (should_write || m_next_write == NodeClock::time_point::max()) { + constexpr auto range{DATABASE_WRITE_INTERVAL_MAX - DATABASE_WRITE_INTERVAL_MIN}; + m_next_write = FastRandomContext().rand_uniform_delay(nNow + DATABASE_WRITE_INTERVAL_MIN, range); + } + + // Write blocks, block index and best chain related state to disk. + if (should_write) { // Ensure we can write block index if (!CheckDiskSpace(m_blockman.m_opts.blocks_dir)) { return FatalError(m_chainman.GetNotifications(), state, _("Disk space is too low!")); @@ -2927,35 +2925,33 @@ bool Chainstate::FlushStateToDisk( m_blockman.UnlinkPrunedFiles(setFilesToPrune); } - m_last_write = nNow; - } - // Flush best chain related state. This can only be done if the blocks / block index write was also done. - if (fDoFullFlush && !CoinsTip().GetBestBlock().IsNull()) { - if (coins_mem_usage >= WARN_FLUSH_COINS_SIZE) LogWarning("Flushing large (%d GiB) UTXO set to disk, it may take several minutes", coins_mem_usage >> 30); - LOG_TIME_MILLIS_WITH_CATEGORY(strprintf("write coins cache to disk (%d coins, %.2fKiB)", - coins_count, coins_mem_usage >> 10), BCLog::BENCH); - // Typical Coin structures on disk are around 48 bytes in size. - // Pushing a new one to the database can cause it to be written - // twice (once in the log, and once in the tables). This is already - // an overestimation, as most will delete an existing entry or - // overwrite one. Still, use a conservative safety factor of 2. - if (!CheckDiskSpace(m_chainman.m_options.datadir, 48 * 2 * 2 * CoinsTip().GetCacheSize())) { - return FatalError(m_chainman.GetNotifications(), state, _("Disk space is too low!")); + if (!CoinsTip().GetBestBlock().IsNull()) { + if (coins_mem_usage >= WARN_FLUSH_COINS_SIZE) LogWarning("Flushing large (%d GiB) UTXO set to disk, it may take several minutes", coins_mem_usage >> 30); + LOG_TIME_MILLIS_WITH_CATEGORY(strprintf("write coins cache to disk (%d coins, %.2fKiB)", + coins_count, coins_mem_usage >> 10), BCLog::BENCH); + + // Typical Coin structures on disk are around 48 bytes in size. + // Pushing a new one to the database can cause it to be written + // twice (once in the log, and once in the tables). This is already + // an overestimation, as most will delete an existing entry or + // overwrite one. Still, use a conservative safety factor of 2. + if (!CheckDiskSpace(m_chainman.m_options.datadir, 48 * 2 * 2 * CoinsTip().GetCacheSize())) { + return FatalError(m_chainman.GetNotifications(), state, _("Disk space is too low!")); + } + // Flush the chainstate (which may refer to block index entries). + const auto empty_cache{(mode == FlushStateMode::ALWAYS) || fCacheLarge || fCacheCritical}; + if (empty_cache ? !CoinsTip().Flush() : !CoinsTip().Sync()) { + return FatalError(m_chainman.GetNotifications(), state, _("Failed to write to coin database.")); + } + full_flush_completed = true; + TRACEPOINT(utxocache, flush, + int64_t{Ticks(NodeClock::now() - nNow)}, + (uint32_t)mode, + (uint64_t)coins_count, + (uint64_t)coins_mem_usage, + (bool)fFlushForPrune); } - // Flush the chainstate (which may refer to block index entries). - const auto empty_cache{(mode == FlushStateMode::ALWAYS) || fCacheLarge || fCacheCritical}; - if (empty_cache ? !CoinsTip().Flush() : !CoinsTip().Sync()) { - return FatalError(m_chainman.GetNotifications(), state, _("Failed to write to coin database.")); - } - m_last_flush = nNow; - full_flush_completed = true; - TRACEPOINT(utxocache, flush, - int64_t{Ticks(SteadyClock::now() - nNow)}, - (uint32_t)mode, - (uint64_t)coins_count, - (uint64_t)coins_mem_usage, - (bool)fFlushForPrune); } } if (full_flush_completed && m_chainman.m_options.signals) { diff --git a/src/validation.h b/src/validation.h index 9e4fdbe6809..7da84b5d894 100644 --- a/src/validation.h +++ b/src/validation.h @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -802,8 +803,7 @@ private: void UpdateTip(const CBlockIndex* pindexNew) EXCLUSIVE_LOCKS_REQUIRED(::cs_main); - SteadyClock::time_point m_last_write{}; - SteadyClock::time_point m_last_flush{}; + NodeClock::time_point m_next_write{NodeClock::time_point::max()}; /** * In case of an invalid snapshot, rename the coins leveldb directory so