From 48a68908ba3d5e077cda7bd1e908b923fbead824 Mon Sep 17 00:00:00 2001
From: Larry Ruane <larryruane@gmail.com>
Date: Mon, 24 Oct 2022 12:10:45 -0600
Subject: [PATCH 1/3] Add LoadExternalBlockFile() benchmark

---
 src/Makefile.bench.include  |  1 +
 src/bench/load_external.cpp | 63 +++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 src/bench/load_external.cpp

diff --git a/src/Makefile.bench.include b/src/Makefile.bench.include
index e1e2066877..33d66ee109 100644
--- a/src/Makefile.bench.include
+++ b/src/Makefile.bench.include
@@ -32,6 +32,7 @@ bench_bench_bitcoin_SOURCES = \
   bench/examples.cpp \
   bench/gcs_filter.cpp \
   bench/hashpadding.cpp \
+  bench/load_external.cpp \
   bench/lockedpool.cpp \
   bench/logging.cpp \
   bench/mempool_eviction.cpp \
diff --git a/src/bench/load_external.cpp b/src/bench/load_external.cpp
new file mode 100644
index 0000000000..be01b2a483
--- /dev/null
+++ b/src/bench/load_external.cpp
@@ -0,0 +1,63 @@
+// Copyright (c) 2022 The Bitcoin Core developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or https://www.opensource.org/licenses/mit-license.php.
+
+#include <bench/bench.h>
+#include <bench/data.h>
+#include <chainparams.h>
+#include <test/util/setup_common.h>
+#include <validation.h>
+
+/**
+ * The LoadExternalBlockFile() function is used during -reindex and -loadblock.
+ *
+ * Create a test file that's similar to a datadir/blocks/blk?????.dat file;
+ * it contains around 134 copies of the same block (which makes it about the
+ * size of a typical real block file). For each block in the file,
+ * LoadExternalBlockFile() won't find its parent, and so will skip the block.
+ * (In the real system, it will re-read the block from disk later, when it
+ * encounters its parent.)
+ *
+ * This benchmark measures the performance of deserializing the block (or just
+ * its header, beginning with PR 16981).
+ */
+static void LoadExternalBlockFile(benchmark::Bench& bench)
+{
+    const auto testing_setup{MakeNoLogFileContext<const TestingSetup>(CBaseChainParams::MAIN)};
+
+    // Create a single block as in the blocks files (magic bytes, block size,
+    // block data) as a stream object.
+    const fs::path blkfile{testing_setup.get()->m_path_root / "blk.dat"};
+    CDataStream ss(SER_DISK, 0);
+    auto params{testing_setup->m_node.chainman->GetParams()};
+    ss << params.MessageStart();
+    ss << static_cast<uint32_t>(benchmark::data::block413567.size());
+    // We can't use the streaming serialization (ss << benchmark::data::block413567)
+    // because that first writes a compact size.
+    ss.write(MakeByteSpan(benchmark::data::block413567));
+
+    // Create the test file.
+    {
+        // "wb+" is "binary, O_RDWR | O_CREAT | O_TRUNC".
+        FILE* file{fsbridge::fopen(blkfile, "wb+")};
+        // Make the test block file about 128 MB in length.
+        for (size_t i = 0; i < node::MAX_BLOCKFILE_SIZE / ss.size(); ++i) {
+            if (fwrite(ss.data(), 1, ss.size(), file) != ss.size()) {
+                throw std::runtime_error("write to test file failed\n");
+            }
+        }
+        fclose(file);
+    }
+
+    Chainstate& chainstate{testing_setup->m_node.chainman->ActiveChainstate()};
+    std::multimap<uint256, FlatFilePos> blocks_with_unknown_parent;
+    FlatFilePos pos;
+    bench.run([&] {
+        // "rb" is "binary, O_RDONLY", positioned to the start of the file.
+        // The file will be closed by LoadExternalBlockFile().
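+        // Each bench iteration scans the whole file; every one of the ~134
+        // block copies is recorded in blocks_with_unknown_parent and skipped,
+        // because block413567's parent is never in the block index.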
+        FILE* file{fsbridge::fopen(blkfile, "rb")};
+        chainstate.LoadExternalBlockFile(file, &pos, &blocks_with_unknown_parent);
+    });
+    fs::remove(blkfile);
+}
+
+BENCHMARK(LoadExternalBlockFile, benchmark::PriorityLevel::HIGH);

From c72de9990ae8f1744006d9c852023b882d5ed80c Mon Sep 17 00:00:00 2001
From: Larry Ruane <larryruane@gmail.com>
Date: Mon, 13 Jul 2020 17:11:37 -0600
Subject: [PATCH 2/3] util: add CBufferedFile::SkipTo() to move ahead in the stream

SkipTo() reads data from the file into the CBufferedFile object
(memory), but, unlike this object's read() method, SkipTo() doesn't
transfer data into a caller's memory buffer. This is useful because,
after skipping forward in the stream in this way, the user can, if
needed, rewind the stream (SetPos()) and access the object's memory
buffer, including ranges that were skipped over (without needing to
read from the disk file).
---
 src/streams.h              | 48 ++++++++++++++++++---------
 src/test/streams_tests.cpp | 67 +++++++++++++++++++++++++++++++++++---
 2 files changed, 96 insertions(+), 19 deletions(-)

diff --git a/src/streams.h b/src/streams.h
index 0178df1c49..84b12f65aa 100644
--- a/src/streams.h
+++ b/src/streams.h
@@ -612,7 +612,6 @@ private:
     uint64_t nRewind;              //!< how many bytes we guarantee to rewind
     std::vector<std::byte> vchBuf; //!< the buffer

-protected:
     //! read data from the source to fill the buffer
     bool Fill() {
         unsigned int pos = nSrcPos % vchBuf.size();
@@ -630,6 +629,28 @@ protected:
         return true;
     }

+    //! Advance the stream's read pointer (m_read_pos) by up to 'length' bytes,
+    //! filling the buffer from the file so that at least one byte is available.
+    //! Return a pointer to the available buffer data and the number of bytes
+    //! (which may be less than the requested length) that may be accessed
+    //! beginning at that pointer.
+    std::pair<std::byte*, size_t> AdvanceStream(size_t length)
+    {
+        assert(m_read_pos <= nSrcPos);
+        if (m_read_pos + length > nReadLimit) {
+            throw std::ios_base::failure("Attempt to position past buffer limit");
+        }
+        // If there are no bytes available, read from the file.
+        if (m_read_pos == nSrcPos && length > 0) Fill();
+
+        size_t buffer_offset{static_cast<size_t>(m_read_pos % vchBuf.size())};
+        size_t buffer_available{static_cast<size_t>(vchBuf.size() - buffer_offset)};
+        size_t bytes_until_source_pos{static_cast<size_t>(nSrcPos - m_read_pos)};
+        size_t advance{std::min({length, buffer_available, bytes_until_source_pos})};
+        m_read_pos += advance;
+        return std::make_pair(&vchBuf[buffer_offset], advance);
+    }
+
 public:
     CBufferedFile(FILE* fileIn, uint64_t nBufSize, uint64_t nRewindIn, int nTypeIn, int nVersionIn)
         : nType(nTypeIn), nVersion(nVersionIn), nSrcPos(0), m_read_pos(0), nReadLimit(std::numeric_limits<uint64_t>::max()), nRewind(nRewindIn), vchBuf(nBufSize, std::byte{0})
@@ -667,24 +688,21 @@ public:
     //! read a number of bytes
     void read(Span<std::byte> dst)
     {
-        if (dst.size() + m_read_pos > nReadLimit) {
-            throw std::ios_base::failure("Read attempted past buffer limit");
-        }
         while (dst.size() > 0) {
-            if (m_read_pos == nSrcPos)
-                Fill();
-            unsigned int pos = m_read_pos % vchBuf.size();
-            size_t nNow = dst.size();
-            if (nNow + pos > vchBuf.size())
-                nNow = vchBuf.size() - pos;
-            if (nNow + m_read_pos > nSrcPos)
-                nNow = nSrcPos - m_read_pos;
-            memcpy(dst.data(), &vchBuf[pos], nNow);
-            m_read_pos += nNow;
-            dst = dst.subspan(nNow);
+            auto [buffer_pointer, length]{AdvanceStream(dst.size())};
+            memcpy(dst.data(), buffer_pointer, length);
+            dst = dst.subspan(length);
         }
     }

+    //! Move the read position ahead in the stream to the given position.
+    //! Use SetPos() to back up in the stream, not SkipTo().
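+    //! For example, after SkipTo(1000), GetPos() returns 1000, and SetPos(990)
+    //! can still succeed, because the skipped bytes remain in the buffer
+    //! (subject to the rewind window and the buffer size).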
+    void SkipTo(const uint64_t file_pos)
+    {
+        assert(file_pos >= m_read_pos);
+        while (m_read_pos < file_pos) AdvanceStream(file_pos - m_read_pos);
+    }
+
     //! return the current reading position
     uint64_t GetPos() const {
         return m_read_pos;
diff --git a/src/test/streams_tests.cpp b/src/test/streams_tests.cpp
index 0925e2e9ee..b1b262eade 100644
--- a/src/test/streams_tests.cpp
+++ b/src/test/streams_tests.cpp
@@ -253,7 +253,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file)
         BOOST_CHECK(false);
     } catch (const std::exception& e) {
         BOOST_CHECK(strstr(e.what(),
-                        "Read attempted past buffer limit") != nullptr);
+                        "Attempt to position past buffer limit") != nullptr);
     }
     // The default argument removes the limit completely.
     BOOST_CHECK(bf.SetLimit());
@@ -322,7 +322,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file)
     BOOST_CHECK(!bf.SetPos(0));
     // But we should now be positioned at least as far back as allowed
     // by the rewind window (relative to our farthest read position, 40).
-    BOOST_CHECK(bf.GetPos() <= 30);
+    BOOST_CHECK(bf.GetPos() <= 30U);

     // We can explicitly close the file, or the destructor will do it.
     bf.fclose();
@@ -330,6 +330,55 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file)
     fs::remove(streams_test_filename);
 }

+BOOST_AUTO_TEST_CASE(streams_buffered_file_skip)
+{
+    fs::path streams_test_filename = m_args.GetDataDirBase() / "streams_test_tmp";
+    FILE* file = fsbridge::fopen(streams_test_filename, "w+b");
+    // The value at each offset is the byte offset (e.g. byte 1 in the file has the value 0x01).
+    for (uint8_t j = 0; j < 40; ++j) {
+        fwrite(&j, 1, 1, file);
+    }
+    rewind(file);
+
+    // The buffer is 25 bytes, allow rewinding 10 bytes.
+    CBufferedFile bf(file, 25, 10, 222, 333);
+
+    uint8_t i;
+    // This is like bf >> (7-byte-variable), in that it will cause data
+    // to be read from the file into memory, but it's not copied to us.
+    bf.SkipTo(7);
+    BOOST_CHECK_EQUAL(bf.GetPos(), 7U);
+    bf >> i;
+    BOOST_CHECK_EQUAL(i, 7);
+
+    // The bytes in the buffer up to offset 7 are valid and can be read.
+    BOOST_CHECK(bf.SetPos(0));
+    bf >> i;
+    BOOST_CHECK_EQUAL(i, 0);
+    bf >> i;
+    BOOST_CHECK_EQUAL(i, 1);
+
+    bf.SkipTo(11);
+    bf >> i;
+    BOOST_CHECK_EQUAL(i, 11);
+
+    // SkipTo() honors the transfer limit; we can't position beyond the limit.
+    bf.SetLimit(13);
+    try {
+        bf.SkipTo(14);
+        BOOST_CHECK(false);
+    } catch (const std::exception& e) {
+        BOOST_CHECK(strstr(e.what(), "Attempt to position past buffer limit") != nullptr);
+    }
+
+    // We can position exactly to the transfer limit.
+    bf.SkipTo(13);
+    BOOST_CHECK_EQUAL(bf.GetPos(), 13U);
+
+    bf.fclose();
+    fs::remove(streams_test_filename);
+}
+
 BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
 {
     // Make this test deterministic.
@@ -361,7 +410,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
             // sizes; the boundaries of the objects can interact arbitrarily
             // with the CBufferFile's internal buffer. These first three
            // cases simulate objects of various sizes (1, 2, 5 bytes).
-            switch (InsecureRandRange(5)) {
+            switch (InsecureRandRange(6)) {
            case 0: {
                 uint8_t a[1];
                 if (currentPos + 1 > fileSize)
@@ -399,6 +448,16 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
                 break;
             }
             case 3: {
+                // SkipTo is similar to the "read" cases above, except
+                // we don't receive the data.
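+                // The stream position still advances exactly as for a read,
+                // so currentPos is updated below in the same way.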
+                size_t skip_length{static_cast<size_t>(InsecureRandRange(5))};
+                if (currentPos + skip_length > fileSize) continue;
+                bf.SetLimit(currentPos + skip_length);
+                bf.SkipTo(currentPos + skip_length);
+                currentPos += skip_length;
+                break;
+            }
+            case 4: {
                 // Find a byte value (that is at or ahead of the current position).
                 size_t find = currentPos + InsecureRandRange(8);
                 if (find >= fileSize)
@@ -415,7 +474,7 @@ BOOST_AUTO_TEST_CASE(streams_buffered_file_rand)
                 currentPos++;
                 break;
             }
-            case 4: {
+            case 5: {
                 size_t requestPos = InsecureRandRange(maxPos + 4);
                 bool okay = bf.SetPos(requestPos);
                 // The new position may differ from the requested position

From db929893ef0bc86ea2708cdbcf41152240cd7c73 Mon Sep 17 00:00:00 2001
From: Larry Ruane <larryruane@gmail.com>
Date: Fri, 7 Aug 2020 15:07:21 -0600
Subject: [PATCH 3/3] Faster -reindex by initially deserializing only headers

When a block is initially read from a blk*.dat file during reindexing,
it can be added to the block index only if all of its ancestor blocks
have been added, which is rare. If the block's ancestors have not been
added, the block must be re-read from disk later when it can be added.

This commit changes the initial block read to deserialize only the
header, rather than the entire block, since the header is sufficient to
determine whether the block's parent (and thus all of its ancestors)
has been added. This is a performance improvement.
---
 src/validation.cpp                 | 46 ++++++++++++++++-----------
 test/functional/feature_reindex.py | 50 ++++++++++++++++++++++++++++++
 2 files changed, 78 insertions(+), 18 deletions(-)

diff --git a/src/validation.cpp b/src/validation.cpp
index 37e68cfe4a..b9105b9090 100644
--- a/src/validation.cpp
+++ b/src/validation.cpp
@@ -4393,6 +4393,8 @@ void Chainstate::LoadExternalBlockFile(
     try {
         // This takes over fileIn and calls fclose() on it in the CBufferedFile destructor
         CBufferedFile blkdat(fileIn, 2*MAX_BLOCK_SERIALIZED_SIZE, MAX_BLOCK_SERIALIZED_SIZE+8, SER_DISK, CLIENT_VERSION);
+        // nRewind indicates where to resume scanning in case something goes wrong,
+        // such as a block failing to deserialize.
         uint64_t nRewind = blkdat.GetPos();
         while (!blkdat.eof()) {
             if (ShutdownRequested()) return;
@@ -4416,28 +4418,30 @@
                 continue;
             } catch (const std::exception&) {
                 // no valid block header found; don't complain
+                // (this happens at the end of every blk.dat file)
                 break;
             }
             try {
-                // read block
-                uint64_t nBlockPos = blkdat.GetPos();
+                // read block header
+                const uint64_t nBlockPos{blkdat.GetPos()};
                 if (dbp)
                     dbp->nPos = nBlockPos;
                 blkdat.SetLimit(nBlockPos + nSize);
-                std::shared_ptr<CBlock> pblock = std::make_shared<CBlock>();
-                CBlock& block = *pblock;
-                blkdat >> block;
-                nRewind = blkdat.GetPos();
-
-                uint256 hash = block.GetHash();
+                CBlockHeader header;
+                blkdat >> header;
+                const uint256 hash{header.GetHash()};
+                // Skip the rest of this block (this may read from disk into memory); position to the marker before the
+                // next block, but it's still possible to rewind to the start of the current block (without a disk read).
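+                // Note: if the file ends before nBlockPos + nSize, SkipTo() throws,
+                // and the enclosing catch logs a deserialize or I/O error for this block.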
+                nRewind = nBlockPos + nSize;
+                blkdat.SkipTo(nRewind);
                 {
                     LOCK(cs_main);
                     // detect out of order blocks, and store them for later
-                    if (hash != params.GetConsensus().hashGenesisBlock && !m_blockman.LookupBlockIndex(block.hashPrevBlock)) {
+                    if (hash != params.GetConsensus().hashGenesisBlock && !m_blockman.LookupBlockIndex(header.hashPrevBlock)) {
                         LogPrint(BCLog::REINDEX, "%s: Out of order block %s, parent %s not known\n", __func__, hash.ToString(),
-                                block.hashPrevBlock.ToString());
+                                header.hashPrevBlock.ToString());
                         if (dbp && blocks_with_unknown_parent) {
-                            blocks_with_unknown_parent->emplace(block.hashPrevBlock, *dbp);
+                            blocks_with_unknown_parent->emplace(header.hashPrevBlock, *dbp);
                         }
                         continue;
                     }
@@ -4445,13 +4449,19 @@
                     // process in case the block isn't known yet
                     const CBlockIndex* pindex = m_blockman.LookupBlockIndex(hash);
                     if (!pindex || (pindex->nStatus & BLOCK_HAVE_DATA) == 0) {
-                        BlockValidationState state;
-                        if (AcceptBlock(pblock, state, nullptr, true, dbp, nullptr, true)) {
-                            nLoaded++;
-                        }
-                        if (state.IsError()) {
-                            break;
-                        }
+                        // This block can be processed immediately; rewind to its start, read and deserialize it.
+                        blkdat.SetPos(nBlockPos);
+                        std::shared_ptr<CBlock> pblock{std::make_shared<CBlock>()};
+                        blkdat >> *pblock;
+                        nRewind = blkdat.GetPos();
+
+                        BlockValidationState state;
+                        if (AcceptBlock(pblock, state, nullptr, true, dbp, nullptr, true)) {
+                            nLoaded++;
+                        }
+                        if (state.IsError()) {
+                            break;
+                        }
                     } else if (hash != params.GetConsensus().hashGenesisBlock && pindex->nHeight % 1000 == 0) {
                         LogPrint(BCLog::REINDEX, "Block Import: already had block %s at height %d\n", hash.ToString(), pindex->nHeight);
                     }
diff --git a/test/functional/feature_reindex.py b/test/functional/feature_reindex.py
index 44040f426f..0f6a8fd0d2 100755
--- a/test/functional/feature_reindex.py
+++ b/test/functional/feature_reindex.py
@@ -7,9 +7,12 @@
 - Start a single node and generate 3 blocks.
 - Stop the node and restart it with -reindex. Verify that the node has reindexed up to block 3.
 - Stop the node and restart it with -reindex-chainstate. Verify that the node has reindexed up to block 3.
+- Verify that out-of-order blocks are correctly processed (see LoadExternalBlockFile()).
 """

+import os
 from test_framework.test_framework import BitcoinTestFramework
+from test_framework.p2p import MAGIC_BYTES
 from test_framework.util import assert_equal


@@ -27,11 +30,58 @@ class ReindexTest(BitcoinTestFramework):
         assert_equal(self.nodes[0].getblockcount(), blockcount) # start_node is blocking on reindex
         self.log.info("Success")

+    # Check that blocks can be processed out of order
+    def out_of_order(self):
+        # The previous test created 12 blocks
+        assert_equal(self.nodes[0].getblockcount(), 12)
+        self.stop_nodes()
+
+        # In this test environment, blocks will always be in order (since
+        # we're generating them rather than getting them from peers), so to
+        # test out-of-order handling, swap blocks 1 and 2 on disk (the second
+        # and third blocks in the file, just past genesis).
+        blk0 = os.path.join(self.nodes[0].datadir, self.nodes[0].chain, 'blocks', 'blk00000.dat')
+        with open(blk0, 'r+b') as bf:
+            # Read at least the first few blocks (including genesis)
+            b = bf.read(2000)
+
+            # Find the offsets of blocks 2, 3, and 4 (the first 3 blocks beyond genesis)
+            # by searching for the regtest marker bytes (see pchMessageStart).
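+            # (Each block on disk is prefixed by the 4 magic bytes and then a
+            # 4-byte little-endian block size, so find_block() returns the
+            # offset of the size field just past the magic.)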
+            def find_block(b, start):
+                return b.find(MAGIC_BYTES["regtest"], start)+4
+
+            genesis_start = find_block(b, 0)
+            assert_equal(genesis_start, 4)
+            b2_start = find_block(b, genesis_start)
+            b3_start = find_block(b, b2_start)
+            b4_start = find_block(b, b3_start)
+
+            # Blocks 2 and 3 should be the same size.
+            assert_equal(b3_start-b2_start, b4_start-b3_start)
+
+            # Swap the second and third blocks (don't disturb the genesis block).
+            bf.seek(b2_start)
+            bf.write(b[b3_start:b4_start])
+            bf.write(b[b2_start:b3_start])
+
+        # The reindexing code should detect and accommodate out of order blocks.
+        with self.nodes[0].assert_debug_log([
+            'LoadExternalBlockFile: Out of order block',
+            'LoadExternalBlockFile: Processing out of order child',
+        ]):
+            extra_args = [["-reindex"]]
+            self.start_nodes(extra_args)
+
+        # All blocks should be accepted and processed.
+        assert_equal(self.nodes[0].getblockcount(), 12)
+
     def run_test(self):
         self.reindex(False)
         self.reindex(True)
         self.reindex(False)
         self.reindex(True)
+        self.out_of_order()
+

 if __name__ == '__main__':
     ReindexTest().main()
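
Both the benchmark in patch 1 and the byte surgery in patch 3's test rely on
the blk*.dat record layout: each block is stored as the 4 network magic bytes,
then a 4-byte little-endian block size, then the serialized block. A minimal
standalone sketch of walking these records (not part of the patches; the
REGTEST_MAGIC constant mirrors MAGIC_BYTES["regtest"], and the path argument
is whatever blk*.dat file you point it at):

    #!/usr/bin/env python3
    # Walk the (magic, size, block) records of a blk*.dat file, mirroring
    # the scan that LoadExternalBlockFile() performs.
    import struct
    import sys

    REGTEST_MAGIC = b"\xfa\xbf\xb5\xda"  # regtest pchMessageStart

    def read_block_records(path, magic=REGTEST_MAGIC):
        with open(path, 'rb') as f:
            data = f.read()
        pos = 0
        while True:
            pos = data.find(magic, pos)
            if pos == -1:
                return
            # The 4-byte little-endian block size follows the magic.
            size = struct.unpack_from('<I', data, pos + 4)[0]
            start = pos + 8
            yield start, size, data[start:start + size]
            pos = start + size

    if __name__ == '__main__':
        for offset, size, _ in read_block_records(sys.argv[1]):
            print(f"block of {size} bytes at offset {offset}")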