mirror of
https://github.com/bitcoin/bitcoin.git
synced 2025-02-09 10:43:19 -05:00
Merge #12549: Make prevector::resize() and other prevector operations much faster
5aad635 Use memset() to optimize prevector::resize() (Evan Klitzke)
e46be25 Reduce redundant code of prevector and speed it up (Akio Nakamura)
f0e7aa7 Add new prevector benchmarks. (Evan Klitzke)

Pull request description:

This branch optimizes various `prevector` operations, especially resizing vectors. While profiling the `loadblk` thread I noticed that a lot of time was being spent in `prevector::resize()` which led to this work. I have some data here indicating that it takes up **37%** of the time in `ReadBlockFromDisk()`: https://monad.io/readblockfromdisk.svg

This branch improves things significantly. For trivial types, the new results for the prevector benchmark are:

* `PrevectorClearTrivial` which tests `prevector::clear()` becomes 24.6x faster
* `PrevectorDestructorTrivial` which tests `prevector::~prevector()` becomes 20.5x faster
* `PrevectorResizeTrivial` which tests `prevector::resize()` becomes 20.3x faster

Note that in practice it looks like the prevector is only used to contain `unsigned char` types, which is a trivial type. The benchmarks are testing a bit of an extreme case, but the changes here are motivated by the profiling data for `ReadBlockFromDisk()` I linked to above.

The pull request here consists of a series of three commits:

* The first adds new benchmarks but does not change the prevector code.
* The second is from @AkioNak, and merges some prevector optimizations he submitted in #11988
* The third optimizes `prevector::resize()` to use `memset()` when the prevector contains trivially constructible types

Tree-SHA512: 28f7cbb91a19f9f43b6a5942781d7eb2e3197389186b666f086b69df12bee37773140f765426d715bfb8ebff79cb27a5f1206d0325b54b4aa65598b50fb18368
This commit is contained in:
commit
32987d5aeb
5 changed files with 151 additions and 88 deletions
|
@ -27,7 +27,7 @@ bench_bench_bitcoin_SOURCES = \
|
|||
bench/lockedpool.cpp \
|
||||
bench/perf.cpp \
|
||||
bench/perf.h \
|
||||
bench/prevector_destructor.cpp
|
||||
bench/prevector.cpp
|
||||
|
||||
nodist_bench_bench_bitcoin_SOURCES = $(GENERATED_BENCH_FILES)
|
||||
|
||||
|
|
77
src/bench/prevector.cpp
Normal file
77
src/bench/prevector.cpp
Normal file
|
@ -0,0 +1,77 @@
|
|||
// Copyright (c) 2015-2017 The Bitcoin Core developers
|
||||
// Distributed under the MIT software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
#include <compat.h>
|
||||
#include <prevector.h>
|
||||
|
||||
#include <bench/bench.h>
|
||||
|
||||
// Element type for the "nontrivial" benchmark variants. The user-provided
// default constructor (which leaves x at -1) keeps the type from being
// trivially constructible.
struct nontrivial_t {
    int x{-1};
    nontrivial_t() {}
};
|
||||
static_assert(!IS_TRIVIALLY_CONSTRUCTIBLE<nontrivial_t>::value,
|
||||
"expected nontrivial_t to not be trivially constructible");
|
||||
|
||||
typedef unsigned char trivial_t;
|
||||
static_assert(IS_TRIVIALLY_CONSTRUCTIBLE<trivial_t>::value,
|
||||
"expected trivial_t to be trivially constructible");
|
||||
|
||||
template <typename T>
|
||||
static void PrevectorDestructor(benchmark::State& state)
|
||||
{
|
||||
while (state.KeepRunning()) {
|
||||
for (auto x = 0; x < 1000; ++x) {
|
||||
prevector<28, T> t0;
|
||||
prevector<28, T> t1;
|
||||
t0.resize(28);
|
||||
t1.resize(29);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void PrevectorClear(benchmark::State& state)
|
||||
{
|
||||
|
||||
while (state.KeepRunning()) {
|
||||
for (auto x = 0; x < 1000; ++x) {
|
||||
prevector<28, T> t0;
|
||||
prevector<28, T> t1;
|
||||
t0.resize(28);
|
||||
t0.clear();
|
||||
t1.resize(29);
|
||||
t0.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void PrevectorResize(benchmark::State& state)
|
||||
{
|
||||
while (state.KeepRunning()) {
|
||||
prevector<28, T> t0;
|
||||
prevector<28, T> t1;
|
||||
for (auto x = 0; x < 1000; ++x) {
|
||||
t0.resize(28);
|
||||
t0.resize(0);
|
||||
t1.resize(29);
|
||||
t1.resize(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Defines and registers two benchmarks for the templated benchmark body
// Prevector<name>:
//   - Prevector<name>Nontrivial, which runs Prevector<name><nontrivial_t>
//   - Prevector<name>Trivial, which runs Prevector<name><trivial_t>
// `nontrivops` and `trivops` are passed through unchanged as the second
// argument of BENCHMARK for the respective variant.
//
// The body must be selected via `Prevector ## name`; hard-coding one template
// here would make every generated benchmark measure the same operation.
#define PREVECTOR_TEST(name, nontrivops, trivops)                           \
    static void Prevector ## name ## Nontrivial(benchmark::State& state) {  \
        Prevector ## name<nontrivial_t>(state);                             \
    }                                                                       \
    BENCHMARK(Prevector ## name ## Nontrivial, nontrivops);                 \
    static void Prevector ## name ## Trivial(benchmark::State& state) {     \
        Prevector ## name<trivial_t>(state);                                \
    }                                                                       \
    BENCHMARK(Prevector ## name ## Trivial, trivops);
|
||||
|
||||
PREVECTOR_TEST(Clear, 28300, 88600)
|
||||
PREVECTOR_TEST(Destructor, 28800, 88900)
|
||||
PREVECTOR_TEST(Resize, 28900, 90300)
|
|
@ -1,36 +0,0 @@
|
|||
// Copyright (c) 2015-2017 The Bitcoin Core developers
|
||||
// Distributed under the MIT software license, see the accompanying
|
||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||
|
||||
#include <bench/bench.h>
|
||||
#include <prevector.h>
|
||||
|
||||
static void PrevectorDestructor(benchmark::State& state)
|
||||
{
|
||||
while (state.KeepRunning()) {
|
||||
for (auto x = 0; x < 1000; ++x) {
|
||||
prevector<28, unsigned char> t0;
|
||||
prevector<28, unsigned char> t1;
|
||||
t0.resize(28);
|
||||
t1.resize(29);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void PrevectorClear(benchmark::State& state)
|
||||
{
|
||||
|
||||
while (state.KeepRunning()) {
|
||||
for (auto x = 0; x < 1000; ++x) {
|
||||
prevector<28, unsigned char> t0;
|
||||
prevector<28, unsigned char> t1;
|
||||
t0.resize(28);
|
||||
t0.clear();
|
||||
t1.resize(29);
|
||||
t0.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
BENCHMARK(PrevectorDestructor, 5700);
|
||||
BENCHMARK(PrevectorClear, 5600);
|
10
src/compat.h
10
src/compat.h
|
@ -10,6 +10,16 @@
|
|||
#include <config/bitcoin-config.h>
|
||||
#endif
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
// GCC 4.8 is missing some C++11 type_traits,
// https://www.gnu.org/software/gcc/gcc-5/changes.html
//
// Clang also defines __GNUC__ (as 4) for compatibility, so it is excluded
// explicitly; otherwise it would always take the GCC < 5 fallback and use the
// stricter std::is_trivial check.
// NOTE(review): this assumes Clang is paired with a standard library that
// provides std::is_trivially_constructible -- confirm for supported toolchains.
#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 5
#define IS_TRIVIALLY_CONSTRUCTIBLE std::is_trivial
#else
#define IS_TRIVIALLY_CONSTRUCTIBLE std::is_trivially_constructible
#endif
|
||||
|
||||
#ifdef WIN32
|
||||
#ifdef _WIN32_WINNT
|
||||
#undef _WIN32_WINNT
|
||||
|
|
114
src/prevector.h
114
src/prevector.h
|
@ -10,9 +10,12 @@
|
|||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <iterator>
|
||||
#include <type_traits>
|
||||
|
||||
#include <compat.h>
|
||||
|
||||
#pragma pack(push, 1)
|
||||
/** Implements a drop-in replacement for std::vector<T> which stores up to N
|
||||
* elements directly (without heap allocation). The types Size and Diff are
|
||||
|
@ -194,16 +197,42 @@ private:
|
|||
T* item_ptr(difference_type pos) { return is_direct() ? direct_ptr(pos) : indirect_ptr(pos); }
|
||||
const T* item_ptr(difference_type pos) const { return is_direct() ? direct_ptr(pos) : indirect_ptr(pos); }
|
||||
|
||||
/**
 * Default-construct `count` elements in the storage starting at `dst`.
 *
 * When IS_TRIVIALLY_CONSTRUCTIBLE<T> holds, the range is zero-filled with a
 * single memset() instead of constructing the elements one by one.
 */
void fill(T* dst, ptrdiff_t count) {
    if (IS_TRIVIALLY_CONSTRUCTIBLE<T>::value) {
        // The most common use of prevector is where T=unsigned char. For
        // trivially constructible types, we can use memset() to avoid
        // looping.
        ::memset(dst, 0, count * sizeof(T));
    } else {
        // The index has the same type as `count`, so the comparison does not
        // mix an int counter with a ptrdiff_t bound.
        for (ptrdiff_t i = 0; i < count; ++i) {
            new(static_cast<void*>(dst + i)) T();
        }
    }
}
|
||||
|
||||
/**
 * Copy-construct `count` elements from `value` in the storage starting at
 * `dst`, using placement new on each slot.
 */
void fill(T* dst, ptrdiff_t count, const T& value) {
    // The index has the same type as `count`, so the comparison does not mix
    // an int counter with a ptrdiff_t bound.
    for (ptrdiff_t i = 0; i < count; ++i) {
        new(static_cast<void*>(dst + i)) T(value);
    }
}
|
||||
|
||||
template<typename InputIterator>
|
||||
void fill(T* dst, InputIterator first, InputIterator last) {
|
||||
while (first != last) {
|
||||
new(static_cast<void*>(dst)) T(*first);
|
||||
++dst;
|
||||
++first;
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
void assign(size_type n, const T& val) {
|
||||
clear();
|
||||
if (capacity() < n) {
|
||||
change_capacity(n);
|
||||
}
|
||||
while (size() < n) {
|
||||
_size++;
|
||||
new(static_cast<void*>(item_ptr(size() - 1))) T(val);
|
||||
}
|
||||
_size += n;
|
||||
fill(item_ptr(0), n, val);
|
||||
}
|
||||
|
||||
template<typename InputIterator>
|
||||
|
@ -213,11 +242,8 @@ public:
|
|||
if (capacity() < n) {
|
||||
change_capacity(n);
|
||||
}
|
||||
while (first != last) {
|
||||
_size++;
|
||||
new(static_cast<void*>(item_ptr(size() - 1))) T(*first);
|
||||
++first;
|
||||
}
|
||||
_size += n;
|
||||
fill(item_ptr(0), first, last);
|
||||
}
|
||||
|
||||
prevector() : _size(0), _union{{}} {}
|
||||
|
@ -228,31 +254,23 @@ public:
|
|||
|
||||
explicit prevector(size_type n, const T& val = T()) : _size(0) {
|
||||
change_capacity(n);
|
||||
while (size() < n) {
|
||||
_size++;
|
||||
new(static_cast<void*>(item_ptr(size() - 1))) T(val);
|
||||
}
|
||||
_size += n;
|
||||
fill(item_ptr(0), n, val);
|
||||
}
|
||||
|
||||
template<typename InputIterator>
|
||||
prevector(InputIterator first, InputIterator last) : _size(0) {
|
||||
size_type n = last - first;
|
||||
change_capacity(n);
|
||||
while (first != last) {
|
||||
_size++;
|
||||
new(static_cast<void*>(item_ptr(size() - 1))) T(*first);
|
||||
++first;
|
||||
}
|
||||
_size += n;
|
||||
fill(item_ptr(0), first, last);
|
||||
}
|
||||
|
||||
prevector(const prevector<N, T, Size, Diff>& other) : _size(0) {
|
||||
change_capacity(other.size());
|
||||
const_iterator it = other.begin();
|
||||
while (it != other.end()) {
|
||||
_size++;
|
||||
new(static_cast<void*>(item_ptr(size() - 1))) T(*it);
|
||||
++it;
|
||||
}
|
||||
size_type n = other.size();
|
||||
change_capacity(n);
|
||||
_size += n;
|
||||
fill(item_ptr(0), other.begin(), other.end());
|
||||
}
|
||||
|
||||
prevector(prevector<N, T, Size, Diff>&& other) : _size(0) {
|
||||
|
@ -263,14 +281,7 @@ public:
|
|||
if (&other == this) {
|
||||
return *this;
|
||||
}
|
||||
resize(0);
|
||||
change_capacity(other.size());
|
||||
const_iterator it = other.begin();
|
||||
while (it != other.end()) {
|
||||
_size++;
|
||||
new(static_cast<void*>(item_ptr(size() - 1))) T(*it);
|
||||
++it;
|
||||
}
|
||||
assign(other.begin(), other.end());
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -314,16 +325,20 @@ public:
|
|||
}
|
||||
|
||||
void resize(size_type new_size) {
|
||||
if (size() > new_size) {
|
||||
size_type cur_size = size();
|
||||
if (cur_size == new_size) {
|
||||
return;
|
||||
}
|
||||
if (cur_size > new_size) {
|
||||
erase(item_ptr(new_size), end());
|
||||
return;
|
||||
}
|
||||
if (new_size > capacity()) {
|
||||
change_capacity(new_size);
|
||||
}
|
||||
while (size() < new_size) {
|
||||
_size++;
|
||||
new(static_cast<void*>(item_ptr(size() - 1))) T();
|
||||
}
|
||||
ptrdiff_t increase = new_size - cur_size;
|
||||
fill(item_ptr(cur_size), increase);
|
||||
_size += increase;
|
||||
}
|
||||
|
||||
void reserve(size_type new_capacity) {
|
||||
|
@ -346,10 +361,11 @@ public:
|
|||
if (capacity() < new_size) {
|
||||
change_capacity(new_size + (new_size >> 1));
|
||||
}
|
||||
memmove(item_ptr(p + 1), item_ptr(p), (size() - p) * sizeof(T));
|
||||
T* ptr = item_ptr(p);
|
||||
memmove(ptr + 1, ptr, (size() - p) * sizeof(T));
|
||||
_size++;
|
||||
new(static_cast<void*>(item_ptr(p))) T(value);
|
||||
return iterator(item_ptr(p));
|
||||
new(static_cast<void*>(ptr)) T(value);
|
||||
return iterator(ptr);
|
||||
}
|
||||
|
||||
void insert(iterator pos, size_type count, const T& value) {
|
||||
|
@ -358,11 +374,10 @@ public:
|
|||
if (capacity() < new_size) {
|
||||
change_capacity(new_size + (new_size >> 1));
|
||||
}
|
||||
memmove(item_ptr(p + count), item_ptr(p), (size() - p) * sizeof(T));
|
||||
T* ptr = item_ptr(p);
|
||||
memmove(ptr + count, ptr, (size() - p) * sizeof(T));
|
||||
_size += count;
|
||||
for (size_type i = 0; i < count; i++) {
|
||||
new(static_cast<void*>(item_ptr(p + i))) T(value);
|
||||
}
|
||||
fill(item_ptr(p), count, value);
|
||||
}
|
||||
|
||||
template<typename InputIterator>
|
||||
|
@ -373,13 +388,10 @@ public:
|
|||
if (capacity() < new_size) {
|
||||
change_capacity(new_size + (new_size >> 1));
|
||||
}
|
||||
memmove(item_ptr(p + count), item_ptr(p), (size() - p) * sizeof(T));
|
||||
T* ptr = item_ptr(p);
|
||||
memmove(ptr + count, ptr, (size() - p) * sizeof(T));
|
||||
_size += count;
|
||||
while (first != last) {
|
||||
new(static_cast<void*>(item_ptr(p))) T(*first);
|
||||
++p;
|
||||
++first;
|
||||
}
|
||||
fill(ptr, first, last);
|
||||
}
|
||||
|
||||
iterator erase(iterator pos) {
|
||||
|
|
Loading…
Add table
Reference in a new issue