mirror of
https://github.com/bitcoin/bitcoin.git
synced 2025-02-13 11:25:02 -05:00
Merge #12549: Make prevector::resize() and other prevector operations much faster
5aad635 Use memset() to optimize prevector::resize() (Evan Klitzke)
e46be25 Reduce redundant code of prevector and speed it up (Akio Nakamura)
f0e7aa7 Add new prevector benchmarks. (Evan Klitzke)

Pull request description:

This branch optimizes various `prevector` operations, especially resizing vectors. While profiling the `loadblk` thread I noticed that a lot of time was being spent in `prevector::resize()`, which led to this work. I have some data here indicating that it takes up **37%** of the time in `ReadBlockFromDisk()`: https://monad.io/readblockfromdisk.svg

This branch improves things significantly. For trivial types, the new results for the prevector benchmark are:

* `PrevectorClearTrivial` which tests `prevector::clear()` becomes 24.6x faster
* `PrevectorDestructorTrivial` which tests `prevector::~prevector()` becomes 20.5x faster
* `PrevectorResizeTrivial` which tests `prevector::resize()` becomes 20.3x faster

Note that in practice it looks like the prevector is only used to contain `unsigned char` types, which is a trivial type. The benchmarks are testing a bit of an extreme case, but the changes here are motivated by the profiling data for `ReadBlockFromDisk()` I linked to above.

The pull request here consists of a series of three commits:

* The first adds new benchmarks but does not change the prevector code.
* The second is from @AkioNak, and merges some prevector optimizations he submitted in #11988
* The third optimizes `prevector::resize()` to use `memset()` when the prevector contains trivially constructible types

Tree-SHA512: 28f7cbb91a19f9f43b6a5942781d7eb2e3197389186b666f086b69df12bee37773140f765426d715bfb8ebff79cb27a5f1206d0325b54b4aa65598b50fb18368
This commit is contained in:
commit
32987d5aeb
5 changed files with 151 additions and 88 deletions
|
@ -27,7 +27,7 @@ bench_bench_bitcoin_SOURCES = \
|
||||||
bench/lockedpool.cpp \
|
bench/lockedpool.cpp \
|
||||||
bench/perf.cpp \
|
bench/perf.cpp \
|
||||||
bench/perf.h \
|
bench/perf.h \
|
||||||
bench/prevector_destructor.cpp
|
bench/prevector.cpp
|
||||||
|
|
||||||
nodist_bench_bench_bitcoin_SOURCES = $(GENERATED_BENCH_FILES)
|
nodist_bench_bench_bitcoin_SOURCES = $(GENERATED_BENCH_FILES)
|
||||||
|
|
||||||
|
|
77
src/bench/prevector.cpp
Normal file
77
src/bench/prevector.cpp
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
// Copyright (c) 2015-2017 The Bitcoin Core developers
|
||||||
|
// Distributed under the MIT software license, see the accompanying
|
||||||
|
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
||||||
|
|
||||||
|
#include <compat.h>
|
||||||
|
#include <prevector.h>
|
||||||
|
|
||||||
|
#include <bench/bench.h>
|
||||||
|
|
||||||
|
struct nontrivial_t {
|
||||||
|
int x;
|
||||||
|
nontrivial_t() :x(-1) {}
|
||||||
|
};
|
||||||
|
static_assert(!IS_TRIVIALLY_CONSTRUCTIBLE<nontrivial_t>::value,
|
||||||
|
"expected nontrivial_t to not be trivially constructible");
|
||||||
|
|
||||||
|
typedef unsigned char trivial_t;
|
||||||
|
static_assert(IS_TRIVIALLY_CONSTRUCTIBLE<trivial_t>::value,
|
||||||
|
"expected trivial_t to be trivially constructible");
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static void PrevectorDestructor(benchmark::State& state)
|
||||||
|
{
|
||||||
|
while (state.KeepRunning()) {
|
||||||
|
for (auto x = 0; x < 1000; ++x) {
|
||||||
|
prevector<28, T> t0;
|
||||||
|
prevector<28, T> t1;
|
||||||
|
t0.resize(28);
|
||||||
|
t1.resize(29);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
static void PrevectorClear(benchmark::State& state)
|
||||||
|
{
|
||||||
|
|
||||||
|
while (state.KeepRunning()) {
|
||||||
|
for (auto x = 0; x < 1000; ++x) {
|
||||||
|
prevector<28, T> t0;
|
||||||
|
prevector<28, T> t1;
|
||||||
|
t0.resize(28);
|
||||||
|
t0.clear();
|
||||||
|
t1.resize(29);
|
||||||
|
t0.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void PrevectorResize(benchmark::State& state)
|
||||||
|
{
|
||||||
|
while (state.KeepRunning()) {
|
||||||
|
prevector<28, T> t0;
|
||||||
|
prevector<28, T> t1;
|
||||||
|
for (auto x = 0; x < 1000; ++x) {
|
||||||
|
t0.resize(28);
|
||||||
|
t0.resize(0);
|
||||||
|
t1.resize(29);
|
||||||
|
t1.resize(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#define PREVECTOR_TEST(name, nontrivops, trivops) \
|
||||||
|
static void Prevector ## name ## Nontrivial(benchmark::State& state) { \
|
||||||
|
PrevectorResize<nontrivial_t>(state); \
|
||||||
|
} \
|
||||||
|
BENCHMARK(Prevector ## name ## Nontrivial, nontrivops); \
|
||||||
|
static void Prevector ## name ## Trivial(benchmark::State& state) { \
|
||||||
|
PrevectorResize<trivial_t>(state); \
|
||||||
|
} \
|
||||||
|
BENCHMARK(Prevector ## name ## Trivial, trivops);
|
||||||
|
|
||||||
|
PREVECTOR_TEST(Clear, 28300, 88600)
|
||||||
|
PREVECTOR_TEST(Destructor, 28800, 88900)
|
||||||
|
PREVECTOR_TEST(Resize, 28900, 90300)
|
|
@ -1,36 +0,0 @@
|
||||||
// Copyright (c) 2015-2017 The Bitcoin Core developers
|
|
||||||
// Distributed under the MIT software license, see the accompanying
|
|
||||||
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
|
||||||
|
|
||||||
#include <bench/bench.h>
|
|
||||||
#include <prevector.h>
|
|
||||||
|
|
||||||
static void PrevectorDestructor(benchmark::State& state)
|
|
||||||
{
|
|
||||||
while (state.KeepRunning()) {
|
|
||||||
for (auto x = 0; x < 1000; ++x) {
|
|
||||||
prevector<28, unsigned char> t0;
|
|
||||||
prevector<28, unsigned char> t1;
|
|
||||||
t0.resize(28);
|
|
||||||
t1.resize(29);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void PrevectorClear(benchmark::State& state)
|
|
||||||
{
|
|
||||||
|
|
||||||
while (state.KeepRunning()) {
|
|
||||||
for (auto x = 0; x < 1000; ++x) {
|
|
||||||
prevector<28, unsigned char> t0;
|
|
||||||
prevector<28, unsigned char> t1;
|
|
||||||
t0.resize(28);
|
|
||||||
t0.clear();
|
|
||||||
t1.resize(29);
|
|
||||||
t0.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
BENCHMARK(PrevectorDestructor, 5700);
|
|
||||||
BENCHMARK(PrevectorClear, 5600);
|
|
10
src/compat.h
10
src/compat.h
|
@ -10,6 +10,16 @@
|
||||||
#include <config/bitcoin-config.h>
|
#include <config/bitcoin-config.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <type_traits>
|
||||||
|
|
||||||
|
// IS_TRIVIALLY_CONSTRUCTIBLE names the type trait used to detect types that
// can be constructed trivially.
//
// GCC 4.8 is missing some C++11 type_traits,
// https://www.gnu.org/software/gcc/gcc-5/changes.html
// so for GCC older than 5 fall back to the stricter std::is_trivial.
//
// Clang also defines __GNUC__ (as 4) for compatibility, so it has to be
// excluded explicitly; otherwise every Clang build would take the fallback.
#if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 5
#define IS_TRIVIALLY_CONSTRUCTIBLE std::is_trivial
#else
#define IS_TRIVIALLY_CONSTRUCTIBLE std::is_trivially_constructible
#endif
|
||||||
|
|
||||||
#ifdef WIN32
|
#ifdef WIN32
|
||||||
#ifdef _WIN32_WINNT
|
#ifdef _WIN32_WINNT
|
||||||
#undef _WIN32_WINNT
|
#undef _WIN32_WINNT
|
||||||
|
|
114
src/prevector.h
114
src/prevector.h
|
@ -10,9 +10,12 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
|
#include <compat.h>
|
||||||
|
|
||||||
#pragma pack(push, 1)
|
#pragma pack(push, 1)
|
||||||
/** Implements a drop-in replacement for std::vector<T> which stores up to N
|
/** Implements a drop-in replacement for std::vector<T> which stores up to N
|
||||||
* elements directly (without heap allocation). The types Size and Diff are
|
* elements directly (without heap allocation). The types Size and Diff are
|
||||||
|
@ -194,16 +197,42 @@ private:
|
||||||
T* item_ptr(difference_type pos) { return is_direct() ? direct_ptr(pos) : indirect_ptr(pos); }
|
T* item_ptr(difference_type pos) { return is_direct() ? direct_ptr(pos) : indirect_ptr(pos); }
|
||||||
const T* item_ptr(difference_type pos) const { return is_direct() ? direct_ptr(pos) : indirect_ptr(pos); }
|
const T* item_ptr(difference_type pos) const { return is_direct() ? direct_ptr(pos) : indirect_ptr(pos); }
|
||||||
|
|
||||||
|
// Value-initializes `count` elements of type T in the storage starting at
// `dst`. `_size` is not touched; callers adjust it themselves.
void fill(T* dst, ptrdiff_t count) {
    if (IS_TRIVIALLY_CONSTRUCTIBLE<T>::value) {
        // The most common use of prevector is where T=unsigned char. For
        // trivially constructible types, we can use memset() to avoid
        // looping.
        ::memset(dst, 0, count * sizeof(T));
    } else {
        // Index with ptrdiff_t to match `count`; `auto i = 0` would deduce
        // int and overflow for counts above INT_MAX.
        for (ptrdiff_t i = 0; i < count; ++i) {
            new(static_cast<void*>(dst + i)) T();
        }
    }
}
|
||||||
|
|
||||||
|
// Copy-constructs `count` elements from `value` in the storage starting at
// `dst`. `_size` is not touched; callers adjust it themselves.
void fill(T* dst, ptrdiff_t count, const T& value) {
    // Index with ptrdiff_t to match `count`; `auto i = 0` would deduce int
    // and overflow for counts above INT_MAX.
    for (ptrdiff_t i = 0; i < count; ++i) {
        new(static_cast<void*>(dst + i)) T(value);
    }
}
|
||||||
|
|
||||||
|
template<typename InputIterator>
|
||||||
|
void fill(T* dst, InputIterator first, InputIterator last) {
|
||||||
|
while (first != last) {
|
||||||
|
new(static_cast<void*>(dst)) T(*first);
|
||||||
|
++dst;
|
||||||
|
++first;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
void assign(size_type n, const T& val) {
|
void assign(size_type n, const T& val) {
|
||||||
clear();
|
clear();
|
||||||
if (capacity() < n) {
|
if (capacity() < n) {
|
||||||
change_capacity(n);
|
change_capacity(n);
|
||||||
}
|
}
|
||||||
while (size() < n) {
|
_size += n;
|
||||||
_size++;
|
fill(item_ptr(0), n, val);
|
||||||
new(static_cast<void*>(item_ptr(size() - 1))) T(val);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename InputIterator>
|
template<typename InputIterator>
|
||||||
|
@ -213,11 +242,8 @@ public:
|
||||||
if (capacity() < n) {
|
if (capacity() < n) {
|
||||||
change_capacity(n);
|
change_capacity(n);
|
||||||
}
|
}
|
||||||
while (first != last) {
|
_size += n;
|
||||||
_size++;
|
fill(item_ptr(0), first, last);
|
||||||
new(static_cast<void*>(item_ptr(size() - 1))) T(*first);
|
|
||||||
++first;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
prevector() : _size(0), _union{{}} {}
|
prevector() : _size(0), _union{{}} {}
|
||||||
|
@ -228,31 +254,23 @@ public:
|
||||||
|
|
||||||
explicit prevector(size_type n, const T& val = T()) : _size(0) {
|
explicit prevector(size_type n, const T& val = T()) : _size(0) {
|
||||||
change_capacity(n);
|
change_capacity(n);
|
||||||
while (size() < n) {
|
_size += n;
|
||||||
_size++;
|
fill(item_ptr(0), n, val);
|
||||||
new(static_cast<void*>(item_ptr(size() - 1))) T(val);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename InputIterator>
|
template<typename InputIterator>
|
||||||
prevector(InputIterator first, InputIterator last) : _size(0) {
|
prevector(InputIterator first, InputIterator last) : _size(0) {
|
||||||
size_type n = last - first;
|
size_type n = last - first;
|
||||||
change_capacity(n);
|
change_capacity(n);
|
||||||
while (first != last) {
|
_size += n;
|
||||||
_size++;
|
fill(item_ptr(0), first, last);
|
||||||
new(static_cast<void*>(item_ptr(size() - 1))) T(*first);
|
|
||||||
++first;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
prevector(const prevector<N, T, Size, Diff>& other) : _size(0) {
|
prevector(const prevector<N, T, Size, Diff>& other) : _size(0) {
|
||||||
change_capacity(other.size());
|
size_type n = other.size();
|
||||||
const_iterator it = other.begin();
|
change_capacity(n);
|
||||||
while (it != other.end()) {
|
_size += n;
|
||||||
_size++;
|
fill(item_ptr(0), other.begin(), other.end());
|
||||||
new(static_cast<void*>(item_ptr(size() - 1))) T(*it);
|
|
||||||
++it;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
prevector(prevector<N, T, Size, Diff>&& other) : _size(0) {
|
prevector(prevector<N, T, Size, Diff>&& other) : _size(0) {
|
||||||
|
@ -263,14 +281,7 @@ public:
|
||||||
if (&other == this) {
|
if (&other == this) {
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
resize(0);
|
assign(other.begin(), other.end());
|
||||||
change_capacity(other.size());
|
|
||||||
const_iterator it = other.begin();
|
|
||||||
while (it != other.end()) {
|
|
||||||
_size++;
|
|
||||||
new(static_cast<void*>(item_ptr(size() - 1))) T(*it);
|
|
||||||
++it;
|
|
||||||
}
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -314,16 +325,20 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
void resize(size_type new_size) {
|
void resize(size_type new_size) {
|
||||||
if (size() > new_size) {
|
size_type cur_size = size();
|
||||||
|
if (cur_size == new_size) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (cur_size > new_size) {
|
||||||
erase(item_ptr(new_size), end());
|
erase(item_ptr(new_size), end());
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
if (new_size > capacity()) {
|
if (new_size > capacity()) {
|
||||||
change_capacity(new_size);
|
change_capacity(new_size);
|
||||||
}
|
}
|
||||||
while (size() < new_size) {
|
ptrdiff_t increase = new_size - cur_size;
|
||||||
_size++;
|
fill(item_ptr(cur_size), increase);
|
||||||
new(static_cast<void*>(item_ptr(size() - 1))) T();
|
_size += increase;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void reserve(size_type new_capacity) {
|
void reserve(size_type new_capacity) {
|
||||||
|
@ -346,10 +361,11 @@ public:
|
||||||
if (capacity() < new_size) {
|
if (capacity() < new_size) {
|
||||||
change_capacity(new_size + (new_size >> 1));
|
change_capacity(new_size + (new_size >> 1));
|
||||||
}
|
}
|
||||||
memmove(item_ptr(p + 1), item_ptr(p), (size() - p) * sizeof(T));
|
T* ptr = item_ptr(p);
|
||||||
|
memmove(ptr + 1, ptr, (size() - p) * sizeof(T));
|
||||||
_size++;
|
_size++;
|
||||||
new(static_cast<void*>(item_ptr(p))) T(value);
|
new(static_cast<void*>(ptr)) T(value);
|
||||||
return iterator(item_ptr(p));
|
return iterator(ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
void insert(iterator pos, size_type count, const T& value) {
|
void insert(iterator pos, size_type count, const T& value) {
|
||||||
|
@ -358,11 +374,10 @@ public:
|
||||||
if (capacity() < new_size) {
|
if (capacity() < new_size) {
|
||||||
change_capacity(new_size + (new_size >> 1));
|
change_capacity(new_size + (new_size >> 1));
|
||||||
}
|
}
|
||||||
memmove(item_ptr(p + count), item_ptr(p), (size() - p) * sizeof(T));
|
T* ptr = item_ptr(p);
|
||||||
|
memmove(ptr + count, ptr, (size() - p) * sizeof(T));
|
||||||
_size += count;
|
_size += count;
|
||||||
for (size_type i = 0; i < count; i++) {
|
fill(item_ptr(p), count, value);
|
||||||
new(static_cast<void*>(item_ptr(p + i))) T(value);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename InputIterator>
|
template<typename InputIterator>
|
||||||
|
@ -373,13 +388,10 @@ public:
|
||||||
if (capacity() < new_size) {
|
if (capacity() < new_size) {
|
||||||
change_capacity(new_size + (new_size >> 1));
|
change_capacity(new_size + (new_size >> 1));
|
||||||
}
|
}
|
||||||
memmove(item_ptr(p + count), item_ptr(p), (size() - p) * sizeof(T));
|
T* ptr = item_ptr(p);
|
||||||
|
memmove(ptr + count, ptr, (size() - p) * sizeof(T));
|
||||||
_size += count;
|
_size += count;
|
||||||
while (first != last) {
|
fill(ptr, first, last);
|
||||||
new(static_cast<void*>(item_ptr(p))) T(*first);
|
|
||||||
++p;
|
|
||||||
++first;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
iterator erase(iterator pos) {
|
iterator erase(iterator pos) {
|
||||||
|
|
Loading…
Add table
Reference in a new issue