0
0
Fork 0
mirror of https://github.com/bitcoin/bitcoin.git synced 2025-02-09 10:43:19 -05:00

Merge #12549: Make prevector::resize() and other prevector operations much faster

5aad635 Use memset() to optimize prevector::resize() (Evan Klitzke)
e46be25 Reduce redundant code of prevector and speed it up (Akio Nakamura)
f0e7aa7 Add new prevector benchmarks. (Evan Klitzke)

Pull request description:

  This branch optimizes various `prevector` operations, especially resizing vectors. While profiling the `loadblk` thread I noticed that a lot of time was being spent in `prevector::resize()` which led to this work. I have some data here indicating that it takes up **37%** of the time in `ReadBlockFromDisk()`: https://monad.io/readblockfromdisk.svg

  This branch improves things significantly. For trivial types, the new results for the prevector benchmark are:

   * `PrevectorClearTrivial` which tests `prevector::clear()` becomes 24.6x faster
   * `PrevectorDestructorTrivial` which tests `prevector::~prevector()` becomes 20.5x faster
   * `PrevectorResizeTrivial` which tests `prevector::resize()` becomes 20.3x faster

  Note that in practice the prevector appears to be used only to hold `unsigned char`, which is a trivial type. The benchmarks test a somewhat extreme case, but the changes here are motivated by the profiling data for `ReadBlockFromDisk()` linked above.

  The pull request here consists of a series of three commits:
   * The first adds new benchmarks but does not change the prevector code.
   * The second is from @AkioNak, and merges some prevector optimizations he submitted in #11988
   * The third optimizes `prevector::resize()` to use `memset()` when the prevector contains trivially constructible types

Tree-SHA512: 28f7cbb91a19f9f43b6a5942781d7eb2e3197389186b666f086b69df12bee37773140f765426d715bfb8ebff79cb27a5f1206d0325b54b4aa65598b50fb18368
This commit is contained in:
Wladimir J. van der Laan 2018-03-01 12:12:55 +01:00
commit 32987d5aeb
No known key found for this signature in database
GPG key ID: 1E4AED62986CD25D
5 changed files with 151 additions and 88 deletions

View file

@ -27,7 +27,7 @@ bench_bench_bitcoin_SOURCES = \
bench/lockedpool.cpp \
bench/perf.cpp \
bench/perf.h \
bench/prevector_destructor.cpp
bench/prevector.cpp
nodist_bench_bench_bitcoin_SOURCES = $(GENERATED_BENCH_FILES)

77
src/bench/prevector.cpp Normal file
View file

@ -0,0 +1,77 @@
// Copyright (c) 2015-2017 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <compat.h>
#include <prevector.h>
#include <bench/bench.h>
// Element type with a user-provided default constructor, so that it is not
// trivially constructible. Every default-constructed instance has x == -1.
struct nontrivial_t {
    int x;

    nontrivial_t()
        : x{-1}
    {
    }
};
// Compile-time guard: nontrivial_t must be rejected by the
// IS_TRIVIALLY_CONSTRUCTIBLE trait, otherwise the "Nontrivial" benchmarks
// would not exercise the non-trivial code path.
static_assert(!IS_TRIVIALLY_CONSTRUCTIBLE<nontrivial_t>::value,
"expected nontrivial_t to not be trivially constructible");
// Element type used by the "Trivial" benchmark variants.
typedef unsigned char trivial_t;
// Compile-time guard: trivial_t must be accepted by the
// IS_TRIVIALLY_CONSTRUCTIBLE trait.
static_assert(IS_TRIVIALLY_CONSTRUCTIBLE<trivial_t>::value,
"expected trivial_t to be trivially constructible");
// Benchmark construction + destruction of prevector<28, T>.
//
// Each round builds two vectors, grows one to 28 elements and the other to
// 29 elements, and then lets both go out of scope so their destructors run.
template <typename T>
static void PrevectorDestructor(benchmark::State& state)
{
    while (state.KeepRunning()) {
        int round = 0;
        while (round != 1000) {
            {
                prevector<28, T> at_capacity;
                prevector<28, T> over_capacity;
                at_capacity.resize(28);
                over_capacity.resize(29);
            } // both vectors are destroyed here
            ++round;
        }
    }
}
// Benchmark prevector::clear() for prevector<28, T>.
//
// Each round builds two vectors, grows one to 28 elements and the other to
// 29 elements, and clears each of them once.
template <typename T>
static void PrevectorClear(benchmark::State& state)
{
    while (state.KeepRunning()) {
        for (auto x = 0; x < 1000; ++x) {
            prevector<28, T> t0;
            prevector<28, T> t1;
            t0.resize(28);
            t0.clear();
            t1.resize(29);
            // Clear t1 here (not t0 a second time): otherwise the 29-element
            // vector is never cleared and clear() is only measured on an
            // already-empty t0.
            t1.clear();
        }
    }
}
// Benchmark prevector::resize() for prevector<28, T>.
//
// The two vectors are constructed once per KeepRunning() iteration and then
// repeatedly grown (to 28 and 29 elements respectively) and shrunk back to 0.
//
// Declared static like the sibling benchmark templates in this file; it is
// only referenced from this translation unit.
template <typename T>
static void PrevectorResize(benchmark::State& state)
{
    while (state.KeepRunning()) {
        prevector<28, T> t0;
        prevector<28, T> t1;
        for (auto x = 0; x < 1000; ++x) {
            t0.resize(28);
            t0.resize(0);
            t1.resize(29);
            t1.resize(0);
        }
    }
}
// Defines and registers a pair of benchmarks for the operation `name`:
//   Prevector<name>Nontrivial runs Prevector<name><nontrivial_t>
//   Prevector<name>Trivial    runs Prevector<name><trivial_t>
// `nontrivops` and `trivops` are forwarded unchanged as the second argument
// of BENCHMARK for the respective variant.
//
// The wrappers must dispatch on `name`; calling PrevectorResize
// unconditionally would make the Clear and Destructor benchmarks measure the
// resize workload instead of their own.
#define PREVECTOR_TEST(name, nontrivops, trivops)                           \
    static void Prevector ## name ## Nontrivial(benchmark::State& state) {  \
        Prevector ## name<nontrivial_t>(state);                             \
    }                                                                       \
    BENCHMARK(Prevector ## name ## Nontrivial, nontrivops);                 \
    static void Prevector ## name ## Trivial(benchmark::State& state) {     \
        Prevector ## name<trivial_t>(state);                                \
    }                                                                       \
    BENCHMARK(Prevector ## name ## Trivial, trivops);
// Instantiate the Nontrivial/Trivial benchmark pair for each operation.
// The first number is forwarded to BENCHMARK for the nontrivial_t variant and
// the second for the trivial_t variant (presumably iteration-count hints for
// the bench framework; confirm against bench/bench.h).
PREVECTOR_TEST(Clear, 28300, 88600)
PREVECTOR_TEST(Destructor, 28800, 88900)
PREVECTOR_TEST(Resize, 28900, 90300)

View file

@ -1,36 +0,0 @@
// Copyright (c) 2015-2017 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
#include <bench/bench.h>
#include <prevector.h>
// Benchmark construction + destruction of prevector<28, unsigned char>.
//
// Each round builds two vectors, grows one to 28 elements and the other to
// 29 elements, and then lets both go out of scope so their destructors run.
static void PrevectorDestructor(benchmark::State& state)
{
    while (state.KeepRunning()) {
        int round = 0;
        while (round != 1000) {
            {
                prevector<28, unsigned char> at_capacity;
                prevector<28, unsigned char> over_capacity;
                at_capacity.resize(28);
                over_capacity.resize(29);
            } // both vectors are destroyed here
            ++round;
        }
    }
}
// Benchmark prevector::clear() for prevector<28, unsigned char>.
//
// Each round builds two vectors, grows one to 28 elements and the other to
// 29 elements, and clears each of them once.
static void PrevectorClear(benchmark::State& state)
{
    while (state.KeepRunning()) {
        for (auto x = 0; x < 1000; ++x) {
            prevector<28, unsigned char> t0;
            prevector<28, unsigned char> t1;
            t0.resize(28);
            t0.clear();
            t1.resize(29);
            // Clear t1 here (not t0 a second time): otherwise the 29-element
            // vector is never cleared and clear() is only measured on an
            // already-empty t0.
            t1.clear();
        }
    }
}
// Register both benchmark functions with the bench framework. The numeric
// second argument is passed straight to BENCHMARK (presumably an
// iteration-count hint; confirm against bench/bench.h).
BENCHMARK(PrevectorDestructor, 5700);
BENCHMARK(PrevectorClear, 5600);

View file

@ -10,6 +10,16 @@
#include <config/bitcoin-config.h>
#endif
#include <type_traits>
// IS_TRIVIALLY_CONSTRUCTIBLE<T>::value is the portable spelling of the
// "T is trivially constructible" type trait used by the rest of the code.
//
// GCC 4.8 is missing some C++11 type_traits,
// https://www.gnu.org/software/gcc/gcc-5/changes.html
// so when __GNUC__ reports a major version below 5 the macro falls back to
// std::is_trivial, which is a stricter condition (it additionally requires
// the type to be trivially copyable). Otherwise the C++11 trait
// std::is_trivially_constructible is used directly.
//
// NOTE(review): compilers other than GCC may also define __GNUC__ (clang is
// believed to report 4), in which case they take the fallback branch as
// well; confirm whether that is intended.
#if defined(__GNUC__) && __GNUC__ < 5
#define IS_TRIVIALLY_CONSTRUCTIBLE std::is_trivial
#else
#define IS_TRIVIALLY_CONSTRUCTIBLE std::is_trivially_constructible
#endif
#ifdef WIN32
#ifdef _WIN32_WINNT
#undef _WIN32_WINNT

View file

@ -10,9 +10,12 @@
#include <stdint.h>
#include <string.h>
#include <cstddef>
#include <iterator>
#include <type_traits>
#include <compat.h>
#pragma pack(push, 1)
/** Implements a drop-in replacement for std::vector<T> which stores up to N
* elements directly (without heap allocation). The types Size and Diff are
@ -194,16 +197,42 @@ private:
/** Return a pointer to slot `pos` in whichever storage is_direct() reports
 *  as currently active (direct_ptr or indirect_ptr). */
T* item_ptr(difference_type pos) {
    if (is_direct()) {
        return direct_ptr(pos);
    }
    return indirect_ptr(pos);
}
/** Const overload: return a read-only pointer to slot `pos` in whichever
 *  storage is_direct() reports as currently active. */
const T* item_ptr(difference_type pos) const {
    if (is_direct()) {
        return direct_ptr(pos);
    }
    return indirect_ptr(pos);
}
/** Default-initialize `count` elements in the raw storage starting at `dst`.
 *
 *  @param dst    first slot to initialize; the caller must have made room
 *                for `count` elements there.
 *  @param count  number of elements to create.
 *
 *  Does not touch _size; callers adjust it themselves.
 */
void fill(T* dst, ptrdiff_t count) {
    if (IS_TRIVIALLY_CONSTRUCTIBLE<T>::value) {
        // The most common use of prevector is where T=unsigned char. For
        // trivially constructible types, we can use memset() to avoid
        // looping.
        ::memset(dst, 0, count * sizeof(T));
    } else {
        // The index has the same type as `count`: a plain `auto i = 0` is an
        // int and would overflow before reaching counts above INT_MAX.
        for (ptrdiff_t i = 0; i < count; ++i) {
            new(static_cast<void*>(dst + i)) T();
        }
    }
}
/** Copy-construct `count` elements equal to `value` in the raw storage
 *  starting at `dst`.
 *
 *  @param dst    first slot to initialize; the caller must have made room
 *                for `count` elements there.
 *  @param count  number of copies to create.
 *  @param value  element to copy from.
 *
 *  Does not touch _size; callers adjust it themselves.
 */
void fill(T* dst, ptrdiff_t count, const T& value) {
    // The index has the same type as `count`: a plain `auto i = 0` is an int
    // and would overflow before reaching counts above INT_MAX.
    for (ptrdiff_t i = 0; i < count; ++i) {
        new(static_cast<void*>(dst + i)) T(value);
    }
}
/** Copy-construct the elements of the range [first, last) into consecutive
 *  raw slots starting at `dst`. The caller must have made room for the whole
 *  range. Does not touch _size. */
template<typename InputIterator>
void fill(T* dst, InputIterator first, InputIterator last) {
    for (; first != last; ++dst, ++first) {
        new(static_cast<void*>(dst)) T(*first);
    }
}
public:
/** Replace the contents with `n` copies of `val`.
 *
 *  The listing interleaved the removed per-element loop with its
 *  replacement; read literally, that constructs every element twice and
 *  leaves _size at twice the intended value. Only the bulk fill() path is
 *  kept.
 */
void assign(size_type n, const T& val) {
    clear();
    if (capacity() < n) {
        change_capacity(n);
    }
    // _size is incremented rather than assigned, as in the original code
    // (presumably because _size also encodes the storage mode; confirm).
    _size += n;
    fill(item_ptr(0), n, val);
}
template<typename InputIterator>
@ -213,11 +242,8 @@ public:
if (capacity() < n) {
change_capacity(n);
}
while (first != last) {
_size++;
new(static_cast<void*>(item_ptr(size() - 1))) T(*first);
++first;
}
_size += n;
fill(item_ptr(0), first, last);
}
/** Default constructor: starts with _size 0 and a brace-initialized storage
 *  union. */
prevector()
    : _size(0),
      _union{{}}
{
}
@ -228,31 +254,23 @@ public:
/** Construct a prevector holding `n` copies of `val` (default: T()).
 *
 *  The listing interleaved the removed per-element loop with its
 *  replacement; read literally, that constructs every element twice and
 *  leaves _size at twice the intended value. Only the bulk fill() path is
 *  kept.
 */
explicit prevector(size_type n, const T& val = T()) : _size(0) {
    change_capacity(n);
    // _size is incremented rather than assigned, as in the original code
    // (presumably because _size also encodes the storage mode; confirm).
    _size += n;
    fill(item_ptr(0), n, val);
}
/** Construct a prevector from the range [first, last).
 *
 *  The element count is computed as `last - first`, so the iterators must
 *  support subtraction.
 *
 *  The listing interleaved the removed per-element loop with its
 *  replacement; read literally, the old loop consumes `first` and bumps
 *  _size before the new code adds `n` again and calls fill() on an already
 *  exhausted range. Only the bulk fill() path is kept.
 */
template<typename InputIterator>
prevector(InputIterator first, InputIterator last) : _size(0) {
    size_type n = last - first;
    change_capacity(n);
    // _size is incremented rather than assigned, as in the original code
    // (presumably because _size also encodes the storage mode; confirm).
    _size += n;
    fill(item_ptr(0), first, last);
}
/** Copy constructor: allocate room for other.size() elements and
 *  copy-construct them in one fill() pass.
 *
 *  The listing interleaved the removed per-element copy loop (and its own
 *  change_capacity call) with the replacement; read literally, that copies
 *  every element twice and leaves _size at twice other.size(). Only the
 *  bulk fill() path is kept.
 */
prevector(const prevector<N, T, Size, Diff>& other) : _size(0) {
    size_type n = other.size();
    change_capacity(n);
    // _size is incremented rather than assigned, as in the original code
    // (presumably because _size also encodes the storage mode; confirm).
    _size += n;
    fill(item_ptr(0), other.begin(), other.end());
}
prevector(prevector<N, T, Size, Diff>&& other) : _size(0) {
@ -263,14 +281,7 @@ public:
if (&other == this) {
return *this;
}
resize(0);
change_capacity(other.size());
const_iterator it = other.begin();
while (it != other.end()) {
_size++;
new(static_cast<void*>(item_ptr(size() - 1))) T(*it);
++it;
}
assign(other.begin(), other.end());
return *this;
}
@ -314,16 +325,20 @@ public:
}
/** Change the number of elements to `new_size`.
 *
 *  - Equal size: nothing to do.
 *  - Shrinking: the tail [new_size, end()) is erased.
 *  - Growing: capacity is raised if needed and the new tail is
 *    default-initialized through fill().
 *
 *  The listing interleaved the removed `if (size() > new_size) {` header and
 *  the removed per-element growth loop with their replacements, which left
 *  the braces unbalanced and would construct the new elements twice. Only
 *  the replacement logic is kept.
 */
void resize(size_type new_size) {
    size_type cur_size = size();
    if (cur_size == new_size) {
        return;
    }
    if (cur_size > new_size) {
        erase(item_ptr(new_size), end());
        return;
    }
    if (new_size > capacity()) {
        change_capacity(new_size);
    }
    // Initialize the new tail first, then publish the larger size.
    ptrdiff_t increase = new_size - cur_size;
    fill(item_ptr(cur_size), increase);
    _size += increase;
}
void reserve(size_type new_capacity) {
@ -346,10 +361,11 @@ public:
if (capacity() < new_size) {
change_capacity(new_size + (new_size >> 1));
}
memmove(item_ptr(p + 1), item_ptr(p), (size() - p) * sizeof(T));
T* ptr = item_ptr(p);
memmove(ptr + 1, ptr, (size() - p) * sizeof(T));
_size++;
new(static_cast<void*>(item_ptr(p))) T(value);
return iterator(item_ptr(p));
new(static_cast<void*>(ptr)) T(value);
return iterator(ptr);
}
void insert(iterator pos, size_type count, const T& value) {
@ -358,11 +374,10 @@ public:
if (capacity() < new_size) {
change_capacity(new_size + (new_size >> 1));
}
memmove(item_ptr(p + count), item_ptr(p), (size() - p) * sizeof(T));
T* ptr = item_ptr(p);
memmove(ptr + count, ptr, (size() - p) * sizeof(T));
_size += count;
for (size_type i = 0; i < count; i++) {
new(static_cast<void*>(item_ptr(p + i))) T(value);
}
fill(item_ptr(p), count, value);
}
template<typename InputIterator>
@ -373,13 +388,10 @@ public:
if (capacity() < new_size) {
change_capacity(new_size + (new_size >> 1));
}
memmove(item_ptr(p + count), item_ptr(p), (size() - p) * sizeof(T));
T* ptr = item_ptr(p);
memmove(ptr + count, ptr, (size() - p) * sizeof(T));
_size += count;
while (first != last) {
new(static_cast<void*>(item_ptr(p))) T(*first);
++p;
++first;
}
fill(ptr, first, last);
}
iterator erase(iterator pos) {