From 2a41f151afb82466486402e250327e22319c754e Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 8 May 2024 18:09:34 -0400 Subject: [PATCH] clusterlin: add SearchCandidateFinder class Similar to AncestorCandidateFinder, this encapsulates the state needed for finding good candidate sets using a search algorithm. --- src/cluster_linearize.h | 171 +++++++++++++++++++++ src/test/fuzz/cluster_linearize.cpp | 221 ++++++++++++++++++++++++++++ 2 files changed, 392 insertions(+) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 03ee894ae3..39b6881544 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -5,6 +5,8 @@ #ifndef BITCOIN_CLUSTER_LINEARIZE_H #define BITCOIN_CLUSTER_LINEARIZE_H +#include +#include #include #include #include @@ -176,6 +178,9 @@ struct SetInfo /** Their combined fee and size. */ FeeFrac feerate; + /** Construct a SetInfo for the empty set. */ + SetInfo() noexcept = default; + /** Construct a SetInfo for a specified set and feerate. */ SetInfo(const SetType& txn, const FeeFrac& fr) noexcept : transactions(txn), feerate(fr) {} @@ -183,6 +188,13 @@ struct SetInfo explicit SetInfo(const DepGraph& depgraph, const SetType& txn) noexcept : transactions(txn), feerate(depgraph.FeeRate(txn)) {} + /** Construct a new SetInfo equal to this, with more transactions added (which may overlap + * with the existing transactions in the SetInfo). */ + [[nodiscard]] SetInfo Add(const DepGraph& depgraph, const SetType& txn) const noexcept + { + return {transactions | txn, feerate + depgraph.FeeRate(txn - transactions)}; + } + /** Permit equality testing. */ friend bool operator==(const SetInfo&, const SetInfo&) noexcept = default; }; @@ -283,6 +295,165 @@ public: } }; +/** Class encapsulating the state needed to perform search for good candidate sets. + * + * It is initialized for an entire DepGraph, and parts of the graph can be dropped by calling + * MarkDone(). 
+ * + * As long as any part of the graph remains, FindCandidateSet() can be called to perform a search + * over the set of topologically-valid subsets of that remainder, with a limit on how many + * combinations are tried. + */ +template +class SearchCandidateFinder +{ + /** Internal dependency graph for the cluster. */ + const DepGraph& m_depgraph; + /** Which transactions are left to do (sorted indices). */ + SetType m_todo; + +public: + /** Construct a candidate finder for a graph. + * + * @param[in] depgraph Dependency graph for the to-be-linearized cluster. + * + * Complexity: O(1). + */ + SearchCandidateFinder(const DepGraph& depgraph LIFETIMEBOUND) noexcept : + m_depgraph(depgraph), + m_todo(SetType::Fill(depgraph.TxCount())) {} + + /** Check whether any unlinearized transactions remain. */ + bool AllDone() const noexcept + { + return m_todo.None(); + } + + /** Find a high-feerate topologically-valid subset of what remains of the cluster. + * Requires !AllDone(). + * + * @param[in] max_iterations The maximum number of optimization steps that will be performed. + * @param[in] best A set/feerate pair with an already-known good candidate. This may + * be empty. + * @return A pair of: + * - The best (highest feerate, smallest size as tiebreaker) + * topologically valid subset (and its feerate) that was + * encountered during search. It will be at least as good as the + * best passed in (if not empty). + * - The number of optimization steps that were performed. This will + * be <= max_iterations. If strictly < max_iterations, the + * returned subset is optimal. + * + * Complexity: O(N * min(max_iterations, 2^N)) where N=depgraph.TxCount(). + */ + std::pair, uint64_t> FindCandidateSet(uint64_t max_iterations, SetInfo best) noexcept + { + Assume(!AllDone()); + + /** Type for work queue items. */ + struct WorkItem + { + /** Set of transactions definitely included (and its feerate). 
This must be a subset + * of m_todo, and be topologically valid (includes all in-m_todo ancestors of + * itself). */ + SetInfo inc; + /** Set of undecided transactions. This must be a subset of m_todo, and have no overlap + * with inc. The set (inc | und) must be topologically valid. */ + SetType und; + + /** Construct a new work item. */ + WorkItem(SetInfo&& i, SetType&& u) noexcept : + inc(std::move(i)), und(std::move(u)) {} + }; + + /** The queue of work items. */ + std::vector queue; + + // Create an initial entry with m_todo as undecided. Also use it as best if not provided, + // so that during the work processing loop below, and during the add_fn/split_fn calls, we + // do not need to deal with the best=empty case. + if (best.feerate.IsEmpty()) best = SetInfo(m_depgraph, m_todo); + queue.emplace_back(SetInfo{}, SetType{m_todo}); + + /** Local copy of the iteration limit. */ + uint64_t iterations_left = max_iterations; + + /** Internal function to add an item to the queue of elements to explore if there are any + * transactions left to split on, and to update best. + * + * - inc: the "inc" value for the new work item (must be topological). + * - und: the "und" value for the new work item ((inc | und) must be topological). + */ + auto add_fn = [&](SetInfo inc, SetType und) noexcept { + if (!inc.feerate.IsEmpty()) { + // If inc's feerate is better than best's, remember it as our new best. + if (inc.feerate > best.feerate) { + best = inc; + } + } else { + Assume(inc.transactions.None()); + } + + // Make sure there are undecided transactions left to split on. + if (und.None()) return; + + // Actually construct a new work item on the queue. + queue.emplace_back(std::move(inc), std::move(und)); + }; + + /** Internal process function. It takes an existing work item, and splits it in two: one + * with a particular transaction (and its ancestors) included, and one with that + * transaction (and its descendants) excluded. 
*/ + auto split_fn = [&](WorkItem&& elem) noexcept { + // Any queue element must have undecided transactions left, otherwise there is nothing + // to explore anymore. + Assume(elem.und.Any()); + // The included and undecided set are all subsets of m_todo. + Assume(elem.inc.transactions.IsSubsetOf(m_todo) && elem.und.IsSubsetOf(m_todo)); + // Included transactions cannot be undecided. + Assume(!elem.inc.transactions.Overlaps(elem.und)); + + // Pick the first undecided transaction as the one to split on. + const ClusterIndex split = elem.und.First(); + + // Add a work item corresponding to exclusion of the split transaction. + const auto& desc = m_depgraph.Descendants(split); + add_fn(/*inc=*/elem.inc, + /*und=*/elem.und - desc); + + // Add a work item corresponding to inclusion of the split transaction. + const auto anc = m_depgraph.Ancestors(split) & m_todo; + add_fn(/*inc=*/elem.inc.Add(m_depgraph, anc), + /*und=*/elem.und - anc); + + // Account for the performed split. + --iterations_left; + }; + + // Work processing loop. + while (!queue.empty()) { + if (!iterations_left) break; + auto elem = queue.back(); + queue.pop_back(); + split_fn(std::move(elem)); + } + + // Return the found best set and the number of iterations performed. + return {std::move(best), max_iterations - iterations_left}; + } + + /** Remove a subset of transactions from the cluster being linearized. + * + * Complexity: O(N) where N=done.Count(). + */ + void MarkDone(const SetType& done) noexcept + { + Assume(done.Any()); + Assume(done.IsSubsetOf(m_todo)); + m_todo -= done; + } +}; + } // namespace cluster_linearize #endif // BITCOIN_CLUSTER_LINEARIZE_H diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index 880fcb79aa..931862b12d 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -19,6 +19,127 @@ using namespace cluster_linearize; namespace { +/** A simple finder class for candidate sets. 
+ * + * This class matches SearchCandidateFinder in interface and behavior, though with fewer + * optimizations. + */ +template +class SimpleCandidateFinder +{ + /** Internal dependency graph. */ + const DepGraph& m_depgraph; + /** Which transactions are left to include. */ + SetType m_todo; + +public: + /** Construct a SimpleCandidateFinder for a given graph. */ + SimpleCandidateFinder(const DepGraph& depgraph LIFETIMEBOUND) noexcept : + m_depgraph(depgraph), m_todo{SetType::Fill(depgraph.TxCount())} {} + + /** Remove a set of transactions from the set of to-be-linearized ones. */ + void MarkDone(SetType select) noexcept { m_todo -= select; } + + /** Determine whether unlinearized transactions remain. */ + bool AllDone() const noexcept { return m_todo.None(); } + + /** Find a candidate set using at most max_iterations iterations, and return it along with the + * number of iterations actually performed. If that number is less than max_iterations, then the result is optimal. + * + * Complexity: O(N * M), where M is the number of connected topological subsets of the cluster. + * That number is bounded by M <= 2^(N-1). + */ + std::pair, uint64_t> FindCandidateSet(uint64_t max_iterations) const noexcept + { + uint64_t iterations_left = max_iterations; + // Queue of work units. Each consists of: + // - inc: set of transactions definitely included + // - und: set of transactions that can be added to inc still + std::vector> queue; + // Initially we have just one queue element, with the entire graph in und. + queue.emplace_back(SetType{}, m_todo); + // Best solution so far. + SetInfo best(m_depgraph, m_todo); + // Process the queue. + while (!queue.empty() && iterations_left) { + --iterations_left; + // Pop top element of the queue. + auto [inc, und] = queue.back(); + queue.pop_back(); + // Look for a transaction to consider adding/removing. + bool inc_none = inc.None(); + for (auto split : und) { + // If inc is empty, consider any split transaction. 
Otherwise only consider + // transactions that share ancestry with inc so far (which means only connected + // sets will be considered). + if (inc_none || inc.Overlaps(m_depgraph.Ancestors(split))) { + // Add a queue entry with split included. + SetInfo new_inc(m_depgraph, inc | (m_todo & m_depgraph.Ancestors(split))); + queue.emplace_back(new_inc.transactions, und - new_inc.transactions); + // Add a queue entry with split excluded. + queue.emplace_back(inc, und - m_depgraph.Descendants(split)); + // Update statistics to account for the candidate new_inc. + if (new_inc.feerate > best.feerate) best = new_inc; + break; + } + } + } + return {std::move(best), max_iterations - iterations_left}; + } +}; + +/** A very simple finder class for optimal candidate sets, which tries every subset. + * + * It is even simpler than SimpleCandidateFinder, and is primarily included here to test the + * correctness of SimpleCandidateFinder, which is then used to test the correctness of + * SearchCandidateFinder. + */ +template +class ExhaustiveCandidateFinder +{ + /** Internal dependency graph. */ + const DepGraph& m_depgraph; + /** Which transactions are left to include. */ + SetType m_todo; + +public: + /** Construct an ExhaustiveCandidateFinder for a given graph. */ + ExhaustiveCandidateFinder(const DepGraph& depgraph LIFETIMEBOUND) noexcept : + m_depgraph(depgraph), m_todo{SetType::Fill(depgraph.TxCount())} {} + + /** Remove a set of transactions from the set of to-be-linearized ones. */ + void MarkDone(SetType select) noexcept { m_todo -= select; } + + /** Determine whether unlinearized transactions remain. */ + bool AllDone() const noexcept { return m_todo.None(); } + + /** Find the optimal remaining candidate set. + * + * Complexity: O(N * 2^N). + */ + SetInfo FindCandidateSet() const noexcept + { + // Best solution so far. + SetInfo best{m_todo, m_depgraph.FeeRate(m_todo)}; + // The number of combinations to try. 
+ uint64_t limit = (uint64_t{1} << m_todo.Count()) - 1; + // Try the transitive closure of every non-empty subset of m_todo. + for (uint64_t x = 1; x < limit; ++x) { + // If bit number b is set in x, then the remaining ancestors of the b'th remaining + // transaction in m_todo are included. + SetType txn; + auto x_shifted{x}; + for (auto i : m_todo) { + if (x_shifted & 1) txn |= m_depgraph.Ancestors(i); + x_shifted >>= 1; + } + SetInfo cur(m_depgraph, txn & m_todo); + if (cur.feerate > best.feerate) best = cur; + } + return best; + } +}; + /** Given a dependency graph, and a todo set, read a topological subset of todo from reader. */ template SetType ReadTopologicalSet(const DepGraph& depgraph, const SetType& todo, SpanReader& reader) @@ -157,3 +278,103 @@ FUZZ_TARGET(clusterlin_ancestor_finder) } assert(anc_finder.AllDone()); } + +static constexpr auto MAX_SIMPLE_ITERATIONS = 300000; + +FUZZ_TARGET(clusterlin_search_finder) +{ + // Verify that SearchCandidateFinder works as expected by sanity checking the results + // and comparing with the results from SimpleCandidateFinder, ExhaustiveCandidateFinder, and + // AncestorCandidateFinder. + + // Retrieve a depgraph from the fuzz input. + SpanReader reader(buffer); + DepGraph depgraph; + try { + reader >> Using(depgraph); + } catch (const std::ios_base::failure&) {} + + // Instantiate ALL the candidate finders. + SearchCandidateFinder src_finder(depgraph); + SimpleCandidateFinder smp_finder(depgraph); + ExhaustiveCandidateFinder exh_finder(depgraph); + AncestorCandidateFinder anc_finder(depgraph); + + auto todo = TestBitSet::Fill(depgraph.TxCount()); + while (todo.Any()) { + assert(!src_finder.AllDone()); + assert(!smp_finder.AllDone()); + assert(!exh_finder.AllDone()); + assert(!anc_finder.AllDone()); + + // For each iteration, read an iteration count limit from the fuzz input. 
+ uint64_t max_iterations = 1; + try { + reader >> VARINT(max_iterations); + } catch (const std::ios_base::failure&) {} + max_iterations &= 0xfffff; + + // Read an initial subset from the fuzz input. + SetInfo init_best(depgraph, ReadTopologicalSet(depgraph, todo, reader)); + + // Call the search finder's FindCandidateSet for what remains of the graph. + auto [found, iterations_done] = src_finder.FindCandidateSet(max_iterations, init_best); + + // Sanity check the result. + assert(iterations_done <= max_iterations); + assert(found.transactions.Any()); + assert(found.transactions.IsSubsetOf(todo)); + assert(depgraph.FeeRate(found.transactions) == found.feerate); + if (!init_best.feerate.IsEmpty()) assert(found.feerate >= init_best.feerate); + // Check that it is topologically valid. + for (auto i : found.transactions) { + assert(found.transactions.IsSupersetOf(depgraph.Ancestors(i) & todo)); + } + + // At most 2^N-1 iterations can be required: the number of non-empty subsets a graph with N + // transactions has. + assert(iterations_done <= ((uint64_t{1} << todo.Count()) - 1)); + + // Perform quality checks only if SearchCandidateFinder claims an optimal result. + if (iterations_done < max_iterations) { + // Compare with SimpleCandidateFinder. + auto [simple, simple_iters] = smp_finder.FindCandidateSet(MAX_SIMPLE_ITERATIONS); + assert(found.feerate >= simple.feerate); + if (simple_iters < MAX_SIMPLE_ITERATIONS) { + assert(found.feerate == simple.feerate); + } + + // Compare with AncestorCandidateFinder. + auto anc = anc_finder.FindCandidateSet(); + assert(found.feerate >= anc.feerate); + + // Compare with ExhaustiveCandidateFinder. This quickly gets computationally expensive + // for large clusters (O(2^n)), so only do it for sufficiently small ones. 
+ if (todo.Count() <= 12) { + auto exhaustive = exh_finder.FindCandidateSet(); + assert(exhaustive.feerate == found.feerate); + // Also compare ExhaustiveCandidateFinder with SimpleCandidateFinder (this is + // primarily a test for SimpleCandidateFinder's correctness). + assert(exhaustive.feerate >= simple.feerate); + if (simple_iters < MAX_SIMPLE_ITERATIONS) { + assert(exhaustive.feerate == simple.feerate); + } + } + } + + // Find a topologically valid subset of transactions to remove from the graph. + auto del_set = ReadTopologicalSet(depgraph, todo, reader); + // If we did not find anything, use found itself, because we should remove something. + if (del_set.None()) del_set = found.transactions; + todo -= del_set; + src_finder.MarkDone(del_set); + smp_finder.MarkDone(del_set); + exh_finder.MarkDone(del_set); + anc_finder.MarkDone(del_set); + } + + assert(src_finder.AllDone()); + assert(smp_finder.AllDone()); + assert(exh_finder.AllDone()); + assert(anc_finder.AllDone()); +}