From 0e2812d2938b933debffba5b873637fa1d348b81 Mon Sep 17 00:00:00 2001 From: Pieter Wuille Date: Wed, 15 May 2024 08:37:12 -0400 Subject: [PATCH] clusterlin: add algorithms for connectedness/connected components Add utility functions to DepGraph for finding connected components. --- src/cluster_linearize.h | 44 ++++++++++++++++ src/test/fuzz/cluster_linearize.cpp | 79 +++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+) diff --git a/src/cluster_linearize.h b/src/cluster_linearize.h index 61b76968cf..b581f01da5 100644 --- a/src/cluster_linearize.h +++ b/src/cluster_linearize.h @@ -171,6 +171,50 @@ public: return ret; } + /** Find some connected component within the subset "todo" of this graph. + * + * Specifically, this finds the connected component which contains the first transaction of + * todo (if any). + * + * Two transactions are considered connected if they are both in `todo`, and one is an ancestor + * of the other in the entire graph (so not just within `todo`), or transitively there is a + * path of transactions connecting them. This does mean that if `todo` contains a transaction + * and a grandparent, but misses the parent, they will still be part of the same component. + * + * Complexity: O(ret.Count()). + */ + SetType FindConnectedComponent(const SetType& todo) const noexcept + { + if (todo.None()) return todo; + auto to_add = SetType::Singleton(todo.First()); + SetType ret; + do { + SetType old = ret; + for (auto add : to_add) { + ret |= Descendants(add); + ret |= Ancestors(add); + } + ret &= todo; + to_add = ret - old; + } while (to_add.Any()); + return ret; + } + + /** Determine if a subset is connected. + * + * Complexity: O(subset.Count()). + */ + bool IsConnected(const SetType& subset) const noexcept + { + return FindConnectedComponent(subset) == subset; + } + + /** Determine if this entire graph is connected. + * + * Complexity: O(TxCount()). 
+ */ + bool IsConnected() const noexcept { return IsConnected(SetType::Fill(TxCount())); } + /** Append the entries of select to list in a topologically valid order. * * Complexity: O(select.Count() * log(select.Count())). diff --git a/src/test/fuzz/cluster_linearize.cpp b/src/test/fuzz/cluster_linearize.cpp index c97d00dea1..1d16432c9a 100644 --- a/src/test/fuzz/cluster_linearize.cpp +++ b/src/test/fuzz/cluster_linearize.cpp @@ -294,6 +294,81 @@ FUZZ_TARGET(clusterlin_depgraph_serialization) assert(IsAcyclic(depgraph)); } +FUZZ_TARGET(clusterlin_components) +{ + // Verify the behavior of DepGraph's FindConnectedComponent and IsConnected functions. + + // Construct a depgraph. + SpanReader reader(buffer); + DepGraph depgraph; + try { + reader >> Using(depgraph); + } catch (const std::ios_base::failure&) {} + + TestBitSet todo = TestBitSet::Fill(depgraph.TxCount()); + while (todo.Any()) { + // Find a connected component inside todo. + auto component = depgraph.FindConnectedComponent(todo); + + // The component must be a subset of todo and non-empty. + assert(component.IsSubsetOf(todo)); + assert(component.Any()); + + // If todo is the entire graph, and the entire graph is connected, then the component must + // be the entire graph. + if (todo == TestBitSet::Fill(depgraph.TxCount())) { + assert((component == todo) == depgraph.IsConnected()); + } + + // The component must match todo if and only if todo is connected. + assert((component == todo) == depgraph.IsConnected(todo)); + + // The component cannot have any ancestors or descendants outside of component but in todo. + for (auto i : component) { + assert((depgraph.Ancestors(i) & todo).IsSubsetOf(component)); + assert((depgraph.Descendants(i) & todo).IsSubsetOf(component)); + } + + // Starting from any component element, we must be able to reach every element. + for (auto i : component) { + // Start with just i as reachable. 
+ TestBitSet reachable = TestBitSet::Singleton(i); + // Add in-todo descendants and ancestors to reachable until it does not change anymore. + while (true) { + TestBitSet new_reachable = reachable; + for (auto j : new_reachable) { + new_reachable |= depgraph.Ancestors(j) & todo; + new_reachable |= depgraph.Descendants(j) & todo; + } + if (new_reachable == reachable) break; + reachable = new_reachable; + } + // Verify that the result is the entire component. + assert(component == reachable); + } + + // Construct an arbitrary subset of todo. + uint64_t subset_bits{0}; + try { + reader >> VARINT(subset_bits); + } catch (const std::ios_base::failure&) {} + TestBitSet subset; + for (ClusterIndex i = 0; i < depgraph.TxCount(); ++i) { + if (todo[i]) { + if (subset_bits & 1) subset.Set(i); + subset_bits >>= 1; + } + } + // Which must be non-empty. + if (subset.None()) subset = TestBitSet::Singleton(todo.First()); + // Remove it from todo. + todo -= subset; + } + + // No components can be found in an empty subset. + assert(depgraph.FindConnectedComponent(todo).None()); +} + FUZZ_TARGET(clusterlin_chunking) { // Verify the correctness of the ChunkLinearization function. @@ -357,6 +432,7 @@ FUZZ_TARGET(clusterlin_ancestor_finder) assert(best_anc.transactions.Any()); assert(best_anc.transactions.IsSubsetOf(todo)); assert(depgraph.FeeRate(best_anc.transactions) == best_anc.feerate); + assert(depgraph.IsConnected(best_anc.transactions)); // Check that it is topologically valid. for (auto i : best_anc.transactions) { assert((depgraph.Ancestors(i) & todo).IsSubsetOf(best_anc.transactions)); @@ -443,6 +519,9 @@ FUZZ_TARGET(clusterlin_search_finder) // Perform quality checks only if SearchCandidateFinder claims an optimal result. if (iterations_done < max_iterations) { + // Optimal sets are always connected. + assert(depgraph.IsConnected(found.transactions)); + // Compare with SimpleCandidateFinder. 
auto [simple, simple_iters] = smp_finder.FindCandidateSet(MAX_SIMPLE_ITERATIONS); assert(found.feerate >= simple.feerate);