#!/usr/bin/env python3
# Copyright (c) 2024-present The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
"""Tool to convert a compact-serialized UTXO set to a SQLite3 database.

The input UTXO set can be generated by Bitcoin Core with the `dumptxoutset` RPC:
$ bitcoin-cli dumptxoutset ~/utxos.dat

The created database contains a table `utxos` with the following schema:
(txid TEXT, vout INT, value INT, coinbase INT, height INT, scriptpubkey TEXT)
"""
import argparse
import os
import sqlite3
import sys
import time


UTXO_DUMP_MAGIC = b'utxo\xff'
UTXO_DUMP_VERSION = 2
NET_MAGIC_BYTES = {
    b"\xf9\xbe\xb4\xd9": "Mainnet",
    b"\x0a\x03\xcf\x40": "Signet",
    b"\x0b\x11\x09\x07": "Testnet3",
    b"\x1c\x16\x3f\x28": "Testnet4",
    b"\xfa\xbf\xb5\xda": "Regtest",
}
# header layout: magic (5) + version (2) + network magic (4) + block hash (32) + UTXO count (8)
UTXO_DUMP_HEADER_SIZE = 5 + 2 + 4 + 32 + 8
# upper bound accepted for the length of a script stored without special compression
MAX_SCRIPT_SIZE = 10000


def _read_exact(f, length):
    """Read exactly `length` bytes from `f`.

    Raises EOFError if the stream ends early, so that a truncated dump is
    reported instead of being silently converted into corrupt rows.
    """
    data = f.read(length)
    if len(data) != length:
        raise EOFError(f"unexpected end of input (expected {length} bytes, got {len(data)})")
    return data


def read_varint(f):
    """Equivalent of `ReadVarInt()` (see serialization module).

    Reads bytes from `f` until one without the continuation bit (0x80) is
    found and returns the decoded non-negative integer.
    Raises EOFError if the stream ends in the middle of the integer.
    """
    n = 0
    while True:
        dat = _read_exact(f, 1)[0]
        n = (n << 7) | (dat & 0x7f)
        if dat & 0x80:
            n += 1  # every continuation byte carries an implicit +1
        else:
            return n


def read_compactsize(f):
    """Equivalent of `ReadCompactSize()` (see serialization module).

    A first byte below 253 is the value itself; 253, 254 and 255 announce a
    little-endian integer of 2, 4 and 8 bytes respectively.
    Raises EOFError if the stream ends in the middle of the integer.
    """
    n = _read_exact(f, 1)[0]
    if n == 253:
        n = int.from_bytes(_read_exact(f, 2), "little")
    elif n == 254:
        n = int.from_bytes(_read_exact(f, 4), "little")
    elif n == 255:
        n = int.from_bytes(_read_exact(f, 8), "little")
    return n


def decompress_amount(x):
    """Equivalent of `DecompressAmount()` (see compressor module).

    Takes the compressed integer `x` and returns the decompressed amount.
    """
    if x == 0:
        return 0
    x -= 1
    e = x % 10  # decimal exponent
    x //= 10
    if e < 9:
        d = (x % 9) + 1  # last non-zero digit
        x //= 9
        n = x * 10 + d
    else:
        n = x + 1
    return n * 10**e


def decompress_script(f):
    """Equivalent of `DecompressScript()` (see compressor module).

    Reads one compressed script from `f` and returns the full script as bytes.
    Raises EOFError on truncated input and ValueError if the encoded script
    length exceeds MAX_SCRIPT_SIZE or a compressed public key is invalid.
    """
    size = read_varint(f)  # sizes 0-5 encode compressed script types
    if size == 0:  # P2PKH
        return bytes([0x76, 0xa9, 20]) + _read_exact(f, 20) + bytes([0x88, 0xac])
    elif size == 1:  # P2SH
        return bytes([0xa9, 20]) + _read_exact(f, 20) + bytes([0x87])
    elif size in (2, 3):  # P2PK (compressed)
        return bytes([33, size]) + _read_exact(f, 32) + bytes([0xac])
    elif size in (4, 5):  # P2PK (uncompressed)
        compressed_pubkey = bytes([size - 2]) + _read_exact(f, 32)
        return bytes([65]) + decompress_pubkey(compressed_pubkey) + bytes([0xac])
    else:  # others (bare multisig, segwit etc.)
        size -= 6
        # explicit check instead of `assert`: the size comes from the input file
        # and the validation must also be active when running with `python -O`
        if size > MAX_SCRIPT_SIZE:
            raise ValueError(f"too long script with size {size}")
        return _read_exact(f, size)


def decompress_pubkey(compressed_pubkey):
    """Decompress pubkey by calculating y = sqrt(x^3 + 7) % p
       (see functions `secp256k1_eckey_pubkey_parse` and `secp256k1_ge_set_xo_var`).

    Takes a 33-byte compressed public key (tag byte 2 or 3, followed by the
    x coordinate) and returns the 65-byte uncompressed encoding
    (tag byte 4, followed by x and y).
    Raises ValueError if the encoding is malformed or x is not on the curve.
    """
    P = 2**256 - 2**32 - 977  # secp256k1 field size
    if len(compressed_pubkey) != 33 or compressed_pubkey[0] not in (2, 3):
        raise ValueError(f"invalid compressed pubkey encoding ({compressed_pubkey.hex()})")
    x = int.from_bytes(compressed_pubkey[1:], 'big')
    rhs = (x**3 + 7) % P
    y = pow(rhs, (P + 1)//4, P)  # get sqrt using Tonelli-Shanks algorithm (for p % 4 = 3)
    if pow(y, 2, P) != rhs:
        raise ValueError(f"pubkey is not on curve ({compressed_pubkey.hex()})")
    tag_is_odd = compressed_pubkey[0] == 3
    y_is_odd = (y & 1) == 1
    if tag_is_odd != y_is_odd:  # fix parity (even/odd) if necessary
        y = P - y
    return bytes([4]) + x.to_bytes(32, 'big') + y.to_bytes(32, 'big')


def _convert_coins(f, con, num_utxos, verbose):
    """Read `num_utxos` coins from `f` and insert them into table `utxos` of `con`.

    Rows are written and committed in batches of 16*1024 coins; progress is
    printed every 1024*1024 coins, and every coin is printed if `verbose` is set.
    Returns the largest coin height seen (0 if there are no coins).
    Raises EOFError or ValueError if the coin data is truncated or malformed.
    """
    start_time = time.time()
    write_batch = []
    coins_per_hash_left = 0
    prevout_hash = None
    max_height = 0

    for coin_idx in range(1, num_utxos + 1):
        # read key (COutPoint)
        if coins_per_hash_left == 0:  # read next prevout hash
            prevout_hash = _read_exact(f, 32)[::-1].hex()
            coins_per_hash_left = read_compactsize(f)
            if coins_per_hash_left == 0:
                # a zero count would desynchronise the parser for all following coins
                raise ValueError(f"txid {prevout_hash} is listed with zero coins")
        prevout_index = read_compactsize(f)
        # read value (Coin)
        code = read_varint(f)
        height = code >> 1
        is_coinbase = code & 1
        amount = decompress_amount(read_varint(f))
        scriptpubkey = decompress_script(f).hex()
        write_batch.append((prevout_hash, prevout_index, amount, is_coinbase, height, scriptpubkey))
        max_height = max(max_height, height)
        coins_per_hash_left -= 1

        if verbose:
            print(f"Coin {coin_idx}/{num_utxos}:")
            print(f"    prevout = {prevout_hash}:{prevout_index}")
            print(f"    amount = {amount}, height = {height}, coinbase = {is_coinbase}")
            print(f"    scriptPubKey = {scriptpubkey}\n")

        if coin_idx % (16*1024) == 0 or coin_idx == num_utxos:
            # write utxo batch to database
            con.executemany("INSERT INTO utxos VALUES(?, ?, ?, ?, ?, ?)", write_batch)
            con.commit()
            write_batch.clear()

        if coin_idx % (1024*1024) == 0:
            elapsed = time.time() - start_time
            print(f"{coin_idx} coins converted [{coin_idx/num_utxos*100:.2f}%], " +
                  f"{elapsed:.3f}s passed since start")

    return max_height


def main():
    """Command line entry point: convert the UTXO dump `infile` to the SQLite3 database `outfile`.

    Exits with status 1 if the input file is missing, the output file already
    exists, the dump header is invalid, the coin data cannot be parsed, or
    unread data remains in the input after all announced coins were converted.
    """
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('infile', help='filename of compact-serialized UTXO set (input)')
    parser.add_argument('outfile', help='filename of created SQLite3 database (output)')
    parser.add_argument('-v', '--verbose', action='store_true', help='show details about each UTXO')
    args = parser.parse_args()

    if not os.path.exists(args.infile):
        print(f"Error: provided input file '{args.infile}' doesn't exist.")
        sys.exit(1)

    if os.path.exists(args.outfile):
        print(f"Error: provided output file '{args.outfile}' already exists.")
        sys.exit(1)

    with open(args.infile, 'rb') as f:
        # read metadata (magic bytes, version, network magic, block hash, UTXO count)
        header = f.read(UTXO_DUMP_HEADER_SIZE)
        magic_bytes = header[:5]
        version = int.from_bytes(header[5:7], 'little')
        network_magic = header[7:11]
        block_hash = header[11:43]
        if magic_bytes != UTXO_DUMP_MAGIC:
            print(f"Error: provided input file '{args.infile}' is not an UTXO dump.")
            sys.exit(1)
        if version != UTXO_DUMP_VERSION:
            print(f"Error: provided input file '{args.infile}' has unknown UTXO dump version {version} "
                  f"(only version {UTXO_DUMP_VERSION} supported)")
            sys.exit(1)
        if len(header) != UTXO_DUMP_HEADER_SIZE:
            print(f"Error: provided input file '{args.infile}' has a truncated header.")
            sys.exit(1)
        num_utxos = int.from_bytes(header[43:51], 'little')
        network_string = NET_MAGIC_BYTES.get(network_magic, f"unknown network ({network_magic.hex()})")
        print(f"UTXO Snapshot for {network_string} at block hash "
              f"{block_hash[::-1].hex()[:32]}..., contains {num_utxos} coins")

        # create the database only after the header was validated, so that an
        # invalid input file doesn't leave an empty output file behind
        con = sqlite3.connect(args.outfile)
        try:
            con.execute("CREATE TABLE utxos(txid TEXT, vout INT, value INT, coinbase INT, height INT, scriptpubkey TEXT)")
            max_height = _convert_coins(f, con, num_utxos, args.verbose)
        except (EOFError, ValueError) as e:
            print(f"Error: failed to parse input file '{args.infile}': {e}")
            sys.exit(1)
        finally:
            con.close()

        print(f"TOTAL: {num_utxos} coins written to {args.outfile}, snapshot height is {max_height}.")
        if f.read(1) != b'':  # EOF should be reached by now
            print(f"WARNING: input file {args.infile} has not reached EOF yet!")
            sys.exit(1)


if __name__ == '__main__':
    main()
"""Test utxo-to-sqlite conversion tool"""
import os.path
try:
    import sqlite3
except ImportError:
    # tolerated here; `skip_test_if_missing_module` below calls `skip_if_no_py_sqlite3()`
    pass
import subprocess
import sys

from test_framework.key import ECKey
from test_framework.messages import (
    COutPoint,
    CTxOut,
)
from test_framework.crypto.muhash import MuHash3072
from test_framework.script import (
    CScript,
    CScriptOp,
)
from test_framework.script_util import (
    PAY_TO_ANCHOR,
    key_to_p2pk_script,
    key_to_p2pkh_script,
    key_to_p2wpkh_script,
    keys_to_multisig_script,
    output_key_to_p2tr_script,
    script_to_p2sh_script,
    script_to_p2wsh_script,
)
from test_framework.test_framework import BitcoinTestFramework
from test_framework.util import (
    assert_equal,
)
from test_framework.wallet import MiniWallet


def calculate_muhash_from_sqlite_utxos(filename):
    """Return the MuHash (reversed digest, as hex string) over all rows of table
    `utxos` in the SQLite3 database `filename`."""
    muhash = MuHash3072()
    con = sqlite3.connect(filename)
    try:
        # name the columns explicitly, so that the unpacking below doesn't
        # silently depend on the column order of the table
        query = "SELECT txid, vout, value, coinbase, height, scriptpubkey FROM utxos"
        for (txid_hex, vout, value, coinbase, height, spk_hex) in con.execute(query):
            # serialize UTXO for MuHash (see function `TxOutSer` in the coinstats module)
            utxo_ser = COutPoint(int(txid_hex, 16), vout).serialize()
            utxo_ser += (height * 2 + coinbase).to_bytes(4, 'little')
            utxo_ser += CTxOut(value, bytes.fromhex(spk_hex)).serialize()
            muhash.insert(utxo_ser)
    finally:
        # release the database file even if serialization fails half-way
        con.close()
    return muhash.digest()[::-1].hex()


class UtxoToSqliteTest(BitcoinTestFramework):
    """Dump the UTXO set of a node, convert it with the utxo-to-sqlite tool and
    compare the MuHash of the resulting database with the node's own MuHash."""

    def set_test_params(self):
        self.num_nodes = 1
        # we want to create some UTXOs with non-standard output scripts
        self.extra_args = [['-acceptnonstdtxn=1']]

    def skip_test_if_missing_module(self):
        self.skip_if_no_py_sqlite3()

    def run_test(self):
        node = self.nodes[0]
        wallet = MiniWallet(node)
        key = ECKey()

        self.log.info('Create UTXOs with various output script types')
        for i in range(1, 10+1):
            key.generate(compressed=False)
            uncompressed_pubkey = key.get_pubkey().get_bytes()
            key.generate(compressed=True)
            pubkey = key.get_pubkey().get_bytes()

            # add output scripts for compressed script type 0 (P2PKH), type 1 (P2SH),
            # types 2-3 (P2PK compressed), types 4-5 (P2PK uncompressed) and
            # for uncompressed scripts (bare multisig, segwit, etc.)
            output_scripts = (
                key_to_p2pkh_script(pubkey),
                script_to_p2sh_script(key_to_p2pkh_script(pubkey)),
                key_to_p2pk_script(pubkey),
                key_to_p2pk_script(uncompressed_pubkey),

                keys_to_multisig_script([pubkey]*i),
                keys_to_multisig_script([uncompressed_pubkey]*i),
                key_to_p2wpkh_script(pubkey),
                script_to_p2wsh_script(key_to_p2pkh_script(pubkey)),
                output_key_to_p2tr_script(pubkey[1:]),
                PAY_TO_ANCHOR,
                CScript([CScriptOp.encode_op_n(i)]*(1000*i)),  # large script (up to 10000 bytes)
            )

            # create outputs and mine them in a block
            for output_script in output_scripts:
                wallet.send_to(from_node=node, scriptPubKey=output_script, amount=i, fee=20000)
            self.generate(wallet, 1)

        self.log.info('Dump UTXO set via `dumptxoutset` RPC')
        input_filename = os.path.join(self.options.tmpdir, "utxos.dat")
        node.dumptxoutset(input_filename, "latest")

        self.log.info('Convert UTXO set from compact-serialized format to sqlite format')
        output_filename = os.path.join(self.options.tmpdir, "utxos.sqlite")
        base_dir = self.config["environment"]["SRCDIR"]
        utxo_to_sqlite_path = os.path.join(base_dir, "contrib", "utxo-tools", "utxo_to_sqlite.py")
        # run the tool with the same interpreter as the test; `check=True` fails
        # the test if the tool exits with a non-zero status
        subprocess.run([sys.executable, utxo_to_sqlite_path, input_filename, output_filename],
                       check=True, stderr=subprocess.STDOUT)

        self.log.info('Verify that both UTXO sets match by comparing their MuHash')
        muhash_sqlite = calculate_muhash_from_sqlite_utxos(output_filename)
        muhash_compact_serialized = node.gettxoutsetinfo('muhash')['muhash']
        assert_equal(muhash_sqlite, muhash_compact_serialized)


if __name__ == "__main__":
    UtxoToSqliteTest(__file__).main()