From fae7b83eb58d22ed83878561603991131372cdd7 Mon Sep 17 00:00:00 2001 From: MarcoFalke <*~=`'#}+{/-|&$^_@721217.xyz> Date: Tue, 10 Sep 2024 21:55:23 +0200 Subject: [PATCH 1/4] lint: Remove forbidden functions from lint-format-strings.py Given that all of them are forbidden by the test/lint/lint-locale-dependence.py check, they can be removed. --- test/lint/lint-format-strings.py | 8 -------- test/lint/run-lint-format-strings.py | 1 - 2 files changed, 9 deletions(-) diff --git a/test/lint/lint-format-strings.py b/test/lint/lint-format-strings.py index 9f5e0f312ec..9c9fbb2e392 100755 --- a/test/lint/lint-format-strings.py +++ b/test/lint/lint-format-strings.py @@ -17,7 +17,6 @@ import sys FUNCTION_NAMES_AND_NUMBER_OF_LEADING_ARGUMENTS = [ 'FatalErrorf,0', - 'fprintf,1', 'tfm::format,1', # Assuming tfm::::format(std::ostream&, ... 'LogConnectFailure,1', 'LogError,0', @@ -27,14 +26,7 @@ FUNCTION_NAMES_AND_NUMBER_OF_LEADING_ARGUMENTS = [ 'LogTrace,1', 'LogPrintf,0', 'LogPrintLevel,2', - 'printf,0', - 'snprintf,2', - 'sprintf,1', 'strprintf,0', - 'vfprintf,1', - 'vprintf,1', - 'vsnprintf,1', - 'vsprintf,1', 'WalletLogPrintf,0', ] RUN_LINT_FILE = 'test/lint/run-lint-format-strings.py' diff --git a/test/lint/run-lint-format-strings.py b/test/lint/run-lint-format-strings.py index 09a2503452e..9a7386ee64a 100755 --- a/test/lint/run-lint-format-strings.py +++ b/test/lint/run-lint-format-strings.py @@ -13,7 +13,6 @@ import re import sys FALSE_POSITIVES = [ - ("src/dbwrapper.cpp", "vsnprintf(p, limit - p, format, backup_ap)"), ("src/index/base.cpp", "FatalErrorf(const char* fmt, const Args&... args)"), ("src/index/base.h", "FatalErrorf(const char* fmt, const Args&... args)"), ("src/netbase.cpp", "LogConnectFailure(bool manual_connection, const char* fmt, const Args&... args)"), From faa62c0112f2b7ab69c80a5178f5b79217bed0a6 Mon Sep 17 00:00:00 2001 From: MarcoFalke <*~=`'#}+{/-|&$^_@721217.xyz> Date: Tue, 30 Jul 2024 13:23:37 +0200 Subject: [PATCH 2/4] util: Add ConstevalFormatString The type is used to wrap a format string once it has been compile-time checked to contain the right number of format specifiers. --- src/test/CMakeLists.txt | 1 + src/test/util_string_tests.cpp | 86 ++++++++++++++++++++++++++++++++++ src/util/string.h | 63 ++++++++++++++++++++++++- 3 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 src/test/util_string_tests.cpp diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 92b39f04976..afa4addb4ec 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -132,6 +132,7 @@ add_executable(test_bitcoin txvalidation_tests.cpp txvalidationcache_tests.cpp uint256_tests.cpp + util_string_tests.cpp util_tests.cpp util_threadnames_tests.cpp validation_block_tests.cpp diff --git a/src/test/util_string_tests.cpp b/src/test/util_string_tests.cpp new file mode 100644 index 00000000000..8b974ffe6fb --- /dev/null +++ b/src/test/util_string_tests.cpp @@ -0,0 +1,86 @@ +// Copyright (c) 2024-present The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include + +#include + +using namespace util; + +BOOST_AUTO_TEST_SUITE(util_string_tests) + +// Helper to allow compile-time sanity checks while providing the number of +// args directly. Normally PassFmt would be used. +template +inline void PassFmt(util::ConstevalFormatString fmt) +{ + // This was already executed at compile-time, but is executed again at run-time to avoid -Wunused. + decltype(fmt)::Detail_CheckNumFormatSpecifiers(fmt.fmt); +} +template +inline void FailFmtWithError(std::string_view wrong_fmt, std::string_view error) +{ + using ErrType = const char*; + auto check_throw{[error](const ErrType& str) { return str == error; }}; + BOOST_CHECK_EXCEPTION(util::ConstevalFormatString::Detail_CheckNumFormatSpecifiers(wrong_fmt), ErrType, check_throw); +} + +BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec) +{ + PassFmt<0>(""); + PassFmt<0>("%%"); + PassFmt<1>("%s"); + PassFmt<0>("%%s"); + PassFmt<0>("s%%"); + PassFmt<1>("%%%s"); + PassFmt<1>("%s%%"); + PassFmt<0>(" 1$s"); + PassFmt<1>("%1$s"); + PassFmt<1>("%1$s%1$s"); + PassFmt<2>("%2$s"); + PassFmt<2>("%2$s 4$s %2$s"); + PassFmt<129>("%129$s 999$s %2$s"); + PassFmt<1>("%02d"); + PassFmt<1>("%+2s"); + PassFmt<1>("%.6i"); + PassFmt<1>("%5.2f"); + PassFmt<1>("%#x"); + PassFmt<1>("%1$5i"); + PassFmt<1>("%1$-5i"); + PassFmt<1>("%1$.5i"); + // tinyformat accepts almost any "type" spec, even '%', or '_', or '\n'. + PassFmt<1>("%123%"); + PassFmt<1>("%123%s"); + PassFmt<1>("%_"); + PassFmt<1>("%\n"); + + // The `*` specifier behavior is unsupported and can lead to runtime + // errors when used in a ConstevalFormatString. Please refer to the + // note in the ConstevalFormatString docs. + PassFmt<1>("%*c"); + PassFmt<2>("%2$*3$d"); + PassFmt<1>("%.*f"); + + auto err_mix{"Format specifiers must be all positional or all non-positional!"}; + FailFmtWithError<1>("%s%1$s", err_mix); + + auto err_num{"Format specifier count must match the argument count!"}; + FailFmtWithError<1>("", err_num); + FailFmtWithError<0>("%s", err_num); + FailFmtWithError<2>("%s", err_num); + FailFmtWithError<0>("%1$s", err_num); + FailFmtWithError<2>("%1$s", err_num); + + auto err_0_pos{"Positional format specifier must have position of at least 1"}; + FailFmtWithError<1>("%$s", err_0_pos); + FailFmtWithError<1>("%$", err_0_pos); + FailFmtWithError<0>("%0$", err_0_pos); + FailFmtWithError<0>("%0$s", err_0_pos); + + auto err_term{"Format specifier incorrectly terminated by end of string"}; + FailFmtWithError<1>("%", err_term); + FailFmtWithError<1>("%1$", err_term); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/src/util/string.h b/src/util/string.h index 30c0a0d6c1c..bdf074a9a60 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019-2022 The Bitcoin Core developers +// Copyright (c) 2019-present The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. @@ -17,6 +17,67 @@ #include namespace util { +/** + * @brief A wrapper for a compile-time partially validated format string + * + * This struct can be used to enforce partial compile-time validation of format + * strings, to reduce the likelihood of tinyformat throwing exceptions at + * run-time. Validation is partial to try and prevent the most common errors + * while avoiding re-implementing the entire parsing logic. + * + * @note Counting of `*` dynamic width and precision fields (such as `%*c`, + * `%2$*3$d`, `%.*f`) is not implemented to minimize code complexity as long as + * they are not used in the codebase. Usage of these fields is not counted and + * can lead to run-time exceptions. Code wanting to use the `*` specifier can + * side-step this struct and call tinyformat directly. + */ +template +struct ConstevalFormatString { + const char* const fmt; + consteval ConstevalFormatString(const char* str) : fmt{str} { Detail_CheckNumFormatSpecifiers(fmt); } + constexpr static void Detail_CheckNumFormatSpecifiers(std::string_view str) + { + unsigned count_normal{0}; // Number of "normal" specifiers, like %s + unsigned count_pos{0}; // Max number in positional specifier, like %8$s + for (auto it{str.begin()}; it < str.end();) { + if (*it != '%') { + ++it; + continue; + } + + if (++it >= str.end()) throw "Format specifier incorrectly terminated by end of string"; + if (*it == '%') { + // Percent escape: %% + ++it; + continue; + } + + unsigned maybe_num{0}; + while ('0' <= *it && *it <= '9') { + maybe_num *= 10; + maybe_num += *it - '0'; + ++it; + }; + + if (*it == '$') { + // Positional specifier, like %8$s + if (maybe_num == 0) throw "Positional format specifier must have position of at least 1"; + count_pos = std::max(count_pos, maybe_num); + if (++it >= str.end()) throw "Format specifier incorrectly terminated by end of string"; + } else { + // Non-positional specifier, like %s + ++count_normal; + ++it; + } + // The remainder "[flags][width][.precision][length]type" of the + // specifier is not checked. Parsing continues with the next '%'. + } + if (count_normal && count_pos) throw "Format specifiers must be all positional or all non-positional!"; + unsigned count{count_normal | count_pos}; + if (num_params != count) throw "Format specifier count must match the argument count!"; + } +}; + void ReplaceAll(std::string& in_out, const std::string& search, const std::string& substitute); /** Split a string on any char found in separators, returning a vector. From fa7087b896c0150c29d7a27c53e0533831a2bf3b Mon Sep 17 00:00:00 2001 From: MarcoFalke <*~=`'#}+{/-|&$^_@721217.xyz> Date: Tue, 30 Jul 2024 14:30:16 +0200 Subject: [PATCH 3/4] util: Use compile-time check for FatalErrorf --- src/index/base.cpp | 5 +++-- src/index/base.h | 5 +++-- src/util/string.h | 9 +++++++++ test/lint/lint-format-strings.py | 1 - test/lint/run-lint-format-strings.py | 2 -- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/index/base.cpp b/src/index/base.cpp index 955d7b67c9e..6f9860415f3 100644 --- a/src/index/base.cpp +++ b/src/index/base.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022 The Bitcoin Core developers +// Copyright (c) 2017-present The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include // For g_chainman @@ -27,7 +28,7 @@ constexpr auto SYNC_LOG_INTERVAL{30s}; constexpr auto SYNC_LOCATOR_WRITE_INTERVAL{30s}; template -void BaseIndex::FatalErrorf(const char* fmt, const Args&... args) +void BaseIndex::FatalErrorf(util::ConstevalFormatString fmt, const Args&... args) { auto message = tfm::format(fmt, args...); node::AbortNode(m_chain->context()->shutdown, m_chain->context()->exit_status, Untranslated(message), m_chain->context()->warnings.get()); diff --git a/src/index/base.h b/src/index/base.h index 0eb1d9ca3b2..beb1575ab21 100644 --- a/src/index/base.h +++ b/src/index/base.h @@ -1,4 +1,4 @@ -// Copyright (c) 2017-2022 The Bitcoin Core developers +// Copyright (c) 2017-present The Bitcoin Core developers // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -94,7 +95,7 @@ private: virtual bool AllowPrune() const = 0; template - void FatalErrorf(const char* fmt, const Args&... args); + void FatalErrorf(util::ConstevalFormatString fmt, const Args&... args); protected: std::unique_ptr m_chain; diff --git a/src/util/string.h b/src/util/string.h index bdf074a9a60..c5183d6c801 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -6,6 +6,7 @@ #define BITCOIN_UTIL_STRING_H #include +#include #include #include @@ -234,4 +235,12 @@ template } } // namespace util +namespace tinyformat { +template +std::string format(util::ConstevalFormatString fmt, const Args&... args) +{ + return format(fmt.fmt, args...); +} +} // namespace tinyformat + #endif // BITCOIN_UTIL_STRING_H diff --git a/test/lint/lint-format-strings.py b/test/lint/lint-format-strings.py index 9c9fbb2e392..5dbad3f452f 100755 --- a/test/lint/lint-format-strings.py +++ b/test/lint/lint-format-strings.py @@ -16,7 +16,6 @@ import re import sys FUNCTION_NAMES_AND_NUMBER_OF_LEADING_ARGUMENTS = [ - 'FatalErrorf,0', 'tfm::format,1', # Assuming tfm::::format(std::ostream&, ... 'LogConnectFailure,1', 'LogError,0', diff --git a/test/lint/run-lint-format-strings.py b/test/lint/run-lint-format-strings.py index 9a7386ee64a..f78f356a349 100755 --- a/test/lint/run-lint-format-strings.py +++ b/test/lint/run-lint-format-strings.py @@ -13,8 +13,6 @@ import re import sys FALSE_POSITIVES = [ - ("src/index/base.cpp", "FatalErrorf(const char* fmt, const Args&... args)"), - ("src/index/base.h", "FatalErrorf(const char* fmt, const Args&... args)"), ("src/netbase.cpp", "LogConnectFailure(bool manual_connection, const char* fmt, const Args&... args)"), ("src/clientversion.cpp", "strprintf(_(COPYRIGHT_HOLDERS).translated, COPYRIGHT_HOLDERS_SUBSTITUTION)"), ("src/test/translation_tests.cpp", "strprintf(format, arg)"), From fa5bc450d5d4c1d2daf7b205f1678402c3c21678 Mon Sep 17 00:00:00 2001 From: MarcoFalke <*~=`'#}+{/-|&$^_@721217.xyz> Date: Tue, 10 Sep 2024 22:55:29 +0200 Subject: [PATCH 4/4] util: Use compile-time check for LogConnectFailure --- src/netbase.cpp | 3 ++- test/lint/lint-format-strings.py | 1 - test/lint/run-lint-format-strings.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/netbase.cpp b/src/netbase.cpp index 1a96443d4a7..a6de72090e6 100644 --- a/src/netbase.cpp +++ b/src/netbase.cpp @@ -557,7 +557,8 @@ std::unique_ptr CreateSockOS(int domain, int type, int protocol) std::function(int, int, int)> CreateSock = CreateSockOS; template -static void LogConnectFailure(bool manual_connection, const char* fmt, const Args&... args) { +static void LogConnectFailure(bool manual_connection, util::ConstevalFormatString fmt, const Args&... args) +{ std::string error_message = tfm::format(fmt, args...); if (manual_connection) { LogPrintf("%s\n", error_message); diff --git a/test/lint/lint-format-strings.py b/test/lint/lint-format-strings.py index 5dbad3f452f..c30975fea7f 100755 --- a/test/lint/lint-format-strings.py +++ b/test/lint/lint-format-strings.py @@ -17,7 +17,6 @@ import sys FUNCTION_NAMES_AND_NUMBER_OF_LEADING_ARGUMENTS = [ 'tfm::format,1', # Assuming tfm::::format(std::ostream&, ... - 'LogConnectFailure,1', 'LogError,0', 'LogWarning,0', 'LogInfo,0', diff --git a/test/lint/run-lint-format-strings.py b/test/lint/run-lint-format-strings.py index f78f356a349..a32717653aa 100755 --- a/test/lint/run-lint-format-strings.py +++ b/test/lint/run-lint-format-strings.py @@ -13,7 +13,6 @@ import re import sys FALSE_POSITIVES = [ - ("src/netbase.cpp", "LogConnectFailure(bool manual_connection, const char* fmt, const Args&... args)"), ("src/clientversion.cpp", "strprintf(_(COPYRIGHT_HOLDERS).translated, COPYRIGHT_HOLDERS_SUBSTITUTION)"), ("src/test/translation_tests.cpp", "strprintf(format, arg)"), ("src/validationinterface.cpp", "LogDebug(BCLog::VALIDATION, fmt \"\\n\", __VA_ARGS__)"),