From 184f34f2d0fa2e56ad594966b2b99ff4cf840d95 Mon Sep 17 00:00:00 2001 From: Ryan Ofsky Date: Mon, 28 Oct 2024 19:11:16 -0400 Subject: [PATCH 1/2] util: Support dynamic width & precision in ConstevalFormatString This is needed in the next commit to add compile-time checking to strprintf calls, because bitcoin-cli.cpp uses dynamic width in many format strings. This change is easiest to review ignoring whitespace. Co-authored-by: MarcoFalke <*~=`'#}+{/-|&$^_@721217.xyz> Co-authored-by: Hodlinator <172445034+hodlinator@users.noreply.github.com> Co-authored-by: l0rinc --- src/test/util_string_tests.cpp | 44 ++++++++++++++++--- src/util/string.h | 78 ++++++++++++++++++++-------------- 2 files changed, 82 insertions(+), 40 deletions(-) diff --git a/src/test/util_string_tests.cpp b/src/test/util_string_tests.cpp index 1574fe23582..9f5b702acd1 100644 --- a/src/test/util_string_tests.cpp +++ b/src/test/util_string_tests.cpp @@ -20,7 +20,7 @@ inline void PassFmt(util::ConstevalFormatString fmt) decltype(fmt)::Detail_CheckNumFormatSpecifiers(fmt.fmt); } template -inline void FailFmtWithError(std::string_view wrong_fmt, std::string_view error) +inline void FailFmtWithError(const char* wrong_fmt, std::string_view error) { BOOST_CHECK_EXCEPTION(util::ConstevalFormatString::Detail_CheckNumFormatSpecifiers(wrong_fmt), const char*, HasReason(error)); } @@ -44,6 +44,8 @@ BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec) PassFmt<1>("%+2s"); PassFmt<1>("%.6i"); PassFmt<1>("%5.2f"); + PassFmt<1>("%5.f"); + PassFmt<1>("%.f"); PassFmt<1>("%#x"); PassFmt<1>("%1$5i"); PassFmt<1>("%1$-5i"); @@ -54,15 +56,27 @@ BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec) PassFmt<1>("%_"); PassFmt<1>("%\n"); - // The `*` specifier behavior is unsupported and can lead to runtime - // errors when used in a ConstevalFormatString. Please refer to the - // note in the ConstevalFormatString docs. - PassFmt<1>("%*c"); - PassFmt<2>("%2$*3$d"); - PassFmt<1>("%.*f"); + PassFmt<2>("%*c"); + PassFmt<2>("%+*c"); + PassFmt<2>("%.*f"); + PassFmt<3>("%*.*f"); + PassFmt<3>("%2$*3$d"); + PassFmt<3>("%2$*3$.9d"); + PassFmt<3>("%2$.*3$d"); + PassFmt<3>("%2$9.*3$d"); + PassFmt<3>("%2$+9.*3$d"); + PassFmt<4>("%3$*2$.*4$f"); + + // Make sure multiple flag characters "- 0+" are accepted + PassFmt<3>("'%- 0+*.*f'"); + PassFmt<3>("'%1$- 0+*3$.*2$f'"); auto err_mix{"Format specifiers must be all positional or all non-positional!"}; FailFmtWithError<1>("%s%1$s", err_mix); + FailFmtWithError<2>("%2$*d", err_mix); + FailFmtWithError<2>("%*2$d", err_mix); + FailFmtWithError<2>("%.*3$d", err_mix); + FailFmtWithError<2>("%2$.*d", err_mix); auto err_num{"Format specifier count must match the argument count!"}; FailFmtWithError<1>("", err_num); @@ -70,16 +84,32 @@ BOOST_AUTO_TEST_CASE(ConstevalFormatString_NumSpec) FailFmtWithError<2>("%s", err_num); FailFmtWithError<0>("%1$s", err_num); FailFmtWithError<2>("%1$s", err_num); + FailFmtWithError<1>("%*c", err_num); auto err_0_pos{"Positional format specifier must have position of at least 1"}; FailFmtWithError<1>("%$s", err_0_pos); FailFmtWithError<1>("%$", err_0_pos); FailFmtWithError<0>("%0$", err_0_pos); FailFmtWithError<0>("%0$s", err_0_pos); + FailFmtWithError<2>("%2$*$d", err_0_pos); + FailFmtWithError<2>("%2$*0$d", err_0_pos); + FailFmtWithError<3>("%3$*2$.*$f", err_0_pos); + FailFmtWithError<3>("%3$*2$.*0$f", err_0_pos); auto err_term{"Format specifier incorrectly terminated by end of string"}; FailFmtWithError<1>("%", err_term); + FailFmtWithError<1>("%9", err_term); + FailFmtWithError<1>("%9.", err_term); + FailFmtWithError<1>("%9.9", err_term); + FailFmtWithError<1>("%*", err_term); + FailFmtWithError<1>("%+*", err_term); + FailFmtWithError<1>("%.*", err_term); + FailFmtWithError<1>("%9.*", err_term); FailFmtWithError<1>("%1$", err_term); + FailFmtWithError<1>("%1$9", err_term); + FailFmtWithError<2>("%1$*2$", err_term); + FailFmtWithError<2>("%1$.*2$", err_term); + FailFmtWithError<2>("%1$9.*2$", err_term); } BOOST_AUTO_TEST_SUITE_END() diff --git a/src/util/string.h b/src/util/string.h index c5183d6c801..c059f989250 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -25,53 +25,65 @@ namespace util { * strings, to reduce the likelihood of tinyformat throwing exceptions at * run-time. Validation is partial to try and prevent the most common errors * while avoiding re-implementing the entire parsing logic. - * - * @note Counting of `*` dynamic width and precision fields (such as `%*c`, - * `%2$*3$d`, `%.*f`) is not implemented to minimize code complexity as long as - * they are not used in the codebase. Usage of these fields is not counted and - * can lead to run-time exceptions. Code wanting to use the `*` specifier can - * side-step this struct and call tinyformat directly. */ template struct ConstevalFormatString { const char* const fmt; consteval ConstevalFormatString(const char* str) : fmt{str} { Detail_CheckNumFormatSpecifiers(fmt); } - constexpr static void Detail_CheckNumFormatSpecifiers(std::string_view str) + constexpr static void Detail_CheckNumFormatSpecifiers(const char* str) { unsigned count_normal{0}; // Number of "normal" specifiers, like %s unsigned count_pos{0}; // Max number in positional specifier, like %8$s - for (auto it{str.begin()}; it < str.end();) { - if (*it != '%') { - ++it; - continue; - } + for (auto it{str}; *it != '\0'; ++it) { + if (*it != '%' || *++it == '%') continue; // Skip escaped %% - if (++it >= str.end()) throw "Format specifier incorrectly terminated by end of string"; - if (*it == '%') { - // Percent escape: %% - ++it; - continue; - } + auto add_arg = [&] { + unsigned maybe_num{0}; + while ('0' <= *it && *it <= '9') { + maybe_num *= 10; + maybe_num += *it - '0'; + ++it; + } - unsigned maybe_num{0}; - while ('0' <= *it && *it <= '9') { - maybe_num *= 10; - maybe_num += *it - '0'; - ++it; + if (*it == '$') { + ++it; + // Positional specifier, like %8$s + if (maybe_num == 0) throw "Positional format specifier must have position of at least 1"; + count_pos = std::max(count_pos, maybe_num); + } else { + // Non-positional specifier, like %s + ++count_normal; + } }; - if (*it == '$') { - // Positional specifier, like %8$s - if (maybe_num == 0) throw "Positional format specifier must have position of at least 1"; - count_pos = std::max(count_pos, maybe_num); - if (++it >= str.end()) throw "Format specifier incorrectly terminated by end of string"; - } else { - // Non-positional specifier, like %s - ++count_normal; + // Increase argument count and consume positional specifier, if present. + add_arg(); + + // Consume flags. + while (*it == '#' || *it == '0' || *it == '-' || *it == ' ' || *it == '+') ++it; + + auto parse_size = [&] { + if (*it == '*') { + ++it; + add_arg(); + } else { + while ('0' <= *it && *it <= '9') ++it; + } + }; + + // Consume dynamic or static width value. + parse_size(); + + // Consume dynamic or static precision value. + if (*it == '.') { ++it; + parse_size(); } - // The remainder "[flags][width][.precision][length]type" of the - // specifier is not checked. Parsing continues with the next '%'. + + if (*it == '\0') throw "Format specifier incorrectly terminated by end of string"; + + // Length and type in "[flags][width][.precision][length]type" + // is not checked. Parsing continues with the next '%'. } if (count_normal && count_pos) throw "Format specifiers must be all positional or all non-positional!"; unsigned count{count_normal | count_pos}; From fe39acf88ff552bfc4a502c99774375b91824bb1 Mon Sep 17 00:00:00 2001 From: Ryan Ofsky Date: Mon, 28 Oct 2024 19:13:46 -0400 Subject: [PATCH 2/2] tinyformat: Add compile-time checking for literal format strings Co-authored-by: MarcoFalke <*~=`'#}+{/-|&$^_@721217.xyz> --- src/tinyformat.h | 30 +++++++++++++++++------------- src/util/string.h | 9 --------- 2 files changed, 17 insertions(+), 22 deletions(-) diff --git a/src/tinyformat.h b/src/tinyformat.h index f536306375e..0d0e9149ccd 100644 --- a/src/tinyformat.h +++ b/src/tinyformat.h @@ -145,6 +145,7 @@ namespace tfm = tinyformat; #include #include #include // Added for Bitcoin Core +#include // Added for Bitcoin Core #ifndef TINYFORMAT_ASSERT # include @@ -178,6 +179,18 @@ namespace tfm = tinyformat; namespace tinyformat { +// Added for Bitcoin Core. Wrapper for checking format strings at compile time. +// Unlike ConstevalFormatString this supports std::string for runtime string +// formatting without compile time checks. +template +struct FormatStringCheck { + consteval FormatStringCheck(const char* str) : fmt{util::ConstevalFormatString{str}.fmt} {} + FormatStringCheck(const std::string& str) : fmt{str.c_str()} {} + FormatStringCheck(util::ConstevalFormatString str) : fmt{str.fmt} {} + operator const char*() { return fmt; } + const char* fmt; +}; + // Added for Bitcoin Core class format_error: public std::runtime_error { @@ -1056,7 +1069,7 @@ inline void vformat(std::ostream& out, const char* fmt, FormatListRef list) /// Format list of arguments to the stream according to given format string. template -void format(std::ostream& out, const char* fmt, const Args&... args) +void format(std::ostream& out, FormatStringCheck fmt, const Args&... args) { vformat(out, fmt, makeFormatList(args...)); } @@ -1064,7 +1077,7 @@ void format(std::ostream& out, const char* fmt, const Args&... args) /// Format list of arguments according to the given format string and return /// the result as a string. template -std::string format(const char* fmt, const Args&... args) +std::string format(FormatStringCheck fmt, const Args&... args) { std::ostringstream oss; format(oss, fmt, args...); @@ -1073,13 +1086,13 @@ std::string format(const char* fmt, const Args&... args) /// Format list of arguments to std::cout, according to the given format string template -void printf(const char* fmt, const Args&... args) +void printf(FormatStringCheck fmt, const Args&... args) { format(std::cout, fmt, args...); } template -void printfln(const char* fmt, const Args&... args) +void printfln(FormatStringCheck fmt, const Args&... args) { format(std::cout, fmt, args...); std::cout << '\n'; @@ -1145,15 +1158,6 @@ TINYFORMAT_FOREACH_ARGNUM(TINYFORMAT_MAKE_FORMAT_FUNCS) #endif -// Added for Bitcoin Core -template -std::string format(const std::string &fmt, const Args&... args) -{ - std::ostringstream oss; - format(oss, fmt.c_str(), args...); - return oss.str(); -} - } // namespace tinyformat // Added for Bitcoin Core: diff --git a/src/util/string.h b/src/util/string.h index c059f989250..c9e33e65926 100644 --- a/src/util/string.h +++ b/src/util/string.h @@ -6,7 +6,6 @@ #define BITCOIN_UTIL_STRING_H #include -#include #include #include @@ -247,12 +246,4 @@ template } } // namespace util -namespace tinyformat { -template -std::string format(util::ConstevalFormatString fmt, const Args&... args) -{ - return format(fmt.fmt, args...); -} -} // namespace tinyformat - #endif // BITCOIN_UTIL_STRING_H