mirror of
https://github.com/bitcoin/bitcoin.git
synced 2025-02-24 12:41:41 -05:00

There are no changes to behavior. Changes in this commit are all additions, and are easiest to review using "git diff -U0 --word-diff-regex=." options. Motivation for this change is to keep util functions with really generic names like "Split" and "Join" out of the global namespace so it is easier to see where these functions are defined, and so they don't interfere with function overloading, especially since the util library is a dependency of the kernel library and intended to be used with external code.
87 lines
14 KiB
C++
87 lines
14 KiB
C++
// Copyright (c) 2020-2021 The Bitcoin Core developers
|
|
// Distributed under the MIT software license, see the accompanying
|
|
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
|
|
|
|
#include <test/fuzz/FuzzedDataProvider.h>
|
|
#include <test/fuzz/fuzz.h>
|
|
#include <tinyformat.h>
|
|
#include <util/strencodings.h>
|
|
#include <util/string.h>
|
|
|
|
#include <cassert>
|
|
#include <clocale>
|
|
#include <cstdint>
|
|
#include <locale>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
namespace {
|
|
const std::string locale_identifiers[] = {
|
|
"C", "C.UTF-8", "aa_DJ", "aa_DJ.ISO-8859-1", "aa_DJ.UTF-8", "aa_ER", "aa_ER.UTF-8", "aa_ET", "aa_ET.UTF-8", "af_ZA", "af_ZA.ISO-8859-1", "af_ZA.UTF-8", "agr_PE", "agr_PE.UTF-8", "ak_GH", "ak_GH.UTF-8", "am_ET", "am_ET.UTF-8", "an_ES", "an_ES.ISO-8859-15", "an_ES.UTF-8", "anp_IN", "anp_IN.UTF-8", "ar_AE", "ar_AE.ISO-8859-6", "ar_AE.UTF-8", "ar_BH", "ar_BH.ISO-8859-6", "ar_BH.UTF-8", "ar_DZ", "ar_DZ.ISO-8859-6", "ar_DZ.UTF-8", "ar_EG", "ar_EG.ISO-8859-6", "ar_EG.UTF-8", "ar_IN", "ar_IN.UTF-8", "ar_IQ", "ar_IQ.ISO-8859-6", "ar_IQ.UTF-8", "ar_JO", "ar_JO.ISO-8859-6", "ar_JO.UTF-8", "ar_KW", "ar_KW.ISO-8859-6", "ar_KW.UTF-8", "ar_LB", "ar_LB.ISO-8859-6", "ar_LB.UTF-8", "ar_LY", "ar_LY.ISO-8859-6", "ar_LY.UTF-8", "ar_MA", "ar_MA.ISO-8859-6", "ar_MA.UTF-8", "ar_OM", "ar_OM.ISO-8859-6", "ar_OM.UTF-8", "ar_QA", "ar_QA.ISO-8859-6", "ar_QA.UTF-8", "ar_SA", "ar_SA.ISO-8859-6", "ar_SA.UTF-8", "ar_SD", "ar_SD.ISO-8859-6", "ar_SD.UTF-8", "ar_SS", "ar_SS.UTF-8", "ar_SY", "ar_SY.ISO-8859-6", "ar_SY.UTF-8", "ar_TN", "ar_TN.ISO-8859-6", "ar_TN.UTF-8", "ar_YE", "ar_YE.ISO-8859-6", "ar_YE.UTF-8", "as_IN", "as_IN.UTF-8", "ast_ES", "ast_ES.ISO-8859-15", "ast_ES.UTF-8", "ayc_PE", "ayc_PE.UTF-8", "az_AZ", "az_AZ.UTF-8", "az_IR", "az_IR.UTF-8", "be_BY", "be_BY.CP1251", "be_BY.UTF-8", "bem_ZM", "bem_ZM.UTF-8", "ber_DZ", "ber_DZ.UTF-8", "ber_MA", "ber_MA.UTF-8", "bg_BG", "bg_BG.CP1251", "bg_BG.UTF-8", "bho_IN", "bho_IN.UTF-8", "bho_NP", "bho_NP.UTF-8", "bi_VU", "bi_VU.UTF-8", "bn_BD", "bn_BD.UTF-8", "bn_IN", "bn_IN.UTF-8", "bo_CN", "bo_CN.UTF-8", "bo_IN", "bo_IN.UTF-8", "br_FR", "br_FR.ISO-8859-1", "br_FR.UTF-8", "brx_IN", "brx_IN.UTF-8", "bs_BA", "bs_BA.ISO-8859-2", "bs_BA.UTF-8", "byn_ER", "byn_ER.UTF-8", "ca_AD", "ca_AD.ISO-8859-15", "ca_AD.UTF-8", "ca_ES", "ca_ES.ISO-8859-1", "ca_ES.UTF-8", "ca_FR", "ca_FR.ISO-8859-15", "ca_FR.UTF-8", "ca_IT", "ca_IT.ISO-8859-15", "ca_IT.UTF-8", "ce_RU", "ce_RU.UTF-8", "chr_US", "chr_US.UTF-8", "ckb_IQ", "ckb_IQ.UTF-8", "cmn_TW", "cmn_TW.UTF-8", "crh_UA", "crh_UA.UTF-8", "csb_PL", "csb_PL.UTF-8", "cs_CZ", "cs_CZ.ISO-8859-2", "cs_CZ.UTF-8", "cv_RU", "cv_RU.UTF-8", "cy_GB", "cy_GB.ISO-8859-14", "cy_GB.UTF-8", "da_DK", "da_DK.ISO-8859-1", "da_DK.UTF-8", "de_AT", "de_AT.ISO-8859-1", "de_AT.UTF-8", "de_BE", "de_BE.ISO-8859-1", "de_BE.UTF-8", "de_CH", "de_CH.ISO-8859-1", "de_CH.UTF-8", "de_DE", "de_DE.ISO-8859-1", "de_DE.UTF-8", "de_IT", "de_IT.ISO-8859-1", "de_IT.UTF-8", "de_LU", "de_LU.ISO-8859-1", "de_LU.UTF-8", "doi_IN", "doi_IN.UTF-8", "dv_MV", "dv_MV.UTF-8", "dz_BT", "dz_BT.UTF-8", "el_CY", "el_CY.ISO-8859-7", "el_CY.UTF-8", "el_GR", "el_GR.ISO-8859-7", "el_GR.UTF-8", "en_AG", "en_AG.UTF-8", "en_AU", "en_AU.ISO-8859-1", "en_AU.UTF-8", "en_BW", "en_BW.ISO-8859-1", "en_BW.UTF-8", "en_CA", "en_CA.ISO-8859-1", "en_CA.UTF-8", "en_DK", "en_DK.ISO-8859-1", "en_DK.ISO-8859-15", "en_DK.UTF-8", "en_GB", "en_GB.ISO-8859-1", "en_GB.ISO-8859-15", "en_GB.UTF-8", "en_HK", "en_HK.ISO-8859-1", "en_HK.UTF-8", "en_IE", "en_IE.ISO-8859-1", "en_IE.UTF-8", "en_IL", "en_IL.UTF-8", "en_IN", "en_IN.UTF-8", "en_NG", "en_NG.UTF-8", "en_NZ", "en_NZ.ISO-8859-1", "en_NZ.UTF-8", "en_PH", "en_PH.ISO-8859-1", "en_PH.UTF-8", "en_SG", "en_SG.ISO-8859-1", "en_SG.UTF-8", "en_US", "en_US.ISO-8859-1", "en_US.ISO-8859-15", "en_US.UTF-8", "en_ZA", "en_ZA.ISO-8859-1", "en_ZA.UTF-8", "en_ZM", "en_ZM.UTF-8", "en_ZW", "en_ZW.ISO-8859-1", "en_ZW.UTF-8", "es_AR", "es_AR.ISO-8859-1", "es_AR.UTF-8", "es_BO", "es_BO.ISO-8859-1", "es_BO.UTF-8", "es_CL", "es_CL.ISO-8859-1", "es_CL.UTF-8", "es_CO", "es_CO.ISO-8859-1", "es_CO.UTF-8", "es_CR", "es_CR.ISO-8859-1", "es_CR.UTF-8", "es_CU", "es_CU.UTF-8", "es_DO", "es_DO.ISO-8859-1", "es_DO.UTF-8", "es_EC", "es_EC.ISO-8859-1", "es_EC.UTF-8", "es_ES", "es_ES.ISO-8859-1", "es_ES.UTF-8", "es_GT", "es_GT.ISO-8859-1", "es_GT.UTF-8", "es_HN", "es_HN.ISO-8859-1", "es_HN.UTF-8", "es_MX", "es_MX.ISO-8859-1", "es_MX.UTF-8", "es_NI", "es_NI.ISO-8859-1", "es_NI.UTF-8", "es_PA", "es_PA.ISO-8859-1", "es_PA.UTF-8", "es_PE", "es_PE.ISO-8859-1", "es_PE.UTF-8", "es_PR", "es_PR.ISO-8859-1", "es_PR.UTF-8", "es_PY", "es_PY.ISO-8859-1", "es_PY.UTF-8", "es_SV", "es_SV.ISO-8859-1", "es_SV.UTF-8", "es_US", "es_US.ISO-8859-1", "es_US.UTF-8", "es_UY", "es_UY.ISO-8859-1", "es_UY.UTF-8", "es_VE", "es_VE.ISO-8859-1", "es_VE.UTF-8", "et_EE", "et_EE.ISO-8859-1", "et_EE.ISO-8859-15", "et_EE.UTF-8", "eu_ES", "eu_ES.ISO-8859-1", "eu_ES.UTF-8", "eu_FR", "eu_FR.ISO-8859-1", "eu_FR.UTF-8", "fa_IR", "fa_IR.UTF-8", "ff_SN", "ff_SN.UTF-8", "fi_FI", "fi_FI.ISO-8859-1", "fi_FI.UTF-8", "fil_PH", "fil_PH.UTF-8", "fo_FO", "fo_FO.ISO-8859-1", "fo_FO.UTF-8", "fr_BE", "fr_BE.ISO-8859-1", "fr_BE.UTF-8", "fr_CA", "fr_CA.ISO-8859-1", "fr_CA.UTF-8", "fr_CH", "fr_CH.ISO-8859-1", "fr_CH.UTF-8", "fr_FR", "fr_FR.ISO-8859-1", "fr_FR.UTF-8", "fr_LU", "fr_LU.ISO-8859-1", "fr_LU.UTF-8", "fur_IT", "fur_IT.UTF-8", "fy_DE", "fy_DE.UTF-8", "fy_NL", "fy_NL.UTF-8", "ga_IE", "ga_IE.ISO-8859-1", "ga_IE.UTF-8", "gd_GB", "gd_GB.ISO-8859-15", "gd_GB.UTF-8", "gez_ER", "gez_ER.UTF-8", "gez_ET", "gez_ET.UTF-8", "gl_ES", "gl_ES.ISO-8859-1", "gl_ES.UTF-8", "gu_IN", "gu_IN.UTF-8", "gv_GB", "gv_GB.ISO-8859-1", "gv_GB.UTF-8", "hak_TW", "hak_TW.UTF-8", "ha_NG", "ha_NG.UTF-8", "he_IL", "he_IL.ISO-8859-8", "he_IL.UTF-8", "hif_FJ", "hif_FJ.UTF-8", "hi_IN", "hi_IN.UTF-8", "hne_IN", "hne_IN.UTF-8", "hr_HR", "hr_HR.ISO-8859-2", "hr_HR.UTF-8", "hsb_DE", "hsb_DE.ISO-8859-2", "hsb_DE.UTF-8", "ht_HT", "ht_HT.UTF-8", "hu_HU", "hu_HU.ISO-8859-2", "hu_HU.UTF-8", "hy_AM", "hy_AM.ARMSCII-8", "hy_AM.UTF-8", "ia_FR", "ia_FR.UTF-8", "id_ID", "id_ID.ISO-8859-1", "id_ID.UTF-8", "ig_NG", "ig_NG.UTF-8", "ik_CA", "ik_CA.UTF-8", "is_IS", "is_IS.ISO-8859-1", "is_IS.UTF-8", "it_CH", "it_CH.ISO-8859-1", "it_CH.UTF-8", "it_IT", "it_IT.ISO-8859-1", "it_IT.UTF-8", "iu_CA", "iu_CA.UTF-8", "kab_DZ", "kab_DZ.UTF-8", "ka_GE", "ka_GE.GEORGIAN-PS", "ka_GE.UTF-8", "kk_KZ", "kk_KZ.PT154", "kk_KZ.RK1048", "kk_KZ.UTF-8", "kl_GL", "kl_GL.ISO-8859-1", "kl_GL.UTF-8", "km_KH", "km_KH.UTF-8", "kn_IN", "kn_IN.UTF-8", "kok_IN", "kok_IN.UTF-8", "ks_IN", "ks_IN.UTF-8", "ku_TR", "ku_TR.ISO-8859-9", "ku_TR.UTF-8", "kw_GB", "kw_GB.ISO-8859-1", "kw_GB.UTF-8", "ky_KG", "ky_KG.UTF-8", "lb_LU", "lb_LU.UTF-8", "lg_UG", "lg_UG.ISO-8859-10", "lg_UG.UTF-8", "li_BE", "li_BE.UTF-8", "lij_IT", "lij_IT.UTF-8", "li_NL", "li_NL.UTF-8", "ln_CD", "ln_CD.UTF-8", "lo_LA", "lo_LA.UTF-8", "lt_LT", "lt_LT.ISO-8859-13", "lt_LT.UTF-8", "lv_LV", "lv_LV.ISO-8859-13", "lv_LV.UTF-8", "lzh_TW", "lzh_TW.UTF-8", "mag_IN", "mag_IN.UTF-8", "mai_IN", "mai_IN.UTF-8", "mai_NP", "mai_NP.UTF-8", "mfe_MU", "mfe_MU.UTF-8", "mg_MG", "mg_MG.ISO-8859-15", "mg_MG.UTF-8", "mhr_RU", "mhr_RU.UTF-8", "mi_NZ", "mi_NZ.ISO-8859-13", "mi_NZ.UTF-8", "miq_NI", "miq_NI.UTF-8", "mjw_IN", "mjw_IN.UTF-8", "mk_MK", "mk_MK.ISO-8859-5", "mk_MK.UTF-8", "ml_IN", "ml_IN.UTF-8", "mni_IN", "mni_IN.UTF-8", "mn_MN", "mn_MN.UTF-8", "mr_IN", "mr_IN.UTF-8", "ms_MY", "ms_MY.ISO-8859-1", "ms_MY.UTF-8", "mt_MT", "mt_MT.ISO-8859-3", "mt_MT.UTF-8", "my_MM", "my_MM.UTF-8", "nan_TW", "nan_TW.UTF-8", "nb_NO", "nb_NO.ISO-8859-1", "nb_NO.UTF-8", "nds_DE", "nds_DE.UTF-8", "nds_NL", "nds_NL.UTF-8", "ne_NP", "ne_NP.UTF-8", "nhn_MX", "nhn_MX.UTF-8", "niu_NU", "niu_NU.UTF-8", "niu_NZ", "niu_NZ.UTF-8", "nl_AW", "nl_AW.UTF-8", "nl_BE", "nl_BE.ISO-8859-1", "nl_BE.UTF-8", "nl_NL", "nl_NL.ISO-8859-1", "nl_NL.UTF-8", "nn_NO", "nn_NO.ISO-8859-1", "nn_NO.UTF-8", "nr_ZA", "nr_ZA.UTF-8", "nso_ZA", "nso_ZA.UTF-8", "oc_FR", "oc_FR.ISO-8859-1", "oc_FR.UTF-8", "om_ET", "om_ET.UTF-8", "om_KE", "om_KE.ISO-8859-1", "om_KE.UTF-8", "or_IN", "or_IN.UTF-8", "os_RU", "os_RU.UTF-8", "pa_IN", "pa_IN.UTF-8", "pap_AW", "pap_AW.UTF-8", "pap_CW", "pap_CW.UTF-8", "pa_PK", "pa_PK.UTF-8", "pl_PL", "pl_PL.ISO-8859-2", "pl_PL.UTF-8", "ps_AF", "ps_AF.UTF-8", "pt_BR", "pt_BR.ISO-8859-1", "pt_BR.UTF-8", "pt_PT", "pt_PT.ISO-8859-1", "pt_PT.UTF-8", "quz_PE", "quz_PE.UTF-8", "raj_IN", "raj_IN.UTF-8", "ro_RO", "ro_RO.ISO-8859-2", "ro_RO.UTF-8", "ru_RU", "ru_RU.CP1251", "ru_RU.ISO-8859-5", "ru_RU.KOI8-R", "ru_RU.UTF-8", "ru_UA", "ru_UA.KOI8-U", "ru_UA.UTF-8", "rw_RW", "rw_RW.UTF-8", "sa_IN", "sa_IN.UTF-8", "sat_IN", "sat_IN.UTF-8", "sc_IT", "sc_IT.UTF-8", "sd_IN", "sd_IN.UTF-8", "sd_PK", "sd_PK.UTF-8", "se_NO", "se_NO.UTF-8", "sgs_LT", "sgs_LT.UTF-8", "shn_MM", "shn_MM.UTF-8", "shs_CA", "shs_CA.UTF-8", "sid_ET", "sid_ET.UTF-8", "si_LK", "si_LK.UTF-8", "sk_SK", "sk_SK.ISO-8859-2", "sk_SK.UTF-8", "sl_SI", "sl_SI.ISO-8859-2", "sl_SI.UTF-8", "sm_WS", "sm_WS.UTF-8", "so_DJ", "so_DJ.ISO-8859-1", "so_DJ.UTF-8", "so_ET", "so_ET.UTF-8", "so_KE", "so_KE.ISO-8859-1", "so_KE.UTF-8", "so_SO", "so_SO.ISO-8859-1", "so_SO.UTF-8", "sq_AL", "sq_AL.ISO-8859-1", "sq_AL.UTF-8", "sq_MK", "sq_MK.UTF-8", "sr_ME", "sr_ME.UTF-8", "sr_RS", "sr_RS.UTF-8", "ss_ZA", "ss_ZA.UTF-8", "st_ZA", "st_ZA.ISO-8859-1", "st_ZA.UTF-8", "sv_FI", "sv_FI.ISO-8859-1", "sv_FI.UTF-8", "sv_SE", "sv_SE.ISO-8859-1", "sv_SE.ISO-8859-15", "sv_SE.UTF-8", "sw_KE", "sw_KE.UTF-8", "sw_TZ", "sw_TZ.UTF-8", "szl_PL", "szl_PL.UTF-8", "ta_IN", "ta_IN.UTF-8", "ta_LK", "ta_LK.UTF-8", "te_IN", "te_IN.UTF-8", "tg_TJ", "tg_TJ.KOI8-T", "tg_TJ.UTF-8", "the_NP", "the_NP.UTF-8", "th_TH", "th_TH.TIS-620", "th_TH.UTF-8", "ti_ER", "ti_ER.UTF-8", "ti_ET", "ti_ET.UTF-8", "tig_ER", "tig_ER.UTF-8", "tk_TM", "tk_TM.UTF-8", "tl_PH", "tl_PH.ISO-8859-1", "tl_PH.UTF-8", "tn_ZA", "tn_ZA.UTF-8", "to_TO", "to_TO.UTF-8", "tpi_PG", "tpi_PG.UTF-8", "tr_CY", "tr_CY.ISO-8859-9", "tr_CY.UTF-8", "tr_TR", "tr_TR.ISO-8859-9", "tr_TR.UTF-8", "ts_ZA", "ts_ZA.UTF-8", "tt_RU", "tt_RU.UTF-8", "ug_CN", "ug_CN.UTF-8", "uk_UA", "uk_UA.KOI8-U", "uk_UA.UTF-8", "unm_US", "unm_US.UTF-8", "ur_IN", "ur_IN.UTF-8", "ur_PK", "ur_PK.UTF-8", "uz_UZ", "uz_UZ.ISO-8859-1", "uz_UZ.UTF-8", "ve_ZA", "ve_ZA.UTF-8", "vi_VN", "vi_VN.UTF-8", "wa_BE", "wa_BE.ISO-8859-1", "wa_BE.UTF-8", "wae_CH", "wae_CH.UTF-8", "wal_ET", "wal_ET.UTF-8", "wo_SN", "wo_SN.UTF-8", "xh_ZA", "xh_ZA.ISO-8859-1", "xh_ZA.UTF-8", "yi_US", "yi_US.CP1255", "yi_US.UTF-8", "yo_NG", "yo_NG.UTF-8", "yue_HK", "yue_HK.UTF-8", "yuw_PG", "yuw_PG.UTF-8", "zh_CN", "zh_CN.GB18030", "zh_CN.GB2312", "zh_CN.GBK", "zh_CN.UTF-8", "zh_HK", "zh_HK.BIG5-HKSCS", "zh_HK.UTF-8", "zh_SG", "zh_SG.GB2312", "zh_SG.GBK", "zh_SG.UTF-8", "zh_TW", "zh_TW.BIG5", "zh_TW.EUC-TW", "zh_TW.UTF-8", "zu_ZA", "zu_ZA.ISO-8859-1", "zu_ZA.UTF-8"};
|
|
|
|
std::string ConsumeLocaleIdentifier(FuzzedDataProvider& fuzzed_data_provider)
|
|
{
|
|
return fuzzed_data_provider.PickValueInArray<std::string>(locale_identifiers);
|
|
}
|
|
|
|
bool IsAvailableLocale(const std::string& locale_identifier)
|
|
{
|
|
try {
|
|
(void)std::locale(locale_identifier);
|
|
} catch (const std::runtime_error&) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
} // namespace
|
|
|
|
FUZZ_TARGET(locale)
|
|
{
|
|
FuzzedDataProvider fuzzed_data_provider(buffer.data(), buffer.size());
|
|
const std::string locale_identifier = ConsumeLocaleIdentifier(fuzzed_data_provider);
|
|
if (!IsAvailableLocale(locale_identifier)) {
|
|
return;
|
|
}
|
|
const char* c_locale = std::setlocale(LC_ALL, "C");
|
|
assert(c_locale != nullptr);
|
|
|
|
const std::string random_string = fuzzed_data_provider.ConsumeRandomLengthString(5);
|
|
int32_t parseint32_out_without_locale;
|
|
const bool parseint32_without_locale = ParseInt32(random_string, &parseint32_out_without_locale);
|
|
int64_t parseint64_out_without_locale;
|
|
const bool parseint64_without_locale = ParseInt64(random_string, &parseint64_out_without_locale);
|
|
const int64_t random_int64 = fuzzed_data_provider.ConsumeIntegral<int64_t>();
|
|
const std::string tostring_without_locale = util::ToString(random_int64);
|
|
// The variable `random_int32` is no longer used, but the harness still needs to
|
|
// consume the same data that it did previously to not invalidate existing seeds.
|
|
const int32_t random_int32 = fuzzed_data_provider.ConsumeIntegral<int32_t>();
|
|
(void)random_int32;
|
|
const std::string strprintf_int_without_locale = strprintf("%d", random_int64);
|
|
const double random_double = fuzzed_data_provider.ConsumeFloatingPoint<double>();
|
|
const std::string strprintf_double_without_locale = strprintf("%f", random_double);
|
|
|
|
const char* new_locale = std::setlocale(LC_ALL, locale_identifier.c_str());
|
|
assert(new_locale != nullptr);
|
|
|
|
int32_t parseint32_out_with_locale;
|
|
const bool parseint32_with_locale = ParseInt32(random_string, &parseint32_out_with_locale);
|
|
assert(parseint32_without_locale == parseint32_with_locale);
|
|
if (parseint32_without_locale) {
|
|
assert(parseint32_out_without_locale == parseint32_out_with_locale);
|
|
}
|
|
int64_t parseint64_out_with_locale;
|
|
const bool parseint64_with_locale = ParseInt64(random_string, &parseint64_out_with_locale);
|
|
assert(parseint64_without_locale == parseint64_with_locale);
|
|
if (parseint64_without_locale) {
|
|
assert(parseint64_out_without_locale == parseint64_out_with_locale);
|
|
}
|
|
const std::string tostring_with_locale = util::ToString(random_int64);
|
|
assert(tostring_without_locale == tostring_with_locale);
|
|
const std::string strprintf_int_with_locale = strprintf("%d", random_int64);
|
|
assert(strprintf_int_without_locale == strprintf_int_with_locale);
|
|
const std::string strprintf_double_with_locale = strprintf("%f", random_double);
|
|
assert(strprintf_double_without_locale == strprintf_double_with_locale);
|
|
|
|
const std::locale current_cpp_locale;
|
|
assert(current_cpp_locale == std::locale::classic());
|
|
}
|