From dada605d765db0d8ebffc01615ed995fb0392fe1 Mon Sep 17 00:00:00 2001 From: VemundH Date: Fri, 13 Dec 2019 10:14:41 +0100 Subject: [PATCH 1/2] Fix test build in C++20 --- tests/test_builtin_casters.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_builtin_casters.cpp b/tests/test_builtin_casters.cpp index e026127f89..5e2594345f 100644 --- a/tests/test_builtin_casters.cpp +++ b/tests/test_builtin_casters.cpp @@ -30,7 +30,7 @@ TEST_SUBMODULE(builtin_casters, m) { else { wstr.push_back((wchar_t) mathbfA32); } // 𝐀, utf32 wstr.push_back(0x7a); // z - m.def("good_utf8_string", []() { return std::string(u8"Say utf8\u203d \U0001f382 \U0001d400"); }); // Say utf8β€½ πŸŽ‚ 𝐀 + m.def("good_utf8_string", []() { return std::string((const char*)u8"Say utf8\u203d \U0001f382 \U0001d400"); }); // Say utf8β€½ πŸŽ‚ 𝐀 m.def("good_utf16_string", [=]() { return std::u16string({ b16, ib16, cake16_1, cake16_2, mathbfA16_1, mathbfA16_2, z16 }); }); // bβ€½πŸŽ‚π€z m.def("good_utf32_string", [=]() { return std::u32string({ a32, mathbfA32, cake32, ib32, z32 }); }); // aπ€πŸŽ‚β€½z m.def("good_wchar_string", [=]() { return wstr; }); // a‽𝐀z @@ -69,7 +69,7 @@ TEST_SUBMODULE(builtin_casters, m) { m.def("string_view_chars", [](std::string_view s) { py::list l; for (auto c : s) l.append((std::uint8_t) c); return l; }); m.def("string_view16_chars", [](std::u16string_view s) { py::list l; for (auto c : s) l.append((int) c); return l; }); m.def("string_view32_chars", [](std::u32string_view s) { py::list l; for (auto c : s) l.append((int) c); return l; }); - m.def("string_view_return", []() { return std::string_view(u8"utf8 secret \U0001f382"); }); + m.def("string_view_return", []() { return std::string_view((const char*)u8"utf8 secret \U0001f382"); }); m.def("string_view16_return", []() { return std::u16string_view(u"utf16 secret \U0001f382"); }); m.def("string_view32_return", []() { return std::u32string_view(U"utf32 secret \U0001f382"); }); #endif From a1531e89e13905c8d6c44f1e6ebdddfb3b3e4995 Mon Sep 17 00:00:00 2001 From: VemundH Date: Fri, 13 Dec 2019 10:32:24 +0100 Subject: [PATCH 2/2] Add C++20 char8_t/u8string support --- include/pybind11/cast.h | 16 +++++++++++--- tests/test_builtin_casters.cpp | 18 ++++++++++++++++ tests/test_builtin_casters.py | 39 ++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 3 deletions(-) diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h index ad225312d4..3af6735111 100644 --- a/include/pybind11/cast.h +++ b/include/pybind11/cast.h @@ -32,6 +32,10 @@ #include #endif +#if defined(__cpp_lib_char8_t) && __cpp_lib_char8_t >= 201811L +# define PYBIND11_HAS_U8STRING +#endif + NAMESPACE_BEGIN(PYBIND11_NAMESPACE) NAMESPACE_BEGIN(detail) @@ -988,6 +992,9 @@ template class type_caster> { template using is_std_char_type = any_of< std::is_same, /* std::string */ +#if defined(PYBIND11_HAS_U8STRING) + std::is_same, /* std::u8string */ +#endif std::is_same, /* std::u16string */ std::is_same, /* std::u32string */ std::is_same /* std::wstring */ @@ -1191,6 +1198,9 @@ template struct string_caster { // Simplify life by being able to assume standard char sizes (the standard only guarantees // minimums, but Python requires exact sizes) static_assert(!std::is_same::value || sizeof(CharT) == 1, "Unsupported char size != 1"); +#if defined(PYBIND11_HAS_U8STRING) + static_assert(!std::is_same::value || sizeof(CharT) == 1, "Unsupported char8_t size != 1"); +#endif static_assert(!std::is_same::value || sizeof(CharT) == 2, "Unsupported char16_t size != 2"); static_assert(!std::is_same::value || sizeof(CharT) == 4, "Unsupported char32_t size != 4"); // wchar_t can be either 16 bits (Windows) or 32 (everywhere else) @@ -1209,7 +1219,7 @@ template struct string_caster { #if PY_MAJOR_VERSION >= 3 return load_bytes(load_src); #else - if (sizeof(CharT) == 1) { + if (std::is_same::value) { return load_bytes(load_src); } @@ -1269,7 +1279,7 @@ template struct string_caster { // without any encoding/decoding attempt). For other C++ char sizes this is a no-op. // which supports loading a unicode from a str, doesn't take this path. template - bool load_bytes(enable_if_t src) { + bool load_bytes(enable_if_t::value, handle> src) { if (PYBIND11_BYTES_CHECK(src.ptr())) { // We were passed a Python 3 raw bytes; accept it into a std::string or char* // without any encoding attempt. @@ -1284,7 +1294,7 @@ template struct string_caster { } template - bool load_bytes(enable_if_t) { return false; } + bool load_bytes(enable_if_t::value, handle>) { return false; } }; template diff --git a/tests/test_builtin_casters.cpp b/tests/test_builtin_casters.cpp index 5e2594345f..acb2446912 100644 --- a/tests/test_builtin_casters.cpp +++ b/tests/test_builtin_casters.cpp @@ -60,6 +60,18 @@ TEST_SUBMODULE(builtin_casters, m) { m.def("strlen", [](char *s) { return strlen(s); }); m.def("string_length", [](std::string s) { return s.length(); }); +#ifdef PYBIND11_HAS_U8STRING + m.attr("has_u8string") = true; + m.def("good_utf8_u8string", []() { return std::u8string(u8"Say utf8\u203d \U0001f382 \U0001d400"); }); // Say utf8β€½ πŸŽ‚ 𝐀 + m.def("bad_utf8_u8string", []() { return std::u8string((const char8_t*)"abc\xd0" "def"); }); + + m.def("u8_char8_Z", []() -> char8_t { return u8'Z'; }); + + // test_single_char_arguments + m.def("ord_char8", [](char8_t c) -> int { return static_cast(c); }); + m.def("ord_char8_lv", [](char8_t &c) -> int { return static_cast(c); }); +#endif + // test_string_view #ifdef PYBIND11_HAS_STRING_VIEW m.attr("has_string_view") = true; @@ -72,6 +84,12 @@ TEST_SUBMODULE(builtin_casters, m) { m.def("string_view_return", []() { return std::string_view((const char*)u8"utf8 secret \U0001f382"); }); m.def("string_view16_return", []() { return std::u16string_view(u"utf16 secret \U0001f382"); }); m.def("string_view32_return", []() { return std::u32string_view(U"utf32 secret \U0001f382"); }); + +# ifdef PYBIND11_HAS_U8STRING + m.def("string_view8_print", [](std::u8string_view s) { py::print(s, s.size()); }); + m.def("string_view8_chars", [](std::u8string_view s) { py::list l; for (auto c : s) l.append((std::uint8_t) c); return l; }); + m.def("string_view8_return", []() { return std::u8string_view(u8"utf8 secret \U0001f382"); }); +# endif #endif // test_integer_casting diff --git a/tests/test_builtin_casters.py b/tests/test_builtin_casters.py index abbfcec493..91422588cf 100644 --- a/tests/test_builtin_casters.py +++ b/tests/test_builtin_casters.py @@ -15,6 +15,8 @@ def test_unicode_conversion(): assert m.good_utf16_string() == u"bβ€½πŸŽ‚π€z" assert m.good_utf32_string() == u"aπ€πŸŽ‚β€½z" assert m.good_wchar_string() == u"aβΈ˜π€z" + if hasattr(m, "has_u8string"): + assert m.good_utf8_u8string() == u"Say utf8β€½ πŸŽ‚ 𝐀" with pytest.raises(UnicodeDecodeError): m.bad_utf8_string() @@ -29,12 +31,17 @@ def test_unicode_conversion(): if hasattr(m, "bad_wchar_string"): with pytest.raises(UnicodeDecodeError): m.bad_wchar_string() + if hasattr(m, "has_u8string"): + with pytest.raises(UnicodeDecodeError): + m.bad_utf8_u8string() assert m.u8_Z() == 'Z' assert m.u8_eacute() == u'Γ©' assert m.u16_ibang() == u'β€½' assert m.u32_mathbfA() == u'𝐀' assert m.wchar_heart() == u'β™₯' + if hasattr(m, "has_u8string"): + assert m.u8_char8_Z() == 'Z' def test_single_char_arguments(): @@ -92,6 +99,17 @@ def toobig_message(r): assert m.ord_wchar(u'aa') assert str(excinfo.value) == toolong_message + if hasattr(m, "has_u8string"): + assert m.ord_char8(u'a') == 0x61 # simple ASCII + assert m.ord_char8_lv(u'b') == 0x62 + assert m.ord_char8(u'Γ©') == 0xE9 # requires 2 bytes in utf-8, but can be stuffed in a char + with pytest.raises(ValueError) as excinfo: + assert m.ord_char8(u'Δ€') == 0x100 # requires 2 bytes, doesn't fit in a char + assert str(excinfo.value) == toobig_message(0x100) + with pytest.raises(ValueError) as excinfo: + assert m.ord_char8(u'ab') + assert str(excinfo.value) == toolong_message + def test_bytes_to_string(): """Tests the ability to pass bytes to C++ string-accepting functions. Note that this is @@ -116,10 +134,15 @@ def test_string_view(capture): assert m.string_view_chars("Hi πŸŽ‚") == [72, 105, 32, 0xf0, 0x9f, 0x8e, 0x82] assert m.string_view16_chars("Hi πŸŽ‚") == [72, 105, 32, 0xd83c, 0xdf82] assert m.string_view32_chars("Hi πŸŽ‚") == [72, 105, 32, 127874] + if hasattr(m, "has_u8string"): + assert m.string_view8_chars("Hi") == [72, 105] + assert m.string_view8_chars("Hi πŸŽ‚") == [72, 105, 32, 0xf0, 0x9f, 0x8e, 0x82] assert m.string_view_return() == "utf8 secret πŸŽ‚" assert m.string_view16_return() == "utf16 secret πŸŽ‚" assert m.string_view32_return() == "utf32 secret πŸŽ‚" + if hasattr(m, "has_u8string"): + assert m.string_view8_return() == "utf8 secret πŸŽ‚" with capture: m.string_view_print("Hi") @@ -132,6 +155,14 @@ def test_string_view(capture): utf16 πŸŽ‚ 8 utf32 πŸŽ‚ 7 """ + if hasattr(m, "has_u8string"): + with capture: + m.string_view8_print("Hi") + m.string_view8_print("utf8 πŸŽ‚") + assert capture == """ + Hi 2 + utf8 πŸŽ‚ 9 + """ with capture: m.string_view_print("Hi, ascii") @@ -144,6 +175,14 @@ def test_string_view(capture): Hi, utf16 πŸŽ‚ 12 Hi, utf32 πŸŽ‚ 11 """ + if hasattr(m, "has_u8string"): + with capture: + m.string_view8_print("Hi, ascii") + m.string_view8_print("Hi, utf8 πŸŽ‚") + assert capture == """ + Hi, ascii 9 + Hi, utf8 πŸŽ‚ 13 + """ def test_integer_casting():