diff --git a/Cargo.lock b/Cargo.lock index fa8fdaafd..40c63133b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -267,6 +267,7 @@ dependencies = [ "semver", "smallvec", "tokio", + "widestring", ] [[package]] @@ -534,6 +535,12 @@ dependencies = [ "libc", ] +[[package]] +name = "widestring" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "653f141f39ec16bba3c5abe400a0c60da7468261cc2cbf36805022876bc721a8" + [[package]] name = "winapi" version = "0.3.9" diff --git a/crates/neon/Cargo.toml b/crates/neon/Cargo.toml index 06395027c..f52660a8c 100644 --- a/crates/neon/Cargo.toml +++ b/crates/neon/Cargo.toml @@ -12,8 +12,9 @@ edition = "2018" [dev-dependencies] semver = "1" -psd = "0.3.1" # used for a doc example -anyhow = "1.0.58" # used for a doc example +psd = "0.3.1" # used for a doc example +anyhow = "1.0.58" # used for a doc example +widestring = "1.0.2" # used for a doc example [target.'cfg(not(target = "windows"))'.dev-dependencies] # Avoid `clang` as a dependency on windows diff --git a/crates/neon/src/sys/bindings/functions.rs b/crates/neon/src/sys/bindings/functions.rs index 0c0ef66d5..dc03b5271 100644 --- a/crates/neon/src/sys/bindings/functions.rs +++ b/crates/neon/src/sys/bindings/functions.rs @@ -65,6 +65,17 @@ mod napi1 { result: *mut usize, ) -> Status; + // The `buf` argument is defined as a `char16_t` which _should_ be a `u16` on most + // platforms. When generating bindings with `rust-bindgen` it unconditionally defines + // it as `u16` as well. 
+ fn get_value_string_utf16( + env: Env, + value: Value, + buf: *mut u16, + bufsize: usize, + result: *mut usize, + ) -> Status; + fn create_type_error(env: Env, code: Value, msg: Value, result: *mut Value) -> Status; fn create_range_error(env: Env, code: Value, msg: Value, result: *mut Value) -> Status; diff --git a/crates/neon/src/sys/string.rs b/crates/neon/src/sys/string.rs index 1d80b57b8..a2c2819bf 100644 --- a/crates/neon/src/sys/string.rs +++ b/crates/neon/src/sys/string.rs @@ -11,23 +11,40 @@ pub unsafe fn new(out: &mut Local, env: Env, data: *const u8, len: i32) -> bool status == napi::Status::Ok } -pub unsafe fn utf8_len(env: Env, value: Local) -> isize { +pub unsafe fn utf8_len(env: Env, value: Local) -> usize { let mut len = MaybeUninit::uninit(); let status = napi::get_value_string_utf8(env, value, ptr::null_mut(), 0, len.as_mut_ptr()); assert_eq!(status, napi::Status::Ok); - len.assume_init() as isize + len.assume_init() } -pub unsafe fn data(env: Env, out: *mut u8, len: isize, value: Local) -> isize { +pub unsafe fn data(env: Env, out: *mut u8, len: usize, value: Local) -> usize { let mut read = MaybeUninit::uninit(); - let status = - napi::get_value_string_utf8(env, value, out as *mut _, len as usize, read.as_mut_ptr()); + let status = napi::get_value_string_utf8(env, value, out as *mut _, len, read.as_mut_ptr()); assert_eq!(status, napi::Status::Ok); - read.assume_init() as isize + read.assume_init() +} + +pub unsafe fn utf16_len(env: Env, value: Local) -> usize { + let mut len = MaybeUninit::uninit(); + let status = napi::get_value_string_utf16(env, value, ptr::null_mut(), 0, len.as_mut_ptr()); + + assert_eq!(status, napi::Status::Ok); + + len.assume_init() +} + +pub unsafe fn data_utf16(env: Env, out: *mut u16, len: usize, value: Local) -> usize { + let mut read = MaybeUninit::uninit(); + let status = napi::get_value_string_utf16(env, value, out, len, read.as_mut_ptr()); + + assert_eq!(status, napi::Status::Ok); + + read.assume_init() } pub 
unsafe fn run_script(out: &mut Local, env: Env, value: Local) -> bool { diff --git a/crates/neon/src/types_impl/mod.rs b/crates/neon/src/types_impl/mod.rs index 047d32722..21a30ce61 100644 --- a/crates/neon/src/types_impl/mod.rs +++ b/crates/neon/src/types_impl/mod.rs @@ -343,22 +343,132 @@ impl private::ValueInternal for JsString { } impl JsString { - pub fn size<'a, C: Context<'a>>(&self, cx: &mut C) -> isize { + /// Return the byte size of this string when converted to a Rust [`String`] with + /// [`JsString::value`]. + /// + /// # Example + /// + /// A function that verifies the length of the passed JavaScript string. The string is assumed + /// to be `hello 🥹` here, which encodes as 10 bytes in UTF-8: + /// + /// - 6 bytes for `hello ` (including the space). + /// - 4 bytes for the emoji `🥹`. + /// + /// ```rust + /// # use neon::prelude::*; + /// fn string_len(mut cx: FunctionContext) -> JsResult<JsUndefined> { + /// let len = cx.argument::<JsString>(0)?.size(&mut cx); + /// // assuming the function is called with the JS string `hello 🥹`. + /// assert_eq!(10, len); + /// + /// Ok(cx.undefined()) + /// } + /// ``` + pub fn size<'a, C: Context<'a>>(&self, cx: &mut C) -> usize { let env = cx.env().to_raw(); unsafe { sys::string::utf8_len(env, self.to_raw()) } } + /// Return the size of this string encoded as UTF-16 with [`JsString::to_utf16`]. + /// + /// # Example + /// + /// A function that verifies the length of the passed JavaScript string. The string is assumed + /// to be `hello 🥹` here, which encodes as 8 `u16`s in UTF-16: + /// + /// - 6 `u16`s for `hello ` (including the space). + /// - 2 `u16`s for the emoji `🥹`. + /// + /// ```rust + /// # use neon::prelude::*; + /// fn string_len_utf16(mut cx: FunctionContext) -> JsResult<JsUndefined> { + /// let len = cx.argument::<JsString>(0)?.size_utf16(&mut cx); + /// // assuming the function is called with the JS string `hello 🥹`.
+ /// assert_eq!(8, len); + /// + /// Ok(cx.undefined()) + /// } + /// ``` + pub fn size_utf16<'a, C: Context<'a>>(&self, cx: &mut C) -> usize { + let env = cx.env().to_raw(); + + unsafe { sys::string::utf16_len(env, self.to_raw()) } + } + + /// Convert the JavaScript string into a Rust [`String`]. + /// + /// # Example + /// + /// A function that expects a single JavaScript string as argument and prints it out. + /// + /// ```rust + /// # use neon::prelude::*; + /// fn print_string(mut cx: FunctionContext) -> JsResult<JsUndefined> { + /// let s = cx.argument::<JsString>(0)?.value(&mut cx); + /// println!("JavaScript string content: {}", s); + /// + /// Ok(cx.undefined()) + /// } + /// ``` pub fn value<'a, C: Context<'a>>(&self, cx: &mut C) -> String { let env = cx.env().to_raw(); unsafe { let capacity = sys::string::utf8_len(env, self.to_raw()) + 1; - let mut buffer: Vec<u8> = Vec::with_capacity(capacity as usize); - let p = buffer.as_mut_ptr(); - std::mem::forget(buffer); - let len = sys::string::data(env, p, capacity, self.to_raw()); - String::from_raw_parts(p, len as usize, capacity as usize) + let mut buffer: Vec<u8> = Vec::with_capacity(capacity); + let len = sys::string::data(env, buffer.as_mut_ptr(), capacity, self.to_raw()); + buffer.set_len(len); + String::from_utf8_unchecked(buffer) + } + } + + /// Convert the JavaScript String into a UTF-16 encoded [`Vec<u16>`]. + /// + /// The returned vector is guaranteed to be valid UTF-16. Therefore, any external crate that + /// handles UTF-16 encoded strings, can assume the content to be valid and skip eventual + /// validation steps. + /// + /// # Example + /// + /// A function that expects a single JavaScript string as argument and prints it out as a raw + /// vector of `u16`s.
+ /// + /// ```rust + /// # use neon::prelude::*; + /// fn print_string_as_utf16(mut cx: FunctionContext) -> JsResult<JsUndefined> { + /// let s = cx.argument::<JsString>(0)?.to_utf16(&mut cx); + /// println!("JavaScript string as raw UTF-16: {:?}", s); + /// + /// Ok(cx.undefined()) + /// } + /// ``` + /// + /// Again a function that expects a single JavaScript string as argument, but utilizes the + /// [`widestring`](https://crates.io/crates/widestring) crate to handle the raw [`Vec<u16>`] as + /// a typical string. + /// + /// ```rust + /// # use neon::prelude::*; + /// fn print_with_widestring(mut cx: FunctionContext) -> JsResult<JsUndefined> { + /// let s = cx.argument::<JsString>(0)?.to_utf16(&mut cx); + /// // The returned vector is guaranteed to be valid UTF-16. + /// // Therefore, we can skip the validation step. + /// let s = unsafe { widestring::Utf16String::from_vec_unchecked(s) }; + /// println!("JavaScript string as UTF-16: {}", s); + /// + /// Ok(cx.undefined()) + /// } + /// ``` + pub fn to_utf16<'a, C: Context<'a>>(&self, cx: &mut C) -> Vec<u16> { + let env = cx.env().to_raw(); + + unsafe { + let capacity = sys::string::utf16_len(env, self.to_raw()) + 1; + let mut buffer: Vec<u16> = Vec::with_capacity(capacity); + let len = sys::string::data_utf16(env, buffer.as_mut_ptr(), capacity, self.to_raw()); + buffer.set_len(len); + buffer } } diff --git a/test/napi/lib/strings.js b/test/napi/lib/strings.js index f1d401cd5..b3f2cc91b 100644 --- a/test/napi/lib/strings.js +++ b/test/napi/lib/strings.js @@ -5,6 +5,18 @@ describe("JsString", function () { it("should return a JsString built in Rust", function () { assert.equal(addon.return_js_string(), "hello node"); }); + it("should return a raw valid UTF-16 string built in Rust", function () { + const decoder = new TextDecoder("utf-16"); + assert.equal(decoder.decode(addon.return_js_string_utf16()), "hello 🥹"); + }); + describe("encoding", function () { + it("should return the UTF-8 string length", function () { + assert.equal(addon.return_length_utf8("hello 🥹"), 10); +
}); + it("should return the UTF-16 string length", function () { + assert.equal(addon.return_length_utf16("hello 🥹"), 8); + }); + }); describe("run_as_script", function () { it("should return the evaluated value", function () { assert.equal(addon.run_string_as_script("6 * 7"), 42); diff --git a/test/napi/src/js/strings.rs b/test/napi/src/js/strings.rs index c03f25c41..ad69b0151 100644 --- a/test/napi/src/js/strings.rs +++ b/test/napi/src/js/strings.rs @@ -4,6 +4,21 @@ pub fn return_js_string(mut cx: FunctionContext) -> JsResult<JsString> { Ok(cx.string("hello node")) } +pub fn return_js_string_utf16(mut cx: FunctionContext) -> JsResult<JsTypedArray<u16>> { + let raw = "hello 🥹".encode_utf16().collect::<Vec<_>>(); + JsTypedArray::from_slice(&mut cx, &raw) +} + +pub fn return_length_utf8(mut cx: FunctionContext) -> JsResult<JsNumber> { + let value = cx.argument::<JsString>(0)?.value(&mut cx); + Ok(cx.number(value.len() as f64)) +} + +pub fn return_length_utf16(mut cx: FunctionContext) -> JsResult<JsNumber> { + let value = cx.argument::<JsString>(0)?.to_utf16(&mut cx); + Ok(cx.number(value.len() as f64)) +} + pub fn run_string_as_script(mut cx: FunctionContext) -> JsResult<JsValue> { let string_script = cx.argument::<JsString>(0)?; eval(&mut cx, string_script) diff --git a/test/napi/src/lib.rs b/test/napi/src/lib.rs index 4739a889e..c2bb89204 100644 --- a/test/napi/src/lib.rs +++ b/test/napi/src/lib.rs @@ -108,6 +108,9 @@ fn main(mut cx: ModuleContext) -> NeonResult<()> { cx.export_function("add1", add1)?; cx.export_function("return_js_string", return_js_string)?; + cx.export_function("return_js_string_utf16", return_js_string_utf16)?; + cx.export_function("return_length_utf8", return_length_utf8)?; + cx.export_function("return_length_utf16", return_length_utf16)?; cx.export_function("run_string_as_script", run_string_as_script)?; cx.export_function("return_js_number", return_js_number)?;