diff --git a/Cargo.lock b/Cargo.lock index 48cfc96f604..d80ffbf12bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,6 +24,7 @@ dependencies = [ "prost", "prost-build", "protoc-prebuilt", + "rmp-serde", "serde", "serde-big-array", "serde-generate", @@ -4484,6 +4485,28 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rmp" +version = "0.8.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "228ed7c16fa39782c3b3468e974aec2795e9089153cd08ee2e9aefb3613334c4" +dependencies = [ + "byteorder", + "num-traits", + "paste", +] + +[[package]] +name = "rmp-serde" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52e599a477cf9840e92f2cde9a7189e67b42c57532749bf90aea6ec10facd4db" +dependencies = [ + "byteorder", + "rmp", + "serde", +] + [[package]] name = "route-recognizer" version = "0.3.1" diff --git a/Cargo.toml b/Cargo.toml index a0316e171e2..7f4093ba14a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -148,6 +148,10 @@ prost = "0.13" prost-build = "0.13" protoc-prebuilt = "0.3" +# MessagePack +rmp = "0.8.14" +rmp-serde = "1.3.0" + arbitrary = "1.4.1" cfg-if = "1.0.0" dirs = "4" diff --git a/acvm-repo/acir/Cargo.toml b/acvm-repo/acir/Cargo.toml index e4bd2b9c0bb..76172f27981 100644 --- a/acvm-repo/acir/Cargo.toml +++ b/acvm-repo/acir/Cargo.toml @@ -27,6 +27,7 @@ flate2.workspace = true bincode.workspace = true base64.workspace = true prost.workspace = true +rmp-serde.workspace = true serde-big-array = "0.5.1" strum = { workspace = true } strum_macros = { workspace = true } diff --git a/acvm-repo/acir/src/circuit/mod.rs b/acvm-repo/acir/src/circuit/mod.rs index 43eb94959d2..ab04f99a44d 100644 --- a/acvm-repo/acir/src/circuit/mod.rs +++ b/acvm-repo/acir/src/circuit/mod.rs @@ -6,10 +6,9 @@ pub mod opcodes; use crate::{ native_types::{Expression, Witness}, - proto::convert::ProtoSchema, + serialization::{bincode_deserialize, bincode_serialize}, }; use acir_field::AcirField; -use noir_protobuf::ProtoCodec as _; pub use opcodes::Opcode; use thiserror::Error; @@ -249,8 +248,9 @@ impl Circuit { } impl Program { + /// Serialize and compress the [Program] into bytes. fn write(&self, writer: W) -> std::io::Result<()> { - let buf = self.bincode_serialize()?; + let buf = bincode_serialize(self)?; let mut encoder = flate2::write::GzEncoder::new(writer, Compression::default()); encoder.write_all(&buf)?; encoder.finish()?; @@ -274,38 +274,13 @@ impl Program { } } -impl Program { - /// Serialize the program using `bincode`, which is what we have to use until Barretenberg can read another format. - pub(crate) fn bincode_serialize(&self) -> std::io::Result> { - bincode::serialize(self).map_err(std::io::Error::other) - } -} - -impl Deserialize<'a>> Program { - pub(crate) fn bincode_deserialize(buf: &[u8]) -> std::io::Result { - bincode::deserialize(buf) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e)) - } -} - -#[allow(dead_code)] // TODO: Remove once we switch to protobuf -impl Program { - /// Serialize the program using `protobuf`, which is what we try to replace `bincode` with. - pub(crate) fn proto_serialize(&self) -> Vec { - ProtoSchema::::serialize_to_vec(self) - } - pub(crate) fn proto_deserialize(buf: &[u8]) -> std::io::Result { - ProtoSchema::::deserialize_from_vec(buf) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e)) - } -} - impl Deserialize<'a>> Program { + /// Decompress and deserialize bytes into a [Program]. fn read(reader: R) -> std::io::Result { let mut gz_decoder = flate2::read::GzDecoder::new(reader); let mut buf = Vec::new(); gz_decoder.read_to_end(&mut buf)?; - let program = Self::bincode_deserialize(&buf)?; + let program = bincode_deserialize(&buf)?; Ok(program) } @@ -524,6 +499,7 @@ mod tests { use crate::circuit::Program; use crate::native_types::{WitnessMap, WitnessStack}; + use crate::serialization::*; // It's not possible to set the maximum size of collections via `ProptestConfig`, only an env var, // because e.g. the `VecStrategy` uses `Config::default().max_default_size_range`. On top of that, @@ -577,8 +553,8 @@ mod tests { #[test] fn prop_program_proto_roundtrip() { run_with_max_size_range(100, |program: Program| { - let bz = Program::proto_serialize(&program); - let de = Program::proto_deserialize(&bz)?; + let bz = proto_serialize(&program); + let de = proto_deserialize(&bz)?; prop_assert_eq!(program, de); Ok(()) }); @@ -587,8 +563,18 @@ mod tests { #[test] fn prop_program_bincode_roundtrip() { run_with_max_size_range(100, |program: Program| { - let bz = Program::bincode_serialize(&program)?; - let de = Program::bincode_deserialize(&bz)?; + let bz = bincode_serialize(&program)?; + let de = bincode_deserialize(&bz)?; + prop_assert_eq!(program, de); + Ok(()) + }); + } + + #[test] + fn prop_program_msgpack_roundtrip() { + run_with_max_size_range(100, |(program, compact): (Program, bool)| { + let bz = msgpack_serialize(&program, compact)?; + let de = msgpack_deserialize(&bz)?; prop_assert_eq!(program, de); Ok(()) }); @@ -607,8 +593,8 @@ mod tests { #[test] fn prop_witness_stack_proto_roundtrip() { run_with_max_size_range(10, |witness: WitnessStack| { - let bz = WitnessStack::proto_serialize(&witness); - let de = WitnessStack::proto_deserialize(&bz)?; + let bz = proto_serialize(&witness); + let de = proto_deserialize(&bz)?; prop_assert_eq!(witness, de); Ok(()) }); @@ -617,8 +603,18 @@ mod tests { #[test] fn prop_witness_stack_bincode_roundtrip() { run_with_max_size_range(10, |witness: WitnessStack| { - let bz = WitnessStack::bincode_serialize(&witness)?; - let de = WitnessStack::bincode_deserialize(&bz)?; + let bz = bincode_serialize(&witness)?; + let de = bincode_deserialize(&bz)?; + prop_assert_eq!(witness, de); + Ok(()) + }); + } + + #[test] + fn prop_witness_stack_msgpack_roundtrip() { + run_with_max_size_range(10, |(witness, compact): (WitnessStack, bool)| { + let bz = msgpack_serialize(&witness, compact)?; + let de = msgpack_deserialize(&bz)?; prop_assert_eq!(witness, de); Ok(()) }); @@ -637,8 +633,8 @@ mod tests { #[test] fn prop_witness_map_proto_roundtrip() { run_with_max_size_range(10, |witness: WitnessMap| { - let bz = WitnessMap::proto_serialize(&witness); - let de = WitnessMap::proto_deserialize(&bz)?; + let bz = proto_serialize(&witness); + let de = proto_deserialize(&bz)?; prop_assert_eq!(witness, de); Ok(()) }); @@ -647,8 +643,18 @@ mod tests { #[test] fn prop_witness_map_bincode_roundtrip() { run_with_max_size_range(10, |witness: WitnessMap| { - let bz = WitnessMap::bincode_serialize(&witness)?; - let de = WitnessMap::bincode_deserialize(&bz)?; + let bz = bincode_serialize(&witness)?; + let de = bincode_deserialize(&bz)?; + prop_assert_eq!(witness, de); + Ok(()) + }); + } + + #[test] + fn prop_witness_map_msgpack_roundtrip() { + run_with_max_size_range(10, |(witness, compact): (WitnessMap, bool)| { + let bz = msgpack_serialize(&witness, compact)?; + let de = msgpack_deserialize(&bz)?; prop_assert_eq!(witness, de); Ok(()) }); diff --git a/acvm-repo/acir/src/lib.rs b/acvm-repo/acir/src/lib.rs index ae359fd823b..ba65a114bae 100644 --- a/acvm-repo/acir/src/lib.rs +++ b/acvm-repo/acir/src/lib.rs @@ -7,6 +7,7 @@ pub mod circuit; pub mod native_types; mod proto; +mod serialization; pub use acir_field; pub use acir_field::{AcirField, FieldElement}; diff --git a/acvm-repo/acir/src/native_types/witness_map.rs b/acvm-repo/acir/src/native_types/witness_map.rs index edb6047b5db..fc8b1e0c18d 100644 --- a/acvm-repo/acir/src/native_types/witness_map.rs +++ b/acvm-repo/acir/src/native_types/witness_map.rs @@ -8,11 +8,10 @@ use acir_field::AcirField; use flate2::Compression; use flate2::bufread::GzDecoder; use flate2::bufread::GzEncoder; -use noir_protobuf::ProtoCodec as _; use serde::{Deserialize, Serialize}; use thiserror::Error; -use crate::{native_types::Witness, proto::convert::ProtoSchema}; +use crate::native_types::Witness; #[derive(Debug, Error)] enum SerializationError { @@ -96,18 +95,6 @@ impl Deserialize<'a>> WitnessMap { } } -#[allow(dead_code)] -impl WitnessMap { - pub(crate) fn proto_serialize(&self) -> Vec { - ProtoSchema::::serialize_to_vec(self) - } - - pub(crate) fn proto_deserialize(buf: &[u8]) -> Result { - ProtoSchema::::deserialize_from_vec(buf) - .map_err(|e| SerializationError::Deserialize(e.to_string()).into()) - } -} - impl TryFrom> for Vec { type Error = WitnessMapError; diff --git a/acvm-repo/acir/src/native_types/witness_stack.rs b/acvm-repo/acir/src/native_types/witness_stack.rs index 0e23e640893..e1d0639d1d3 100644 --- a/acvm-repo/acir/src/native_types/witness_stack.rs +++ b/acvm-repo/acir/src/native_types/witness_stack.rs @@ -4,12 +4,9 @@ use acir_field::AcirField; use flate2::Compression; use flate2::bufread::GzDecoder; use flate2::bufread::GzEncoder; -use noir_protobuf::ProtoCodec as _; use serde::{Deserialize, Serialize}; use thiserror::Error; -use crate::proto::convert::ProtoSchema; - use super::WitnessMap; #[derive(Debug, Error)] @@ -87,18 +84,6 @@ impl Deserialize<'a>> WitnessStack { } } -#[allow(dead_code)] -impl WitnessStack { - pub(crate) fn proto_serialize(&self) -> Vec { - ProtoSchema::::serialize_to_vec(self) - } - - pub(crate) fn proto_deserialize(buf: &[u8]) -> Result { - ProtoSchema::::deserialize_from_vec(buf) - .map_err(|e| SerializationError::Deserialize(e.to_string()).into()) - } -} - impl TryFrom<&WitnessStack> for Vec { type Error = WitnessStackError; diff --git a/acvm-repo/acir/src/serialization.rs b/acvm-repo/acir/src/serialization.rs new file mode 100644 index 00000000000..1b5407a4cbb --- /dev/null +++ b/acvm-repo/acir/src/serialization.rs @@ -0,0 +1,221 @@ +//! Serialization formats we consider using for the bytecode and the witness stack. +use crate::proto::convert::ProtoSchema; +use acir_field::AcirField; +use noir_protobuf::ProtoCodec; +use serde::{Deserialize, Serialize}; + +/// Serialize a value using `bincode`, based on `serde`. +/// +/// This format is compact, but provides no backwards compatibility. +pub(crate) fn bincode_serialize(value: &T) -> std::io::Result> { + bincode::serialize(value).map_err(std::io::Error::other) +} + +/// Deserialize a value using `bincode`, based on `serde`. +pub(crate) fn bincode_deserialize Deserialize<'a>>(buf: &[u8]) -> std::io::Result { + bincode::deserialize(buf).map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e)) +} + +/// Serialize a value using MessagePack, based on `serde`. +/// +/// This format is compact can be configured to be backwards compatible. +/// +/// When `compact` is `true`, it serializes structs as tuples, otherwise it writes their field names. +/// Enums are always serialized with their variant names (despite what the library comments say, and it's not configurable). +/// +/// Set `compact` to `true` if we want old readers to fail when a new field is added to a struct, +/// that is, if we think that ignoring a new field could lead to incorrect behavior. +#[allow(dead_code)] +pub(crate) fn msgpack_serialize( + value: &T, + compact: bool, +) -> std::io::Result> { + if compact { + // The default behavior encodes struct fields as + rmp_serde::to_vec(value).map_err(std::io::Error::other) + } else { + // Or this to be able to configure the serialization: + // * `Serializer::with_struct_map` encodes structs with field names instead of positions, which is backwards compatible when new fields are added, or optional fields removed. + // * consider using `Serializer::with_bytes` to force buffers to be compact, or use `serde_bytes` on the field. + // * enums have their name encoded in `Serializer::serialize_newtype_variant`, but originally it was done by index instead + let mut buf = Vec::new(); + let mut ser = rmp_serde::Serializer::new(&mut buf).with_struct_map(); + value.serialize(&mut ser).map_err(std::io::Error::other)?; + Ok(buf) + } +} + +/// Deserialize a value using MessagePack, based on `serde`. +#[allow(dead_code)] +pub(crate) fn msgpack_deserialize Deserialize<'a>>(buf: &[u8]) -> std::io::Result { + rmp_serde::from_slice(buf).map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e)) +} + +/// Serialize a value using `protobuf`. +/// +/// This format is forwards and backwards compatible, but requires code generation based on `.proto` schemas. +#[allow(dead_code)] +pub(crate) fn proto_serialize(value: &T) -> Vec +where + F: AcirField, + R: prost::Message, + ProtoSchema: ProtoCodec, +{ + ProtoSchema::::serialize_to_vec(value) +} + +/// Deserialize a value using `protobuf`. +#[allow(dead_code)] +pub(crate) fn proto_deserialize(buf: &[u8]) -> std::io::Result +where + F: AcirField, + R: prost::Message + Default, + ProtoSchema: ProtoCodec, +{ + ProtoSchema::::deserialize_from_slice(buf) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e)) +} + +#[cfg(test)] +mod tests { + use crate::serialization::{msgpack_deserialize, msgpack_serialize}; + + mod version1 { + use serde::{Deserialize, Serialize}; + + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] + pub(crate) enum Foo { + Case0 { d: u32 }, + Case1 { a: u64, b: bool }, + Case2 { a: i32 }, + Case3 { a: bool }, + Case4 { a: Box }, + Case5 { a: u32, b: Option }, + } + } + + mod version2 { + use serde::{Deserialize, Serialize}; + + // Removed variants and fields + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] + pub(crate) enum Foo { + // removed + // Case0 { .. }, + // unchanged, but position shifted + Case1 { + a: u64, + b: bool, + }, + // new prefix field + Case2 { + b: String, + a: i32, + }, + // new suffix field + Case3 { + a: bool, + b: String, + }, + // reordered, optional removed + Case5 { + a: u32, + }, + // reordered, field renamed + Case4 { + #[serde(rename = "a")] + c: Box, + }, + // new + Case6 { + b: i64, + }, + // new, now more variants than before + Case7 { + c: bool, + }, + } + } + + /// Test that the `msgpack_serialize(compact=false)` is backwards compatible: + /// * removal of an enum variant (e.g. opcode no longer in use) + /// * struct fields added: the old reader ignores new fields, but this could potentially lead to invalid behavior + /// * struct fields reordered: trivial because fields are named + /// * struct fields renamed: this would work with positional encoding, or using `#[serde(rename)]` + #[test] + fn msgpack_serialize_backwards_compatibility() { + let cases = vec![ + (version2::Foo::Case1 { b: true, a: 1 }, version1::Foo::Case1 { b: true, a: 1 }), + (version2::Foo::Case2 { b: "prefix".into(), a: 2 }, version1::Foo::Case2 { a: 2 }), + ( + version2::Foo::Case3 { a: true, b: "suffix".into() }, + version1::Foo::Case3 { a: true }, + ), + ( + version2::Foo::Case4 { c: Box::new(version2::Foo::Case1 { a: 4, b: false }) }, + version1::Foo::Case4 { a: Box::new(version1::Foo::Case1 { a: 4, b: false }) }, + ), + (version2::Foo::Case5 { a: 5 }, version1::Foo::Case5 { a: 5, b: None }), + ]; + + for (i, (v2, v1)) in cases.into_iter().enumerate() { + let bz = msgpack_serialize(&v2, false).unwrap(); + let v = msgpack_deserialize::(&bz) + .unwrap_or_else(|e| panic!("case {i} failed: {e}")); + assert_eq!(v, v1); + } + } + + /// Test that the `msgpack_serialize(compact=true)` is backwards compatible for a subset of the cases: + /// * removal of an enum variant (e.g. opcode no longer in use) + /// * struct fields renamed: accepted because position based + /// * adding unused enum variants + /// + /// And rejects cases which could lead to unintended behavior: + /// * struct fields added: rejected because the number of fields change + /// * struct fields reordered: rejected because fields are position based + #[test] + fn msgpack_serialize_compact_backwards_compatibility() { + let cases = vec![ + (version2::Foo::Case1 { b: true, a: 1 }, version1::Foo::Case1 { b: true, a: 1 }, None), + ( + version2::Foo::Case2 { b: "prefix".into(), a: 2 }, + version1::Foo::Case2 { a: 2 }, + Some("wrong msgpack marker FixStr(6)"), + ), + ( + version2::Foo::Case3 { a: true, b: "suffix".into() }, + version1::Foo::Case3 { a: true }, + Some("array had incorrect length, expected 1"), + ), + ( + version2::Foo::Case4 { c: Box::new(version2::Foo::Case1 { a: 4, b: false }) }, + version1::Foo::Case4 { a: Box::new(version1::Foo::Case1 { a: 4, b: false }) }, + None, + ), + ( + version2::Foo::Case5 { a: 5 }, + version1::Foo::Case5 { a: 5, b: None }, + Some("invalid length 1, expected struct variant Foo::Case5 with 2 elements"), + ), + ]; + + for (i, (v2, v1, ex)) in cases.into_iter().enumerate() { + let bz = msgpack_serialize(&v2, true).unwrap(); + let res = msgpack_deserialize::(&bz); + match (res, ex) { + (Ok(v), None) => { + assert_eq!(v, v1); + } + (Ok(_), Some(ex)) => panic!("case {i} expected to fail with {ex}"), + (Err(e), None) => panic!("case {i} expected to pass; got {e}"), + (Err(e), Some(ex)) => { + let e = e.to_string(); + if !e.contains(ex) { + panic!("case {i} error expected to contain {ex}; got {e}") + } + } + } + } + } +} diff --git a/cspell.json b/cspell.json index 810d3163673..0db77d9ef1c 100644 --- a/cspell.json +++ b/cspell.json @@ -165,6 +165,7 @@ "monomorphizes", "monomorphizing", "montcurve", + "msgpack", "MSRV", "multicall", "nand", diff --git a/utils/protobuf/src/lib.rs b/utils/protobuf/src/lib.rs index c3d72086c83..a0a5760d0e6 100644 --- a/utils/protobuf/src/lib.rs +++ b/utils/protobuf/src/lib.rs @@ -113,7 +113,7 @@ pub trait ProtoCodec { Self::encode(value).encode_to_vec() } /// Deserialize a buffer into protobuf and then decode into the domain type. - fn deserialize_from_vec(buf: &[u8]) -> eyre::Result + fn deserialize_from_slice(buf: &[u8]) -> eyre::Result where R: prost::Message + Default, {