diff --git a/Cargo.lock b/Cargo.lock index 92d9906948..509a30dd67 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2594,10 +2594,10 @@ version = "0.6.0" dependencies = [ "cfg-if", "curve25519-dalek", - "digest 0.9.0", "ed25519-dalek", + "generic-array 0.14.4", "mc-crypto-digestible-derive", - "sha3", + "merlin", "x25519-dalek", ] @@ -2614,10 +2614,18 @@ dependencies = [ name = "mc-crypto-digestible-derive-test" version = "0.6.0" dependencies = [ - "digest 0.9.0", "generic-array 0.14.4", "mc-crypto-digestible", "mc-crypto-digestible-derive", + "mc-crypto-digestible-test-utils", +] + +[[package]] +name = "mc-crypto-digestible-test-utils" +version = "0.6.0" +dependencies = [ + "mc-crypto-digestible", + "serde_json", ] [[package]] @@ -2626,6 +2634,7 @@ version = "0.6.0" dependencies = [ "blake2", "digest 0.9.0", + "mc-crypto-digestible", ] [[package]] @@ -2641,6 +2650,7 @@ dependencies = [ "failure", "hex_fmt", "mc-crypto-digestible", + "mc-crypto-hashes", "mc-util-from-random", "mc-util-repr-bytes", "mc-util-serial", @@ -3265,6 +3275,7 @@ dependencies = [ "mc-common", "mc-crypto-box", "mc-crypto-digestible", + "mc-crypto-digestible-test-utils", "mc-crypto-hashes", "mc-crypto-keys", "mc-crypto-rand", diff --git a/Cargo.toml b/Cargo.toml index b1b0d0e335..7a10c1b119 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ members = [ "consensus/service", "crypto/box", "crypto/digestible", + "crypto/digestible/test-utils", "crypto/digestible/derive/test", "crypto/keys", "crypto/noise", diff --git a/consensus/enclave/impl/src/lib.rs b/consensus/enclave/impl/src/lib.rs index 634da3f676..87d2bc8874 100644 --- a/consensus/enclave/impl/src/lib.rs +++ b/consensus/enclave/impl/src/lib.rs @@ -15,7 +15,7 @@ extern crate alloc; mod identity; use alloc::{collections::BTreeSet, format, string::String, vec::Vec}; -use core::convert::{TryFrom, TryInto}; +use core::convert::TryFrom; use identity::Ed25519Identity; use mc_account_keys::PublicAddress; use mc_attest_core::{ @@ -35,8 +35,7 @@ 
use mc_consensus_enclave_api::{ WellFormedEncryptedTx, WellFormedTxContext, }; use mc_crypto_ake_enclave::AkeEnclaveState; -use mc_crypto_digestible::Digestible; -use mc_crypto_hashes::Blake2b256; +use mc_crypto_digestible::{DigestTranscript, Digestible, MerlinTranscript}; use mc_crypto_keys::{Ed25519Pair, Ed25519Public, RistrettoPrivate, RistrettoPublic, X25519Public}; use mc_crypto_message_cipher::{AesMessageCipher, MessageCipher}; use mc_crypto_rand::McRng; @@ -462,16 +461,15 @@ impl ConsensusEnclave for SgxConsensusEnclave { // Create an aggregate fee output. let fee_tx_private_key = { - let hash_value: [u8; 32] = { - let mut hasher = Blake2b256::new(); - FEES_OUTPUT_PRIVATE_KEY_DOMAIN_TAG.digest(&mut hasher); - parent_block.id.digest(&mut hasher); - transactions.digest(&mut hasher); - hasher - .result() - .as_slice() - .try_into() - .expect("Wrong length.") + let mut hash_value = [0u8; 32]; + { + let mut transcript = + MerlinTranscript::new(FEES_OUTPUT_PRIVATE_KEY_DOMAIN_TAG.as_bytes()); + parent_block + .id + .append_to_transcript(b"parent_block_id", &mut transcript); + transactions.append_to_transcript(b"transactions", &mut transcript); + transcript.extract_digest(&mut hash_value); }; // This private key is generated from the hash of all transactions in this block. 
diff --git a/consensus/enclave/trusted/Cargo.lock b/consensus/enclave/trusted/Cargo.lock index c4f675caac..d2c5f22a90 100644 --- a/consensus/enclave/trusted/Cargo.lock +++ b/consensus/enclave/trusted/Cargo.lock @@ -817,9 +817,10 @@ version = "0.6.0" dependencies = [ "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", "curve25519-dalek 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "digest 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", "ed25519-dalek 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "generic-array 0.14.4 (registry+https://github.com/rust-lang/crates.io-index)", "mc-crypto-digestible-derive 0.6.0", + "merlin 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "x25519-dalek 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -838,6 +839,7 @@ version = "0.6.0" dependencies = [ "blake2 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", "digest 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "mc-crypto-digestible 0.6.0", ] [[package]] diff --git a/consensus/service/src/consensus_service.rs b/consensus/service/src/consensus_service.rs index 7b68be8671..066c4e6ee7 100644 --- a/consensus/service/src/consensus_service.rs +++ b/consensus/service/src/consensus_service.rs @@ -607,7 +607,7 @@ impl ConsensusSer block_height = Some(b); latest_block_hash = ledger_db .get_block(b - 1) - .map(|x| format!("{:X}", x.id.0)) + .map(|x| format!("{:X?}", x.id.0)) .map_err(|e| log::error!(logger, "Error getting block {} {:?}", b - 1, e)) .ok(); latest_block_timestamp = ledger_db diff --git a/crypto/digestible/Cargo.toml b/crypto/digestible/Cargo.toml index 00356d696a..812756e2d7 100644 --- a/crypto/digestible/Cargo.toml +++ b/crypto/digestible/Cargo.toml @@ -6,13 +6,15 @@ edition = "2018" 
[dependencies] cfg-if = "0.1" -digest = { version = "0.9", default-features = false } +merlin = { version = "2.0", default-features = false } +generic-array = { version = "0.14", default-features = false } # For derive support mc-crypto-digestible-derive = { path = "./derive", optional = true } # Built-in support for dalek primitives -ed25519-dalek = { version = "1.0.0", default-features = false, optional = true } +# ed25519-dalek doesn't build without rand feature +ed25519-dalek = { version = "1.0.0", default-features = false, features = ["rand"], optional = true } x25519-dalek = { version = "1.0.1", default-features = false, optional = true } [target.'cfg(any(target_feature = "avx2", target_feature = "avx"))'.dependencies] @@ -21,11 +23,8 @@ curve25519-dalek = { version = "3.0", default-features = false, features = ["sim [target.'cfg(not(any(target_feature = "avx2", target_feature = "avx")))'.dependencies] curve25519-dalek = { version = "3.0", default-features = false, features = ["nightly", "u64_backend"], optional = true } -[dev-dependencies] -sha3 = "0.9" - [features] -default=["alloc", "derive"] +default=["alloc", "derive", "dalek"] # Enables support for types in alloc crate alloc=[] # Enables re-export of derive(Digestible) macro diff --git a/crypto/digestible/README.md b/crypto/digestible/README.md index 0291accf0c..90350e245e 100644 --- a/crypto/digestible/README.md +++ b/crypto/digestible/README.md @@ -1,175 +1,497 @@ mc-crypto-digestible ========== -NOTE: This crate is WIP, use at your own risk! - `mc-crypto-digestible` and its companion crate `mc-crypto-digestible-derive`, -represent a scheme for secure (nonmalleable) hashing of common rust objects using -a collision-resistant hash function. This represents critical infrastructure for +represent a scheme for secure (nonmalleable) hashing of structured data, using +protocol transcript objects like Merlin. + +This represents critical infrastructure for blockchain projects, because if e.g. 
an attacker can find two different blocks with the same hash, they can subvert the integrity of the blockchain. -The `Digestible` trait is provided, which allows that the contents of the entire -object can be hashed together in a non-malleable way, after an implementation of -`Digest` trait is provided. - -The scheme implicitly defines an "encoding" whereby your object is turned into a -byte sequence which is fed into the hasher. - -This has a few benefits: -- The encoding is stable and canonical, not depending on implementation details of - a serialization library, which typically do not provide byte-for-byte stability - guarantees. -- Bringing the bytes directly to the hasher as they are produced is faster than - marshalling them to a temporary buffer and then hashing the buffer. -- Digestible trait is not rooted in serde -- since many types implement serde traits - without concern for cryptographic issues, basing digestible trait on serde risks - creating problems. Secure hashing really is a different issue from serialization, - close though they may seem. - -Typically, serialization libraries offer a stable wire format, and then -progressively try to improve the efficiency of serialization and deserialization -over time, without breaking the wire format. This generally means that the byte -representation is not canonical, which makes this a bad way to hash objects. - -Overview --------- - -To achieve its goals, `mc-crypto-digestible` must specify an encoding for any type which -you derive `Digestible` on. - -Ultimately, the engineering requirement is to show that the encoding function, -as it acts on objects of any particular type, is a "faithful" encoding -- that is, -no two objects correspond to the same bytes. If this is true, then under the assumption -that your `Digest` algo is second pre-image resistant, it is also hard to find two -instances of any structure with the same hash. - -Our strategy is to work "inductively" over the structure of your types. 
- -- We take as our correctness property called "prefix-free". Prefix-free is stronger - than saying that an encoding is one-to-one / faithful, so if we have this for - all types that we implement Digestible for, then we have achieved our goal. - For a good overview of prefix codes, see wikipedia: https://en.wikipedia.org/wiki/Prefix_code -- We implement `Digestible` for primitive types in a way that accomplishes this. - This is generally easy because most primitive types of interest have fixed length encodings, - which are trivially prefix-free. -- For "compound" types like structures, enums, sequences, etc. we appeal to one of several - abstract rules specifying how a prefix-free encoding can be built assuming that the children - have prefix-free encodings. - - These rules will be explained in detail in a separate document, but roughly, we think - of each compound type as either a "product type" or a "sum type" and apply the corresponding - rule. - - This actual mapping is done either by generic implementations of `trait Digestible` e.g. - for slices or `Vec`, or it is done in the proc-macro logic in `mc-crypto-digestible-derive`, - e.g. for `struct` and `enum`. - -Roughly, the five categories that everything gets interpretted as, in this analysis, are: +This approach is based on two ideas: +- Use a trait, and define the hashing strategy on a per-type basis. + This is the `Digestible` trait provided by this crate. +- Use code-gen to generate correct implementations of the trait, to reduce the + amount of manually-written security critical code to a minimum. + This is the proc-macro offered by the `mc-crypto-digestible-derive` crate. + +Engineering goals: +- The hash must be stable and canonical, not depending on implementation details + of e.g. a serialization library, which typically do not provide byte-for-byte stability. +- The hash should be portable. 
It should be straightforward for an engineer with a schema + for our blockchain to write a C program, or a python program that loads our blockchain and + computes the hash successfully. (They might have to call out to the C implementation of Merlin.) + The hash should certainly not be dependent on details of the Rust programming language. +- The hash scheme should support protobuf-style *schema evolution*. This means, + it should be possible to add a new optional field to a transaction-related structure, + without changing the hash of transactions that don't have the field. This gives us a way + to add new features without breaking ledger compatibility. (We use the rust core `Option` type + for this, similarly to how `prost` uses `Option` for optional proto members.) + Similarly, it should be possible to add `repeated` elements to structs without breaking the hash. + (We use the rust core `Vec` type for this, similarly to how `prost` uses `Vec` for repeated proto members.) + Similarly, it must be possible to add new types to enums without changing the hashes of + the old enum values. + +Comparison with other approaches +-------------------------------- + +Traditionally, blockchains are based directly on a cryptographic hash function, +such as SHA256-d in the case of bitcoin. This hash function takes only bytes. +A canonical method for marshalling a block into bytes-on-the-wire before the hash, +is defined in an RFC, and reviewed by cryptographers. Usually this spec explicitly +mentions every datatype in the blockchain and how to handle it. Then, this spec must +be implemented by engineers. + +Because implementing such a spec involves significant labor, and requires +manual changes whenever members are added to or removed from the schema, this creates +a lot of friction for development. + +In our approach, we have tried to create the spec with the idea that it will be +easily implemented using rust proc macros that won't have to change as the schema +evolves.
This makes it a lot easier for non-cryptographers to make significant +changes to the blockchain and transaction data-structures, without creating +tech debt / security problems, and generally eases development. + +To achieve this goal, we have to create a simple schema to which all blockchain data structures +are mapped. This schema is language agnostic. + +There are 4 types of elements in the schema, which we will discuss later: + - Primitives -- Aggregates (structs, tuples, fixed-length arrays) -- Sequences (variable length arrays, strings) -- Variant (including `Option`, `enum`) -- "Custom primitives" (generally means external types with canonical fixed-width representations, - e.g. curve25519-dalek curvepoints and scalars.) - -(It's possible to extend this to include something like e.g. protobuf map types, but we haven't -implemented it and won't describe it here.) - -If one applies this strategy naively and considers the results, it turns out that it corresponds -roughly to a "canonical" version of bincode, and is thus very efficient, adding very little "fluff" -to the encoding. This also makes it very believable that no two objects *of exactly the same type* -have the same encoding unless they are semantically equal, since bincode can actually be -deserialized, which is another way to demonstrate that the encoding is faithful. -https://docs.rs/bincode/1.2.1/bincode/ - -However, bincode would not normally be considered a suitable encoding for non-malleable hashing. -It's more common to use "self-describing" data-formats based on ASN.1 in cryptographic contexts, -where the serialized data essentially carries a schema that could be used to interpret it. The -purpose of this is to try to ensure that objects that have different schema are guaranteed to have -different hashes, and get the "semantics" of the data into the hash. 
- -This is sometimes called the "Horton Principle": https://en.wikipedia.org/wiki/Horton_Principle - -In the DER encoding rules, a strict Type-Length-Value protocol is used when encoding structures. -Types are mapped in some standardized way to a "type code", typically a fixed small number of bytes, -and this becomes part of the ASN.1 module specification. A struct is treated as a "group" and a -specific protocol is used for opening and closing a group, and listing the TLV bytes for its -members consecutively. - -Creating type codes on a per-type basis generally has to be done manually, and so creates a maintanence -burden. There is very little tooling in rust (or indeed, most programming language ecosystems) to support -this. - -One useful insight is that while DER is required to support deserialization, and so minimizing size-on-the-wire -is of critical interest, in the case when the encoding is only being made in order to hash the structure, -size on the wire is much less important. Modern hash functions are generally much faster on a per-byte -basis than elliptic curve operations. In our context, hashing transactions, hashing blocks, etc. is generally -not a performance bottleneck if done reasonably efficiently -- transaction validation is. So it's not -extremely important here to get an optimal or even near-optimal encoded representation in terms of the rate, -or number of bytes on the wire. It's much more important to get a non-malleable encoding. - -In many modern crypto libraries based on the `dalek-cryptography` ecosystem, the merlin library is used to -generate hashes of the cryptographic transcript to use as challenges, when employing the Fiat-Shamir heuristic. 
-- https://merlin.cool/use/protocol.html -This means roughly that the "contents to be hashed" are visited and mapped to a STROBE `AD` operation: - -``` -AD[label || LE32(message.len())](message); -``` -- https://merlin.cool/transcript/ops.html#appending-messages - -Inspecting the actual source code shows that at the STROBE layer, this actually looks roughly like -``` -strobe.meta_AD(label); -strobe.meta_AD(LE32(...)); -strobe.AD(message); -``` - -This has some of the characteristics of a type-length-value encoding, in that the "label" -is often playing the role of the data-type descriptor. However, here the label is not escaped using -length encoding, even though it is a user-provided variable-length string, nor is there any standardized list -of labels to be synchronized across applications. So, none of these strategies is really producing a -"distinguished encoding", and it's possible that other protocols with pathologically chosen labels and -pathologically chosen values could happen to have merlin transcript hashes that collide with theirs. - -Rather, the idea is that as long as the labels are "descriptive" and hence unlikely to collide by chance -with labels from another application, and all of the bytes in the actual application in question which -are potentially controlled by an adversary are properly framed, this represents "sound domain separation". -As long as the encodings of each actual message object are canonical, the overall protocol hash will be -canonical (and non-malleable), and so it should be hard for an adversary to trick the user's programs -by finding different values that those programs think are the same due to the hashes. - -At present revision, `digestible` incorporates this idea in the following way: -- The naive "canonical bincode"-like strategy is extended to incorporate fixed labels. - It is easy to see that adding fixed string constants as labels does not impact the prefix-free property, - so this can do no harm. 
-- Fixed labels are used whenever a `struct` is encoded: The rust name of the structure is the outer label. - Every struct member is also prefixed with a label corresponding to the member name. -- Digestible always incorporates proper framing: fixed-size objects are generally not framed, - but anything not known statically to have a fixed size is framed, at any layer of the hierarchy where - that is the case. - -This domain separation scheme is not perfect -- it is not as good as e.g. hashing a DER encoding. -There are several straightforward ways to improve it, at the cost of somewhat increased complexity. -At the same time, it seems not much different from the ideas around domain separation in Bulletproofs and -Schnorrkel, which are critical dependencies of ours. It also seems difficult to imagine a realistic way to -create a collision that would impact the application. - -We would like to improve this over time so that a more rigorous statement can be made -- we would like to -provably prevent collisions in the encoding even when the structure types are distinct in an appropriate sense, -and it's not clear that that is provable at this revision. It merely seems likely. - -It would also be of interest to integrate with merlin, if only to support the `dalek-cryptography` ecosystem -which is building up around it. Structure hashing is full of pitfalls and yet it is a common need. - -`Digestible` is potentially valuable because it provides a principled, systematic approach to the problem, seems comparable -to other practical solutions, and is easy to use especially by non-cryptographers, who can easily modify -structures and make new ones which derive digestible, and don't have to try to -determine appropriate domain separation labels, or figure out when to insert framing, manually. -Nevertheless it is WIP and the present revision is not going to be the ending state of the project. 
+- Aggregates +- Sequences +- Variants + +The origin of these four types is the different cases in which we had to add some kind of padding +or framing, in order to prove the security of early drafts of the system. +Primitives are type-length-value encodings of fixed-size elements with +canonical representations as bytes. The other three are various kinds of compound datatypes. + +To implement `Digestible` on a type correctly, first we have to choose which of +these things to interpret it as. Usually, there is only one possibility. Sometimes, it could +have multiple interpretations, but there is usually only one good choice. +The trait implementations and proc macros that come with the crate are meant to do the right thing. + +The spec then explains how exactly we should hash any particular AST following this schema. If the +type is a complex type, then this will be specified recursively in terms of how +we hash its constituent elements. This brings us to the next point. + +Use of merlin instead of a cryptographic hash function +-------------------------------------------------- + +Traditionally, blockchain hashing is based on a cryptographic hash function, +which is assumed to be "collision resistant". +This means that it is infeasible for an adversary to find two +strings `x`, `y`, such that `SHA256(x) == SHA256(y)`. + +Ultimately, the security of the blockchain hashing strategy must reduce to this +property. If two blocks are not logically equal, then they must not marshall down to the +same bytes before being fed into SHA256. As long as this is the case, then it is +infeasible for the adversary to find two different logical block values with the same hash. + +One of the most difficult things in such marshalling schemes is the requirement to +systematically apply domain separation and framing to prevent ambiguity from arising. +It is difficult to create a test that this has been done correctly.
+ +In the `dalek-cryptography` ecosystem, `merlin` has emerged as an alternative approach +to these kinds of domain-separation issues when hashing complex objects. + +A *merlin transcript* is superficially like a `Digest` object from the `digest` crate. +It represents a stateful cryptographic primitive of fixed size. Just as bytes can be +fed as "input" to a `Digest`, bytes can be appended to a merlin transcript, which is combined +irreversibly with the current state, using the Keccak primitive. + +A `Digest` object generally only produces output when it is finalized. A merlin transcript +in principle can produce output ("challenge bytes") at any time. This is mainly useful +when attempting to perform the Fiat-Shamir transform to create Zero-Knowledge proofs. + +Whereas there are many possible cryptographic hash functions that implement `Digest`, +and a hash function is fundamentally a function on byte sequences, a Merlin transcript is not. +Merlin requires the use of context strings whenever bytes are added, and automatically +puts "framing" around the byte string, by prepending the length as a little-endian 4 byte number. +Appending "abcd" to a Merlin transcript is never the same as appending "ab" and then appending "cd", +no matter what context strings are used. + +The notion that we recursively walk the AST when computing the digest of a structure, +is referred to more generally as "protocol composition" in the merlin documentation -- the idea +that a larger protocol can define its transcript by recursively including the transcripts of sub protocols. + +`merlin` is also well-integrated with the `schnorrkel` signature crate. Instead of signing +hashes of messages, `Schnorrkel` can consume a merlin transcript and produce a signature of +that without again hashing it, which is arguably simpler overall and more efficient.
+ +Security assumptions around merlin +---------------------------------- + +The digestible crate requires only the following security assumption around merlin transcripts: + +For any merlin transcript (in any particular internal state), it is infeasible to find +two distinct sequences of `append_message` calls such that a final call to `challenge_bytes` +(producing at least 32 challenge bytes, with a particular context string), yields +the same challenge bytes. + +We note that +(1) This assumption underlies the use of Merlin for the Fiat-Shamir transform. If this +property doesn't hold, then it likely creates a source of malleability in any zero-knowledge proof schemes based on it. +That is, Merlin was specifically designed to do something stronger than this. +(2) A primitive that does this can be built from any collision resistant hash function. +For instance, if we assume that SHA3 is collision resistant, then a valid implementation +of `DigestTranscript` would be: +- Implement `append_bytes(context, message)` by + - Encoding context and message using any particular prefix-free encoding of the set of all pairs of byte strings, + - Inputting this result into the Sha3 digest object. +- Implement `extract_digest` by finalizing the digest object. + +Since there are primitives that do this under reasonable assumptions, it is not unreasonable +to assume that Merlin does this. Ultimately Merlin is itself based on the STROBE protocol framework, +which is in turn based on Keccak. If SHA3 has this property at all it seems very likely that merlin +does as well. + +In fact, there are certain cases when we WANT to be able to use a traditional digest function +with the `digestible` crate. For instance, if we need to create an ed25519ph signature, then the +API requires us to provide a SHA512 hasher to which the message has already been marshalled. If you need to do this, +you can use the `digestible` trait with the `PseudoMerlin` object in `mc-crypto-hashes`.
`PseudoMerlin` +carefully emulates the API of merlin for appending bytes, on top of an arbitrary cryptographic hash function. +If the chosen hash function is strongly collision resistant in the classical sense, then `PseudoMerlin` is suitable +for use with the `Digestible` crate to create non-malleable hashes. + +It is recommended not to use `PseudoMerlin`, and to prefer `Merlin` unless something compels you +to use `PseudoMerlin`. + +Specification: +=============== + +First, we must describe the AST which represents what goes into the digest. + +- A primitive is a "simple" type (as opposed to a compound type) that has a canonical representation as bytes. +- A sequence is a variable length sequence of values of some other type. A sequence has a length known at runtime. +- An aggregate is a fixed-length sequence of values ("fields"), of different types. Each field has a field name. +- A variant is a single value which may be one of several different types. Each possibility has an associated name, in the context of this variant. In the sequel we call this the "variant possibility name". + +For each AST node, there is a protocol for adding it to the digest transcript. +(For comparison, see [Merlin transcript protocols](https://merlin.cool/use/protocol.html), or [ASN.1](https://en.wikipedia.org/wiki/ASN.1).) + +Recall that the fundamental operation of Merlin is `append_message` which takes a `&'static [u8]` context string, which should generally be a string literal, +and a `&[u8]` data value. + +In our protocol, *whenever* an AST node is appended to the transcript, _a context string must be supplied by the caller_. +At the root node of the AST, the user supplies this. When the root node is a compound node, the protocol specifies the context strings +when appending its children.
+ +A *primitive* with given typename is appended to the Merlin transcript as follows: -FIXME: Find and mention the bitcoin and cryptonote and merkle tree examples re: framing and domain separation +``` +impl DigestTranscript { + fn append_primitive(&mut self, context: &[u8], typename: &[u8], data: impl AsRef<[u8]>) { + self.append_bytes(context, "prim"); + self.append_bytes(typename, data); + } +} + +impl Digestible for u32 { + fn append_to_transcript(&self, context: &[u8], transcript: &mut impl DigestTranscript) { + transcript.append_primitive(context, b"uint", &self.to_le_bytes()) + } +} +``` + +An *aggregate* is appended to the Merlin transcript by first appending an aggregate header (which includes the typename), +then appending each field, then appending an aggregate closer. When appending each field, the _field name is used as the context string_. + +``` +impl DigestTranscript { + fn append_agg_header(&mut self, context: &[u8], type_name: &[u8]) { + self.append_bytes(context, "agg"); + self.append_bytes("name", type_name); + } + fn append_agg_closer(&mut self, context: &[u8], type_name: &[u8]) { + self.append_bytes(context, "agg-end"); + self.append_bytes("name", type_name); + } +} + +impl Digestible for MyAggregate { + fn append_to_transcript(&self, context: &[u8], transcript: &mut impl DigestTranscript) { + transcript.append_agg_header(context, "%aggregate_type_name"); + self.%field0.append_to_transcript("%field0", transcript); + self.%field1.append_to_transcript("%field1", transcript); + ... + transcript.append_agg_closer(context, "%aggregate_type_name"); + } +} +``` + +The choice to use an explicit closer, rather than encoding the +number of members in the aggregate, permits *schema evolution* by the addition of new optional members, without changing the hash +of old objects. + +A *sequence* is appended to the Merlin transcript by first appending a sequence header (which includes the length), +and then appending each sequence member.
When appending each member, _the empty string is used as the context string_. + +``` +impl DigestTranscript { + fn append_seq_header(&mut self, context: &[u8], len: usize) { + self.append_bytes(context, "seq"); + self.append_bytes("len", (len as u64).to_le_bytes()); + } +} + +impl Digestible for MySequence { + fn append_to_transcript(&self, context: &[u8], transcript: &mut impl DigestTranscript) { + if !self.is_empty() { + transcript.append_seq_header(context, self.len()); + for elem in self.iter() { + elem.append_to_transcript(b"", transcript); + } + } else { + transcript.append_none(context); + } + } +} +``` + +A *none* is a special primitive used in a few corner cases. It can be thought of as a special kind of primitive. +Its role is to help with support for schema evolution. Types like empty sequences and empty optionals can map to None +in the cases when it isn't possible to completely omit them from the hash. + +``` +impl DigestTranscript { + fn append_none(&mut self, context: &[u8]) { + self.append_bytes(context, ""); + } +} +``` + +A *variant* is appended to the Merlin transcript by first appending a variant header. Then, the value of the variant is appended to the transcript, and the _variant possibility name is used as the context string_. + +The possibilities of a variant are each assigned a distinct number. A runtime value of the variant type has a number called the *discriminant* which indicates +which possibility is present. + +``` +impl DigestTranscript { + fn append_var_header(&mut self, context: &[u8], type_name: &[u8], which: u32) { + self.append_bytes(context, "var"); + self.append_bytes("name", type_name); + self.append_bytes("which", which.to_le_bytes()); + } +} + +impl Digestible for MyVariant { + fn append_to_transcript(&self, context: &[u8], transcript: &mut impl DigestTranscript) { + match self { + ...
+ Self::%variant_possibility_name(val) => { + transcript.append_var_header(context, "%variant_name", %discriminant); + val.append_to_transcript("%variant_possibility_name", transcript); + }, + ... + } + } +``` + +Accessing the discriminant is different in different contexts -- in protobuf `.case()` is often +the API for getting this number. In C++ `boost::variant` and similar libraries, `.which()` is used. Stable rust does not expose an API for +getting this number directly, rust considers it an unspecified implementation detail for now. When `derive(Digestible)` is used with a +rust enum, the generated code obtains this value by using the declaration order of the enumerators. + +Correctness: +------------ + +The correctness of the protocol means that, for any two distinct ASTs, the two corresponding sequences of `append_bytes` calls to Merlin +are different. With this property in hand, we can be sure that two distinct ASTs have different hashes, assuming the collision resistance property of Merlin. + +It is beyond the scope of this README to establish this property formally, but we refer the reader to a separate document which will establish this (TODO). +The main idea is to show that the map from possible ASTs to possible sequences of `(context, data)` pairs is a prefix-free map. +We prove this by induction on the structure of the AST. + +Implementation notes and examples: +================================== + +Primitives: +----------- + +For this discussion, a *primitive* is a type which *has a canonical, portable representation as bytes*. + +`Digestible` is implemented in this crate for *built-in integer types*, *byte slices and arrays*, and *strings*. + +For built-in integer types, we specify the use of little-endian byte encoding, as merlin uses internally +for the encoding of buffer lengths. The type signifier is `"uint"` for unsigned and `"int"` for signed. 
+For integer types like `size_t` which have different possible sizes on different platforms, we specify that +they should be converted to 64-bit integers and then encoded, for portability. +For `bool`, the type signifier is `"bool"` and the data is `[0u8]` in case of `false` and `[1u8]` in case of `true`. + +For buffers of bytes e.g. `Vec` the bytes themselves are the canonical representation. The type signifier is `"bytes"`. + +For a UTF-8 string, the canonical byte representation is used. The type signifier is `"str"`. + +For curve25519-scalars, the canonical byte representation is used, and the type signifier is `"scalar"`. +For Ristretto curve points, the canonical byte representation is used, and the type signifier is `"ristretto"`. +For ed25519 curve points, the canonical byte representation is used, and the type signifier is `"ed25519"`. +For x25519 curve points, the canonical byte representation is used, and the type signifier is `"x25519"`. + +You can add custom primitives by implementing `Digestible` and making `append_to_transcript` call `append_primitive`. +You should choose a new type signifier if appropriate, and the data must be a portable, canonical representation of the +value as bytes. + +Sequences: +---------- + +For this discussion, a *sequence* is a type representing a variable length sequence of elements +of a type which is digestible. + +In rust sequences are iterable. +In protobuf sequences are usually represented using the `repeated` modifier. + +In this crate, rust slices, and `Vec` are mapped to `seq` AST nodes. + +For ordered sets, we specify that the ordered set should be treated as a `seq` AST node, +and the elements visited in increasing order. + +In this crate, we implement `Digestible` for `BTreeSet` in this way. +It would be acceptable to implement `Digestible` for `BTreeMap` as well, thinking +of the BTreeMap as an ordered sequence of pairs, and mapping it to `seq` AST node. 
+ +Note that byte sequences are NOT treated as `seq` AST nodes, they are treated as primitives, +which significantly improves efficiency. + +Aggregates: +----------- + +For this discussion, an *aggregate* is a type consisting of a fixed sequence of members, +possibly of different types, which are themselves `digestible`. + +An aggregate has a name (identifier for the type in source code). +The members have associated identifiers (identifier for the member in source code). + +In Rust, a struct or tuple is typically an aggregate. +In protobuf, a message is an aggregate. +In type theory this is sometimes called a product type. + +For an aggregate, we specify that `append_to_transcript` shall be implemented as: + +``` + fn append_to_transcript(&self, context: &[u8], transcript: &mut Transcript) { + transcript.append_agg_header(context, "%aggregate_type_name"); + self.%field0.append_to_transcript("%field0", transcript); + self.%field1.append_to_transcript("%field1", transcript); + ... + transcript.append_agg_closer(context, "%aggregate_type_name"); + } +``` + +For rust tuples, we treat the index of the element in the tuple as its field name, counting from 0, +and the aggregate type name should be the stringification of the tokens representing the type. + +Variants: +--------- + +For this discussion a *variant* is a type whose values are values from one of a fixed number of other types, +which are themselves `digestible`. + +In rust, `enums` are `variant` types. +In protobuf, `OneOf` types are `variants`. +In type theory this is sometimes called a sum type. + +A variant type has a name, and each possibility for the variant also has a name. + +A variant value has a `discriminant` which is an integer indicating which of the possibilities +is present. Often there is a function `.which()` which obtains this number. 
+ +For a variant, we specify that `append_to_transcript` shall be implemented as: + +``` + fn append_to_transcript(&self, context: &[u8], transcript: &mut Transcript) { + match self { + ... + Self::%variant_possibility_name(val) => { + transcript.append_bytes(context, "var"); + transcript.append_bytes("name", "%variant_name"); + (self.which() as u64).append_to_transcript("which", transcript); + val.append_to_transcript("%variant_possibility_name", transcript); + }, + ... + } + } +``` + +In rust, an `enum` may have no associated data. (In documentation they call this a unit variant). +In this case, `val.append_to_transcript("%variant_possibility_name", transcript)` should simplify to +``` +transcript.append_bytes("%variant_possibility_name", ""); +``` + +In rust, a definition of an `enum` may implicitly declare anonymous structs and tuples associated +to an enumerator. In this case, we follow the rules for an aggregate when appending +the anonymous struct to the transcript, and use the empty string for its name. + +In rust, an enum where every variant has no associated data can be tagged with e.g. `repr(u32)` +and interpreted directly as a `u32`, as in C enums. An implementor may reasonably decide +to implement `digestible` for such an enum by converting to a `u32` and then appending that +as a primitive. In this case `derive(Digestible)` should not be used, and this choice should be documented +to allow cross-language implementations to do the same. + +Examples: +... + + + +Schema Evolution: +================= + +One of the main goals of this scheme is to support *schema evolution*, which means +that just as with protobuf, we can add fields to our structures without breaking +compatibility. In this case, this means *ledger-compatibility* -- we would like to +be able to add new fields to e.g. the `TxOut` structure or the `BlockContents` structure +without changing the hashes of transactions or old blocks that don't have the new fields. 
+ +The main ideas that we have to support this are: + +- Rust `core::option::Option` type is treated specially -- it *does not* get mapped +to a `var` AST node, as rust enums with `derive(Digestible)` do. Instead, when +an `Option` is visited, we append nothing if the value is `None`, and simply append the +value when the option is `Some`. So it is "transparent" from the point of view of the +digestible AST. + +- The `agg` AST node *does not* include the number of fields as part of the digest. + Instead, as many fields as needed are appended, and then there is a `closer` that + is appended to the transcript. + +Together this means that: +- New `Option` fields may be added to existing structures without breaking ledger compatibility. +- Old fields that were not `Option` may be made optional without breaking ledger compatibility. + +Similarly, `Vec` is treated specially -- when `Vec` is a member of a struct and is empty, we treat +it the same as we would an empty `Option`, and append nothing. This is analogous to how new +`repeated` elements may be added to protobufs without breaking compatibility. + +Note that struct fields may not be re-ordered or renamed. + +As a compatibility tool, we allow a proc-macro attribute to change the name of a struct or enum, +for purpose of hashing. + +For example, this might look like + +``` +#[derive(Digestible)] +#[digestible(name = "LedgerType")] +pub struct LegacyLedgerType { + field1: Foo, + field2: Option, +} +``` + +This would cause the digestible proc-macro to use `LedgerType` as the name of the structure +for purposes of appending it to the transcript. + +Additionally, rust enum's are another point of extensibility. +New enum possibilities may be added to an existing rust enum without breaking the hashes +for the other possibilities. Note that enum names cannot be changed and old enums cannot be +removed. The index of the enum possibility within the list does become part of the hash. 
References ---------- +FIXME: Find and mention the bitcoin and cryptonote and merkle tree examples re: framing and domain separation + TODO diff --git a/crypto/digestible/derive/README.md b/crypto/digestible/derive/README.md index 458f024537..cda3babc38 100644 --- a/crypto/digestible/derive/README.md +++ b/crypto/digestible/derive/README.md @@ -1,9 +1,9 @@ mc-crypto-digestible-derive ================= -This proc macro crate allows the use of `derive(Digestible)` in user structs. +This proc macro crate allows the use of `derive(Digestible)` in user structs and enums. -The intended code gen is: +The intended code-gen for a struct is: ``` #[derive(Digestible)] @@ -18,17 +18,99 @@ expands to something like: ``` impl Digestible for Foo { - fn digest(&self, hasher: &mut D) { - hasher.input(b"a"); - self.a.digest(hasher); - hasher.input(b"b"); - self.b.digest(hasher); - hasher.input(b"c"); - self.c.digest(hasher); + fn append_to_transcript(&self, context: &'static [u8], transcript: &mut DT) { + transcript.append_agg_header(context, "Foo".as_bytes()); + self.a.append_to_transcript_allow_omit("a".as_bytes(), transcript); + self.b.append_to_transcript_allow_omit("b".as_bytes(), transcript); + self.c.append_to_transcript_allow_omit("c".as_bytes(), transcript); + transcript.append_agg_closer(context, "Foo".as_bytes()); } } ``` +The intended code-gen for an enum is: + +``` +#[derive(Digestible)] +struct Foo { + a: A, + b: B, + c: C +} +``` + +expands to something like: + +``` +impl Digestible for Foo { + fn append_to_transcript(&self, context: &'static [u8], transcript: &mut DT) { + transcript.append_agg_header(context, "Foo".as_bytes()); + self.a.append_to_transcript_allow_omit("a".as_bytes(), transcript); + self.b.append_to_transcript_allow_omit("b".as_bytes(), transcript); + self.c.append_to_transcript_allow_omit("c".as_bytes(), transcript); + transcript.append_agg_closer(context, "Foo".as_bytes()); + } +} +``` + + +Configuration +------------- + +`derive(Digestible)` can be 
configured by adding an attribute to the struct or enum, +of the form `#[digestible(...)]` + +`#[digestible(transparent)]` can be used with any struct that contains exactly one member. +As `#[repr(transparent)]` in rust, this enables the use of newtype wrappers without impacting +the way that the value is treated by `digestible`. + +``` +#[derive(Digestible)] +#[digestible(transparent)] +struct Foo(A) +``` + +expands to something like: + +``` +impl Digestible for Foo { + fn append_to_transcript(&self, context: &'static [u8], transcript: &mut DT) { + self.0.append_to_transcript(context, transcript); + } + fn append_to_transcript_allow_omit(&self, context: &'static [u8], transcript: &mut DT) { + self.0.append_to_transcript_allow_omit(context, transcript); + } +} +``` + +`#[digestible(name = "new_name")]` can be used to make the type identifier used for hashing different +from the actual rust identifier for the struct. This may be useful if we need to have two "versions" of +a struct that hash in the same way. + +``` +#[derive(Digestible)] +#[digestible(name = "Foo")] +struct FooV2 { + a: A, + b: B, + c: C +} +``` + +expands to the same codegen as we had for `struct Foo` earlier. + +Future improvements +------------------- + +Patches that would welcome include: +- Allow to rename individual fields in a struct, using `#[digestible(...)]` attribute +- Allow to unconditionally skip individual fields in a struct from the digest. + This may be useful if e.g. you want to add a time-stamp to some record but you don't + want it to become a part of the hash. + +Implementation notes +-------------------- + For comparison, the `derive(Hash)` stuff is implemented in `libsyntax_ext` in `rust/rust`, however, that is implemented directly in the compiler and not in a proc_macro or even in libcore, -so we can't use the same code. Instead this is based most directly on the `prost-derive` crate. +so we can't use the same code. 
Instead, the `derive(Digestible)` proc-macro code is based most directly on the `prost-derive` crate. diff --git a/crypto/digestible/derive/src/lib.rs b/crypto/digestible/derive/src/lib.rs index 59272e18f1..2233989c34 100644 --- a/crypto/digestible/derive/src/lib.rs +++ b/crypto/digestible/derive/src/lib.rs @@ -8,30 +8,148 @@ extern crate proc_macro; use quote::{format_ident, quote}; +use core::convert::TryFrom; use proc_macro::TokenStream; +use proc_macro2::Span; use syn::{ - Data, DataEnum, DataStruct, DeriveInput, Fields, FieldsNamed, FieldsUnnamed, Generics, Ident, + Attribute, Data, DataEnum, DataStruct, DeriveInput, Fields, FieldsNamed, FieldsUnnamed, + Generics, Ident, Lit, Meta, NestedMeta, }; +/// These are configuration options that are selected by #[digestible(..)] attributes +/// at struct or enum declaration. They are parsed from the DeriveInput::attrs field. +#[derive(Default, Clone)] +struct AttributeConfig { + /// Whether digestible should be derived "transparently", meaning, + /// this is e.g. new-type wrapper around some other digestible type, + /// and we should call through directly to the implementation on that type + pub transparent: bool, + /// Whether we should rename of the struct or enum, and use a user-provided + /// string for the name, for purpose of hashing. + /// This is a backwards compatibility tool. 
+ pub rename: Option, +} + +impl AttributeConfig { + // Apply a nested meta item from syn to the current config state + pub fn apply_meta(&mut self, nested_meta: &NestedMeta) -> Result<(), &'static str> { + match nested_meta { + NestedMeta::Lit(_) => { + return Err("Unexpected digestible literal attribute"); + } + NestedMeta::Meta(meta) => match meta { + Meta::Path(path) => { + if path.is_ident("transparent") { + if !self.transparent { + self.transparent = true; + } else { + return Err("transparent cannot appear twice as an attribute"); + } + } else { + return Err("unexpected digestible path attribute"); + } + } + Meta::NameValue(mnv) => { + if mnv.path.is_ident("name") { + if self.rename.is_some() { + return Err("name = cannot appear twice in digestible attributes"); + } else { + self.rename = match &mnv.lit { + Lit::Str(litstr) => Some(litstr.value()), + _ => { + return Err("name = must be set to string literal in digestible attributes"); + } + } + } + } else { + return Err("unexpected digestible feature attribute"); + } + } + _ => { + return Err("unexpected digestible attribute"); + } + }, + } + Ok(()) + } +} + +// Parse AttributeConfig from syn attribute list +impl TryFrom<&[Attribute]> for AttributeConfig { + type Error = &'static str; + + fn try_from(src: &[Attribute]) -> Result { + let mut result = AttributeConfig::default(); + + for attr in src { + if attr.path.is_ident("digestible") { + if let Meta::List(meta) = attr.parse_meta().unwrap() { + for meta_item in meta.nested.iter() { + result.apply_meta(meta_item)?; + } + } + } + } + + if result.transparent && result.rename.is_some() { + return Err("It is meaningless to combine digestible(transparent) and digestible(name=) features"); + } + + Ok(result) + } +} + +// This is the main entrypoint for `derive(Digestible)` fn try_digestible(input: TokenStream) -> Result { let input: DeriveInput = syn::parse(input).unwrap(); + // The rust identifier for this struct or enum let ident = input.ident; + // The generics 
associated to this struct or enum let generics = &input.generics; - match input.data { - Data::Struct(variant_data) => try_digestible_struct(&ident, generics, &variant_data), - Data::Enum(variant_data) => try_digestible_enum(&ident, generics, &variant_data), - Data::Union(..) => Err("Digestible can not be derived for a union"), + // Read any #[digestible(...)]` attributes on this struct or enum and parse them + let attr_config = AttributeConfig::try_from(&input.attrs[..])?; + + if attr_config.transparent { + // Handle the `digestible(transparent)` option + match input.data { + Data::Struct(variant_data) => { + try_digestible_struct_transparent(&ident, generics, &variant_data) + } + Data::Enum(_) => Err("Digestible cannot be derived transparently for an enum"), + Data::Union(..) => Err("Digestible cannot be derived for a union"), + } + } else { + // If the user specified a name, that's the custom name, otherwise use the rust ident + let custom_name = if let Some(name) = attr_config.rename { + Ident::new(name.as_ref(), Span::call_site()) + } else { + ident.clone() + }; + match input.data { + Data::Struct(variant_data) => { + try_digestible_struct(&ident, &custom_name, generics, &variant_data) + } + Data::Enum(variant_data) => { + try_digestible_enum(&ident, &custom_name, generics, &variant_data) + } + Data::Union(..) => Err("Digestible cannot be derived for a union"), + } } } +// Implement digestible for a struct, by creating an agg node for it, +// and making each struct field a child. +// Children are appended to transcript using `append_to_transcript_allow_omit`, +// because the allow omit is what permits schema evolution to occur. 
fn try_digestible_struct( ident: &Ident, + custom_name: &Ident, generics: &Generics, variant_data: &DataStruct, ) -> Result { - // fields is a Vec (I think) - let fields = match &variant_data.fields { + // Get the sequence of fields out of syn, as a Vec<&syn::Field> + let fields: Vec<&syn::Field> = match &variant_data.fields { Fields::Named(FieldsNamed { named: fields, .. }) | Fields::Unnamed(FieldsUnnamed { unnamed: fields, .. @@ -39,25 +157,24 @@ fn try_digestible_struct( Fields::Unit => Vec::new(), }; - // call is a Vec (I think) - let call = fields + // This is the tokens representing, bringing the transcript to each field + let call : Vec = fields .into_iter() .enumerate() .map(|(idx, field)| { match &field.ident { - // this is a regular struct + // this is a regular struct, and the field has an identifier Some(field_ident) => { quote! { - hasher.update(stringify!(#field_ident).as_bytes()); - self.#field_ident.digest(hasher); + self.#field_ident.append_to_transcript_allow_omit(stringify!(#field_ident).as_bytes(), transcript); } } - // this is a tuple struct, and the member doesn't have an identifier + // this is a tuple struct, and the field doesn't have an identifier + // we have to make a syn object corresponding to the index, and use it in the quote! macro None => { let index = syn::Index::from(idx); quote! { - hasher.update(stringify!(#index).as_bytes()); - self.#index.digest(hasher); + self.#index.append_to_transcript_allow_omit(stringify!(#index).as_bytes(), transcript); } } } @@ -67,24 +184,74 @@ fn try_digestible_struct( // Final expanded result let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); - // Hash the concrete type of each of the generics. - let generic_type_digests = generics - .type_params() - .map(|type_param| { - let ident = &type_param.ident; - quote! 
{ - hasher.update(std::any::type_name::<#ident>().as_bytes()); - } - }) - .collect::>(); - + // We implement append_to_transcript for the struct by + // first creating an agg header, then appending each field, + // then creating a matching agg closer let expanded = quote! { impl #impl_generics mc_crypto_digestible::Digestible for #ident #ty_generics #where_clause { - fn digest(&self, hasher: &mut D) { - hasher.update(stringify!(#ident).as_bytes()); - hasher.update(stringify!(#impl_generics).as_bytes()); - #(#generic_type_digests)* + fn append_to_transcript(&self, context: &'static [u8], transcript: &mut DT) { + transcript.append_agg_header(context, stringify!(#custom_name).as_bytes()); #(#call)* + transcript.append_agg_closer(context, stringify!(#custom_name).as_bytes()); + } + } + }; + + Ok(expanded.into()) +} + +// digestible(transparent) means that, this struct is a "wrapper" around a single +// value, and when digesting it, we don't create an agg node. +// Instead, we forward calls to `append_to_transcript` +// and `append_to_transcript_allow_omit` directly to the inner value. +// +// This is only allowed when the struct has exactly one field +fn try_digestible_struct_transparent( + ident: &Ident, + generics: &Generics, + variant_data: &DataStruct, +) -> Result { + // Get the sequence of fields out of syn, as a Vec + let fields: Vec<&syn::Field> = match &variant_data.fields { + Fields::Named(FieldsNamed { named: fields, .. }) + | Fields::Unnamed(FieldsUnnamed { + unnamed: fields, .. 
+ }) => fields.into_iter().collect(), + Fields::Unit => { + return Err("digestible cannot be derived transparently for a unit struct"); + } + }; + + if fields.is_empty() { + return Err("digestible cannot be derived transparently for a struct with no fields"); + } + if fields.len() > 1 { + return Err("digestible cannot be derived transparently for a struct or tuple with more than one field"); + } + + // Final expanded result + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + + let expanded = if let Some(field_ident) = &fields[0].ident { + quote! { + impl #impl_generics mc_crypto_digestible::Digestible for #ident #ty_generics #where_clause { + fn append_to_transcript(&self, context: &'static [u8], transcript: &mut DT) { + self.#field_ident.append_to_transcript(context, transcript); + } + fn append_to_transcript_allow_omit(&self, context: &'static [u8], transcript: &mut DT) { + self.#field_ident.append_to_transcript_allow_omit(context, transcript); + } + } + } + } else { + quote! { + impl #impl_generics mc_crypto_digestible::Digestible for #ident #ty_generics #where_clause { + fn append_to_transcript(&self, context: &'static [u8], transcript: &mut DT) { + self.0.append_to_transcript(context, transcript); + } + fn append_to_transcript_allow_omit(&self, context: &'static [u8], transcript: &mut DT) { + self.0.append_to_transcript_allow_omit(context, transcript); + } } } }; @@ -94,14 +261,15 @@ fn try_digestible_struct( fn try_digestible_enum( ident: &Ident, + custom_name: &Ident, generics: &Generics, variant_data: &DataEnum, ) -> Result { - let call = variant_data + let call : Vec = variant_data .variants .iter() .enumerate() - .map(|(idx, variant)| { + .map(|(which, variant)| { let variant_ident = &variant.ident; // Our behavior differs based on whether the enum variant is a unit (has no data @@ -109,90 +277,117 @@ fn try_digestible_enum( // tuple data assocated with it). 
match &variant.fields { // For an enum variant that doesn't have associated data (e.g. SomeEnum::MyVariant) - // we generate code that looks like this: - // Self::MyVariant => { - // hasher.update(&(0 as u64).to_le_bytes()); // This is the variant's index. - // hasher.update("MyVariant").as_bytes()); - // } + // we append an appropriate variant header, then append a "none" node to be its child. + // There must be a child node, even if it is None, to prevent ambiguity Fields::Unit => { quote! { Self::#variant_ident => { - hasher.update(&(#idx as u64).to_le_bytes()); - hasher.update(stringify!(#variant_ident).as_bytes()); + transcript.append_var_header(context, stringify!(#custom_name).as_bytes(), #which as u32); + transcript.append_none(stringify!(#variant_ident).as_bytes()); }, } } - // For an enum variant that has anonymous fields (e.g. SomeEnum::MyVariant(u32, - // u64)) we generate code that looks like this: + // For an enum variant with one nameless member, e.g. SomeEnum::Possibility(u32), which is the 3rd possibility + // we generate code like this, which appends a var_header, and then immediately the child value. + // The child value may not be omitted. + // + // Self::Possibility(val) => { + // transcript.append_var_header(context, "SomeEnum".as_bytes(), 3 as u32); + // val.append_to_transcript("Possibility".as_bytes(), transcript); + // } + // + // For an enum variant that multiple anonymous fields (e.g. SomeEnum::MyVariant(u32, + // u64)) we generate code that creates an anonymous aggregate as the child of the variant, + // and makes the fields children of that aggregate. + // This child node is the same as what we would get if handling a struct tuple, whose name + // was the empty string. + // For example: + // // Self::MyVariant(field_0, field_1) => { - // hasher.update(&(0 as u64).to_le_bytes()); // This is the variant's index. 
- // hasher.update("MyVariant").as_bytes()); - // hasher.update("0").as_bytes()); - // field_0.digest(hasher); - // hasher.update("1").as_bytes()); - // field_1.digest(hasher); + // transcript.append_var_header(context, "SomeEnum".as_bytes(), 3 as u32); + // transcript.append_agg_header("MyVariant".as_bytes(), b""); + // field_0.append_to_transcript_allow_omit("0".as_bytes(), transcript); + // field_1.append_to_transcript_allow_omit("1".as_bytes(), transcript); + // transcript.append_agg_closer("MyVariant".as_bytes(), b""); // } Fields::Unnamed(FieldsUnnamed { unnamed: fields, .. }) => { - let field_idents = fields - .iter() - .enumerate() - .map(|(idx, _field)| format_ident!("field_{}", idx)) - .collect::>(); - - let per_field_digest = fields - .iter() - .enumerate() - .map(|(idx, _field)| { - let index = syn::Index::from(idx); - let field_ident = format_ident!("field_{}", idx); - quote! { - hasher.update(stringify!(#index).as_bytes()); - #field_ident.digest(hasher); + if fields.len() == 1 { + quote! { + Self::#variant_ident(val) => { + transcript.append_var_header(context, stringify!(#custom_name).as_bytes(), #which as u32); + val.append_to_transcript(stringify!(#variant_ident).as_bytes(), transcript); } - }) - .collect::>(); + } + } else { - quote! { - Self::#variant_ident(#(#field_idents),*) => { - hasher.update(&(#idx as u64).to_le_bytes()); - hasher.update(stringify!(#variant_ident).as_bytes()); - #(#per_field_digest)* + let field_idents = fields + .iter() + .enumerate() + .map(|(idx, _field)| format_ident!("field_{}", idx)) + .collect::>(); + + // These are allow_omit, because they are appearing inside an aggregate (the anonymous struct) + let per_field_digest = fields + .iter() + .enumerate() + .map(|(idx, _field)| { + let index = syn::Index::from(idx); + let field_ident = format_ident!("field_{}", idx); + quote! { + #field_ident.append_to_transcript_allow_omit(stringify!(#index).as_bytes(), transcript); + } + }) + .collect::>(); + + quote! 
{ + Self::#variant_ident(#(#field_idents),* ) => { + transcript.append_var_header(context, stringify!(#custom_name).as_bytes(), #which as u32); + transcript.append_agg_header(stringify!(#variant_ident).as_bytes(), b""); + #(#per_field_digest)*; + transcript.append_agg_closer(stringify!(#variant_ident).as_bytes(), b""); + } } } } - // For an enum variant that has anonymous fields (e.g. SomeEnum::MyVariant { a: u64, b: u64 } - // we generate code that looks like this: + // For an enum variant that has named fields (e.g. SomeEnum::MyVariant { a: u64, b: u64 } + // we generate code that creates an anonymous aggregate as the child of the variant, + // and makes the fields children of that aggregate. + // This child node is the same as what we would get if handling a struct, whose name + // was the empty string. + // + // For example: + // // Self::MyVariant { a, b } => { - // hasher.update(&(0 as u64).to_le_bytes()); // This is the variant's index. - // hasher.update("MyVariant").as_bytes()); - // hasher.update("a").as_bytes()); - // a.digest(hasher); - // hasher.update("b").as_bytes()); - // b.digest(hasher); + // transcript.append_var_header(context, "SomeEnum".as_bytes(), 3 as u32); + // transcript.append_agg_header("MyVariant".as_bytes(), b""); + // a.append_to_transcript_allow_omit("a".as_bytes(), transcript); + // b.append_to_transcript_allow_omit("b".as_bytes(), transcript); + // transcript.append_agg_closer("MyVariant".as_bytes(), b""); // } Fields::Named(FieldsNamed { named: fields, .. }) => { let field_idents = fields.iter().map(|field| &field.ident).collect::>(); + // These are allow_omit, because they are appearing inside an aggregate (the anonymous struct) let per_field_digest = fields .iter() .map(|field| { let field_ident = &field.ident; quote! { - hasher.update(stringify!(#field_ident).as_bytes()); - #field_ident.digest(hasher); + #field_ident.append_to_transcript_allow_omit(stringify!(#field_ident).as_bytes(), transcript); } }) .collect::>(); quote! 
{ - Self::#variant_ident { #(#field_idents),* } => { - hasher.update(&(#idx as u64).to_le_bytes()); - hasher.update(stringify!(#variant_ident).as_bytes()); - #(#per_field_digest)* + Self::#variant_ident{ #(#field_idents),* } => { + transcript.append_var_header(context, stringify!(#custom_name).as_bytes(), #which as u32); + transcript.append_agg_header(stringify!(#variant_ident).as_bytes(), b""); + #(#per_field_digest)*; + transcript.append_agg_closer(stringify!(#variant_ident).as_bytes(), b""); } } } @@ -202,25 +397,9 @@ fn try_digestible_enum( let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); - // Hash the concrete type of each of the generics. - let generic_type_digests = generics - .type_params() - .map(|type_param| { - let ident = &type_param.ident; - quote! { - hasher.update(std::any::type_name::<#ident>().as_bytes()); - } - }) - .collect::>(); - let expanded = quote! { impl #impl_generics mc_crypto_digestible::Digestible for #ident #ty_generics #where_clause { - fn digest(&self, hasher: &mut D) { - // Hash the name of the enum and generic specializations. - hasher.update(stringify!(#ident).as_bytes()); - hasher.update(stringify!(#impl_generics).as_bytes()); - #(#generic_type_digests)* - + fn append_to_transcript(&self, context: &'static [u8], transcript: &mut DT) { // Per-variant hashing. match self { #(#call)* @@ -232,7 +411,7 @@ fn try_digestible_enum( Ok(expanded.into()) } -#[proc_macro_derive(Digestible)] +#[proc_macro_derive(Digestible, attributes(digestible))] pub fn digestible(input: TokenStream) -> TokenStream { try_digestible(input).unwrap() } diff --git a/crypto/digestible/derive/test/Cargo.toml b/crypto/digestible/derive/test/Cargo.toml index b6c4c77235..eaf710e16e 100644 --- a/crypto/digestible/derive/test/Cargo.toml +++ b/crypto/digestible/derive/test/Cargo.toml @@ -7,5 +7,5 @@ edition = "2018" [dependencies] mc-crypto-digestible = { path = "../.." } mc-crypto-digestible-derive = { path = ".." 
} +mc-crypto-digestible-test-utils = { path = "../../test-utils" } generic-array = "0.14" -digest = { version = "0.9", default-features = false } diff --git a/crypto/digestible/derive/test/tests/behavior.rs b/crypto/digestible/derive/test/tests/behavior.rs index 165fef8b04..a9be6124b5 100644 --- a/crypto/digestible/derive/test/tests/behavior.rs +++ b/crypto/digestible/derive/test/tests/behavior.rs @@ -1,273 +1,547 @@ // Copyright (c) 2018-2020 MobileCoin Inc. -/// Tests against the behavior of the generated Digestible traits -use generic_array::{typenum, GenericArray}; -use mc_crypto_digestible::{Digest, Digestible}; - -// A struct implementing Digest that remembers all its historical inputs -#[derive(Clone, Default)] -struct Tester { - pub args: Vec>, -} - -impl Digest for Tester { - type OutputSize = typenum::U1; - - fn new() -> Self { - Default::default() - } - - fn update(&mut self, src: impl AsRef<[u8]>) { - self.args.push(src.as_ref().to_vec()) - } - - // Unused stuff - fn chain(self, _src: impl AsRef<[u8]>) -> Self { - unimplemented!() - } - fn finalize(self) -> GenericArray { - unimplemented!() - } - fn finalize_reset(&mut self) -> GenericArray { - unimplemented!() - } - fn reset(&mut self) {} - fn output_size() -> usize { - unimplemented!() - } - fn digest(_data: &[u8]) -> GenericArray { - unimplemented!() - } -} +/// Tests of the behavior of the macro-generated Digestible implementations +use mc_crypto_digestible::{Digestible, MerlinTranscript}; +use mc_crypto_digestible_test_utils::*; // Test structs #[derive(Digestible)] -struct Foo { +struct ExampleStruct { a: u16, b: u16, c: u16, } +#[derive(Digestible)] +#[digestible(name = "ExampleStruct")] +struct ExampleStruct2 { + c: u16, + b: u16, + a: u16, +} + #[derive(Digestible)] struct Blob(Vec); +// A structure equivalent to Blob that has been marked transparent +#[derive(Digestible)] +#[digestible(transparent)] +struct TransBlob(Vec); + #[derive(Digestible)] struct Bar { d: Blob, e: u32, - f: Foo, + f: 
ExampleStruct, } +// A Bar with a transparent field (but a different structure name) #[derive(Digestible)] -struct GenericFoo { +struct BarWithTransparent { + d: TransBlob, + e: u32, + f: ExampleStruct, +} + +// A struct with a generic parameter and members +#[derive(Digestible)] +struct GenericExampleStruct { a: X, b: X, } +// Test that an instance of ExampleStruct is being mapped to AST and hashed as expected +#[test] +fn example_struct1() { + let arg = ExampleStruct { a: 0, b: 1, c: 2 }; + let expected_ast = ASTNode::from(ASTAggregate { + context: b"foo1", + name: b"ExampleStruct".to_vec(), + elems: vec![ + ASTNode::from(ASTPrimitive { + context: b"a", + type_name: b"uint", + data: vec![0u8, 0u8], + }), + ASTNode::from(ASTPrimitive { + context: b"b", + type_name: b"uint", + data: vec![1u8, 0u8], + }), + ASTNode::from(ASTPrimitive { + context: b"c", + type_name: b"uint", + data: vec![2u8, 0u8], + }), + ], + is_completed: true, + }); + digestible_test_case_ast("foo1", &arg, expected_ast); + assert_eq!( + arg.digest32::(b"foo1"), + [ + 19, 53, 9, 198, 156, 34, 144, 43, 162, 78, 50, 32, 131, 61, 167, 17, 13, 139, 228, 70, + 4, 145, 120, 36, 160, 118, 131, 86, 224, 154, 10, 110 + ] + ); +} + +// Test that ExampleStruct2 has fields in given order and not alphabetical order #[test] -fn foo1() { - let arg = Foo { a: 0, b: 1, c: 2 }; - let mut hasher = Tester::new(); - arg.digest(&mut hasher); +fn example_struct2() { + let arg = ExampleStruct2 { a: 0, b: 1, c: 2 }; + let expected_ast = ASTNode::from(ASTAggregate { + context: b"foo1", + name: b"ExampleStruct".to_vec(), + elems: vec![ + ASTNode::from(ASTPrimitive { + context: b"c", + type_name: b"uint", + data: vec![2u8, 0u8], + }), + ASTNode::from(ASTPrimitive { + context: b"b", + type_name: b"uint", + data: vec![1u8, 0u8], + }), + ASTNode::from(ASTPrimitive { + context: b"a", + type_name: b"uint", + data: vec![0u8, 0u8], + }), + ], + is_completed: true, + }); + digestible_test_case_ast("foo1", &arg, expected_ast); + 
assert_eq!( + arg.digest32::(b"foo1"), + [ + 72, 44, 56, 17, 86, 202, 143, 191, 204, 74, 217, 227, 133, 204, 8, 16, 47, 75, 178, + 109, 202, 254, 222, 249, 89, 196, 247, 8, 140, 14, 167, 182 + ] + ); +} - let expected: Vec> = vec![ - b"Foo".to_vec(), - b"".to_vec(), - b"a".to_vec(), - vec![0u8, 0u8], - b"b".to_vec(), - vec![1u8, 0u8], - b"c".to_vec(), - vec![2u8, 0u8], - ]; +// Test that ExampleStruct2 is not interchangeable with ExampleStruct +#[test] +fn example_struct_interchangeability() { + let arg = ExampleStruct2 { a: 7, b: 5, c: 19 }; + let arg2 = ExampleStruct { a: 7, b: 5, c: 19 }; + assert_ne!( + arg.digest32::(b"test"), + arg2.digest32::(b"test") + ); +} - assert_eq!(hasher.args, expected); +#[derive(Digestible)] +struct Tricky { + field: Vec, + fi_eld: Vec, + _field: Vec, + prim: bool, + agg: String, + seq: Vec, + var: Option, +} + +// Test that a struct with tricky field names is being parsed and hashed as expected +#[test] +fn tricky_struct() { + let arg = Tricky { + field: vec![1], + fi_eld: vec![2], + _field: vec![3], + prim: false, + agg: "var".to_string(), + seq: Default::default(), + var: Some(ExampleStruct2 { a: 0, b: 1, c: 2 }), + }; + let expected_var_ast = ASTNode::from(ASTAggregate { + context: b"var", + name: b"ExampleStruct".to_vec(), + elems: vec![ + ASTNode::from(ASTPrimitive { + context: b"c", + type_name: b"uint", + data: vec![2u8, 0u8], + }), + ASTNode::from(ASTPrimitive { + context: b"b", + type_name: b"uint", + data: vec![1u8, 0u8], + }), + ASTNode::from(ASTPrimitive { + context: b"a", + type_name: b"uint", + data: vec![0u8, 0u8], + }), + ], + is_completed: true, + }); + let expected_ast = ASTNode::from(ASTAggregate { + context: b"tricky", + name: b"Tricky".to_vec(), + elems: vec![ + ASTNode::from(ASTSequence { + context: b"field", + len: 1, + elems: vec![ASTNode::from(ASTPrimitive { + context: b"", + type_name: b"int", + data: vec![1u8, 0u8, 0u8, 0u8], + })], + }), + ASTNode::from(ASTSequence { + context: b"fi_eld", + len: 1, + 
elems: vec![ASTNode::from(ASTPrimitive { + context: b"", + type_name: b"int", + data: vec![2u8, 0u8, 0u8, 0u8], + })], + }), + ASTNode::from(ASTSequence { + context: b"_field", + len: 1, + elems: vec![ASTNode::from(ASTPrimitive { + context: b"", + type_name: b"int", + data: vec![3u8, 0u8, 0u8, 0u8], + })], + }), + ASTNode::from(ASTPrimitive { + context: b"prim", + type_name: b"bool", + data: vec![0u8], + }), + ASTNode::from(ASTPrimitive { + context: b"agg", + type_name: b"str", + data: b"var".to_vec(), + }), + expected_var_ast.clone(), + ], + is_completed: true, + }); + digestible_test_case_ast("tricky", &arg, expected_ast); + assert_eq!( + arg.digest32::(b"tricky"), + [ + 7, 77, 36, 165, 11, 239, 19, 38, 44, 127, 117, 48, 130, 150, 9, 58, 103, 36, 174, 126, + 78, 182, 101, 201, 194, 14, 47, 227, 220, 99, 6, 143 + ] + ); } +// Test that an instance of Blob is being mapped to AST and hashed as expected #[test] fn blob1() { let arg = Blob(vec![1, 2, 3, 4]); - let mut hasher = Tester::new(); - arg.digest(&mut hasher); - - let expected: Vec> = vec![ - b"Blob".to_vec(), - b"".to_vec(), - b"0".to_vec(), - 4usize.to_le_bytes().to_vec(), - vec![1u8, 2u8, 3u8, 4u8], - ]; + let expected_ast = ASTNode::Aggregate(ASTAggregate { + context: b"blob1", + name: b"Blob".to_vec(), + elems: vec![ASTNode::Primitive(ASTPrimitive { + context: b"0", + type_name: b"bytes", + data: vec![1u8, 2u8, 3u8, 4u8], + })], + is_completed: true, + }); + digestible_test_case_ast("blob1", &arg, expected_ast); + assert_eq!( + arg.digest32::(b"blob1"), + [ + 59, 63, 205, 99, 4, 221, 221, 230, 90, 1, 135, 226, 106, 52, 210, 105, 88, 37, 182, 26, + 208, 240, 152, 4, 226, 0, 204, 11, 10, 187, 14, 48 + ] + ); +} - assert_eq!(hasher.args, expected); +// Test that an instance of TransBlob is being mapped to AST and hashed as expected +#[test] +fn blob2() { + let arg = TransBlob(vec![1, 2, 3, 4]); + let expected_ast = ASTNode::Primitive(ASTPrimitive { + context: b"blob2", + type_name: b"bytes", + data: 
vec![1u8, 2u8, 3u8, 4u8], + }); + digestible_test_case_ast("blob2", &arg, expected_ast); + assert_eq!( + arg.digest32::(b"blob2"), + [ + 221, 88, 184, 210, 180, 30, 40, 40, 89, 37, 221, 90, 185, 33, 199, 133, 99, 102, 67, + 196, 197, 85, 67, 234, 151, 160, 111, 230, 234, 125, 181, 1 + ] + ); + assert_eq!( + arg.digest32::(b"blob2"), + vec![1u8, 2u8, 3u8, 4u8].digest32::(b"blob2") + ); } +// Test that an instance of Bar is being mapped to AST and hashed as expected #[test] fn bar1() { let arg = Bar { d: Blob(b"Koala".to_vec()), e: u32::max_value(), - f: Foo { a: 5, b: 6, c: 7 }, + f: ExampleStruct { a: 5, b: 6, c: 7 }, }; - let mut hasher = Tester::new(); - arg.digest(&mut hasher); - - let expected: Vec> = vec![ - b"Bar".to_vec(), - b"".to_vec(), - b"d".to_vec(), - b"Blob".to_vec(), - b"".to_vec(), - b"0".to_vec(), - 5usize.to_le_bytes().to_vec(), - b"Koala".to_vec(), - b"e".to_vec(), - vec![255u8, 255u8, 255u8, 255u8], - b"f".to_vec(), - b"Foo".to_vec(), - b"".to_vec(), - b"a".to_vec(), - vec![5u8, 0u8], - b"b".to_vec(), - vec![6u8, 0u8], - b"c".to_vec(), - vec![7u8, 0u8], - ]; + let expected_ast = ASTNode::from(ASTAggregate { + context: b"bar1", + name: b"Bar".to_vec(), + elems: vec![ + ASTNode::from(ASTAggregate { + context: b"d", + name: b"Blob".to_vec(), + elems: vec![ASTNode::from(ASTPrimitive { + context: b"0", + type_name: b"bytes", + data: b"Koala".to_vec(), + })], + is_completed: true, + }), + ASTNode::from(ASTPrimitive { + context: b"e", + type_name: b"uint", + data: vec![255u8; 4], + }), + ASTNode::from(ASTAggregate { + context: b"f", + name: b"ExampleStruct".to_vec(), + elems: vec![ + ASTNode::from(ASTPrimitive { + context: b"a", + type_name: b"uint", + data: vec![5u8, 0u8], + }), + ASTNode::from(ASTPrimitive { + context: b"b", + type_name: b"uint", + data: vec![6u8, 0u8], + }), + ASTNode::from(ASTPrimitive { + context: b"c", + type_name: b"uint", + data: vec![7u8, 0u8], + }), + ], + is_completed: true, + }), + ], + is_completed: true, + }); + 
digestible_test_case_ast("bar1", &arg, expected_ast); + assert_eq!( + arg.digest32::(b"bar1"), + [ + 214, 103, 124, 244, 227, 71, 218, 40, 112, 211, 130, 16, 139, 166, 53, 222, 255, 143, + 99, 32, 21, 17, 93, 118, 15, 237, 67, 161, 33, 130, 76, 65 + ] + ); +} - assert_eq!(hasher.args, expected); +// Test that an instance of BarWithTransparent is being mapped to AST and hashed as expected +#[test] +fn bar2() { + let arg = BarWithTransparent { + d: TransBlob(b"Koala".to_vec()), + e: u32::max_value(), + f: ExampleStruct { a: 5, b: 6, c: 7 }, + }; + let expected_ast = ASTNode::Aggregate(ASTAggregate { + context: b"bar2", + name: b"BarWithTransparent".to_vec(), + elems: vec![ + ASTNode::from(ASTPrimitive { + context: b"d", + type_name: b"bytes", + data: b"Koala".to_vec(), + }), + ASTNode::from(ASTPrimitive { + context: b"e", + type_name: b"uint", + data: vec![255u8; 4], + }), + ASTNode::from(ASTAggregate { + context: b"f", + name: b"ExampleStruct".to_vec(), + elems: vec![ + ASTNode::from(ASTPrimitive { + context: b"a", + type_name: b"uint", + data: vec![5u8, 0u8], + }), + ASTNode::from(ASTPrimitive { + context: b"b", + type_name: b"uint", + data: vec![6u8, 0u8], + }), + ASTNode::from(ASTPrimitive { + context: b"c", + type_name: b"uint", + data: vec![7u8, 0u8], + }), + ], + is_completed: true, + }), + ], + is_completed: true, + }); + digestible_test_case_ast("bar2", &arg, expected_ast); + assert_eq!( + arg.digest32::(b"bar2"), + [ + 191, 9, 66, 251, 105, 132, 21, 123, 90, 28, 40, 211, 231, 168, 150, 16, 148, 48, 82, + 65, 4, 141, 187, 101, 72, 238, 241, 197, 85, 34, 142, 249 + ] + ); } +// Test cases for GenericExampleStruct:: and GenericExampleStruct::> #[test] -fn generic_foo1() { - let arg = GenericFoo { +fn generic_example_struct1() { + let arg = GenericExampleStruct { a: 123 as u32, b: 456 as u32, }; - let mut hasher = Tester::new(); - arg.digest(&mut hasher); - - let expected: Vec> = vec![ - b"GenericFoo".to_vec(), - b"< X : Digestible >".to_vec(), - 
b"u32".to_vec(), - b"a".to_vec(), - (123 as u32).to_le_bytes().to_vec(), - b"b".to_vec(), - (456 as u32).to_le_bytes().to_vec(), - ]; + let expected_ast = ASTNode::from(ASTAggregate { + context: b"genfoo1", + name: b"GenericExampleStruct".to_vec(), + elems: vec![ + ASTNode::from(ASTPrimitive { + context: b"a", + type_name: b"uint", + data: 123u32.to_le_bytes().to_vec(), + }), + ASTNode::from(ASTPrimitive { + context: b"b", + type_name: b"uint", + data: 456u32.to_le_bytes().to_vec(), + }), + ], + is_completed: true, + }); + digestible_test_case_ast("genfoo1", &arg, expected_ast.clone()); + assert_eq!( + arg.digest32::(b"genfoo1"), + [ + 77, 201, 127, 225, 56, 107, 48, 148, 235, 56, 108, 130, 31, 185, 54, 31, 82, 211, 48, + 94, 227, 85, 8, 161, 189, 241, 84, 171, 69, 0, 95, 109 + ] + ); + + let arg2 = GenericExampleStruct { + a: Some(123 as u32), + b: Some(456 as u32), + }; - assert_eq!(hasher.args, expected); + digestible_test_case_ast("genfoo1", &arg2, expected_ast); + assert_eq!( + arg2.digest32::(b"genfoo1"), + [ + 77, 201, 127, 225, 56, 107, 48, 148, 235, 56, 108, 130, 31, 185, 54, 31, 82, 211, 48, + 94, 227, 85, 8, 161, 189, 241, 84, 171, 69, 0, 95, 109 + ] + ); } +// Test cases for GenericExampleStruct:: and GenericExampleStruct::> #[test] -fn generic_foo2() { - let arg = GenericFoo { - a: String::from("str1"), - b: String::from("str2"), +fn generic_example_struct2() { + let arg = GenericExampleStruct { + a: 123 as i32, + b: 456 as i32, }; - let mut hasher = Tester::new(); - arg.digest(&mut hasher); - - let expected: Vec> = vec![ - b"GenericFoo".to_vec(), - b"< X : Digestible >".to_vec(), - b"alloc::string::String".to_vec(), - b"a".to_vec(), - (4 as usize).to_le_bytes().to_vec(), - b"str1".to_vec(), - b"b".to_vec(), - (4 as usize).to_le_bytes().to_vec(), - b"str2".to_vec(), - ]; + let expected_ast = ASTNode::from(ASTAggregate { + context: b"genfoo2", + name: b"GenericExampleStruct".to_vec(), + elems: vec![ + ASTNode::from(ASTPrimitive { + context: b"a", + 
type_name: b"int", + data: 123u32.to_le_bytes().to_vec(), + }), + ASTNode::from(ASTPrimitive { + context: b"b", + type_name: b"int", + data: 456u32.to_le_bytes().to_vec(), + }), + ], + is_completed: true, + }); + digestible_test_case_ast("genfoo2", &arg, expected_ast.clone()); + assert_eq!( + arg.digest32::(b"genfoo2"), + [ + 27, 164, 2, 106, 152, 28, 209, 36, 245, 234, 252, 175, 99, 43, 159, 210, 187, 204, 78, + 238, 220, 43, 143, 239, 232, 89, 245, 87, 170, 14, 217, 198 + ] + ); + + let arg2 = GenericExampleStruct { + a: Some(123 as i32), + b: Some(456 as i32), + }; - assert_eq!(hasher.args, expected); + digestible_test_case_ast("genfoo2", &arg2, expected_ast); + assert_eq!( + arg2.digest32::(b"genfoo2"), + [ + 27, 164, 2, 106, 152, 28, 209, 36, 245, 234, 252, 175, 99, 43, 159, 210, 187, 204, 78, + 238, 220, 43, 143, 239, 232, 89, 245, 87, 170, 14, 217, 198 + ] + ); } -// Test digesting an enum. +// Test cases for GenericExampleStruct:: and GenericExampleStruct::> #[test] -fn test_digest_enum() { - #[derive(Digestible)] - enum TestEnum { - Option1, - Option2(V), - Option3(u32, String), - Option4 { a: V, b: V }, - } - - { - let obj = TestEnum::::Option1; - - let expected: Vec> = vec![ - b"TestEnum".to_vec(), - b"< V : Digestible >".to_vec(), - b"u64".to_vec(), - (0 as u64).to_le_bytes().to_vec(), - b"Option1".to_vec(), - ]; - - let mut hasher = Tester::new(); - obj.digest(&mut hasher); - assert_eq!(hasher.args, expected); - } - - { - let obj = TestEnum::::Option2(123); - - let expected: Vec> = vec![ - b"TestEnum".to_vec(), - b"< V : Digestible >".to_vec(), - b"u64".to_vec(), - (1 as u64).to_le_bytes().to_vec(), - b"Option2".to_vec(), - b"0".to_vec(), - (123 as u64).to_le_bytes().to_vec(), - ]; - - let mut hasher = Tester::new(); - obj.digest(&mut hasher); - assert_eq!(hasher.args, expected); - } - - { - let s: &str = "a string"; - let obj = TestEnum::::Option3(1234, s.to_owned()); - - let expected: Vec> = vec![ - b"TestEnum".to_vec(), - b"< V : Digestible 
>".to_vec(), - b"u64".to_vec(), - (2 as u64).to_le_bytes().to_vec(), - b"Option3".to_vec(), - b"0".to_vec(), - (1234 as u32).to_le_bytes().to_vec(), - b"1".to_vec(), - s.len().to_le_bytes().to_vec(), - s.as_bytes().to_vec(), - ]; - - let mut hasher = Tester::new(); - obj.digest(&mut hasher); - assert_eq!(hasher.args, expected); - } - - { - let obj = TestEnum::::Option4 { a: 123, b: 456 }; +fn generic_example_struct3() { + let arg = GenericExampleStruct { + a: String::from("str1"), + b: String::from("str2"), + }; - let expected: Vec> = vec![ - b"TestEnum".to_vec(), - b"< V : Digestible >".to_vec(), - b"u64".to_vec(), - (3 as u64).to_le_bytes().to_vec(), - b"Option4".to_vec(), - b"a".to_vec(), - (123 as u64).to_le_bytes().to_vec(), - b"b".to_vec(), - (456 as u64).to_le_bytes().to_vec(), - ]; + let expected_ast = ASTNode::from(ASTAggregate { + context: b"genfoo3", + name: b"GenericExampleStruct".to_vec(), + elems: vec![ + ASTNode::from(ASTPrimitive { + context: b"a", + type_name: b"str", + data: "str1".as_bytes().to_vec(), + }), + ASTNode::from(ASTPrimitive { + context: b"b", + type_name: b"str", + data: "str2".as_bytes().to_vec(), + }), + ], + is_completed: true, + }); + digestible_test_case_ast("genfoo3", &arg, expected_ast.clone()); + assert_eq!( + arg.digest32::(b"genfoo3"), + [ + 93, 6, 80, 35, 32, 166, 252, 185, 172, 99, 15, 69, 157, 45, 10, 1, 56, 227, 232, 229, + 16, 90, 97, 138, 80, 139, 46, 11, 243, 66, 11, 169 + ] + ); + + let arg2 = GenericExampleStruct { + a: Some(String::from("str1")), + b: Some(String::from("str2")), + }; - let mut hasher = Tester::new(); - obj.digest(&mut hasher); - assert_eq!(hasher.args, expected); - } + digestible_test_case_ast("genfoo3", &arg2, expected_ast); + assert_eq!( + arg2.digest32::(b"genfoo3"), + [ + 93, 6, 80, 35, 32, 166, 252, 185, 172, 99, 15, 69, 157, 45, 10, 1, 56, 227, 232, 229, + 16, 90, 97, 138, 80, 139, 46, 11, 243, 66, 11, 169 + ] + ); } diff --git a/crypto/digestible/derive/test/tests/schema_evolution.rs 
b/crypto/digestible/derive/test/tests/schema_evolution.rs new file mode 100644 index 0000000000..cfbc234ff3 --- /dev/null +++ b/crypto/digestible/derive/test/tests/schema_evolution.rs @@ -0,0 +1,736 @@ +// Copyright (c) 2018-2020 MobileCoin Inc. + +/// Tests of the schema evolution properties of derive(Digestible) implementations +use mc_crypto_digestible::{Digestible, MerlinTranscript}; +use mc_crypto_digestible_test_utils::*; + +// An example structure +#[derive(Digestible)] +struct Thing { + a: u64, +} + +// A new field is added which is marked optional +#[derive(Digestible)] +#[digestible(name = "Thing")] +struct ThingV2 { + a: u64, + b: Option, +} + +// An old field which was not optional is marked optional +#[derive(Digestible)] +#[digestible(name = "Thing")] +struct ThingV3 { + a: Option, + b: Option, +} + +// An new repeated field appears +#[derive(Digestible)] +#[digestible(name = "Thing")] +struct ThingV4 { + a: Option, + b: Option, + c: Vec, +} + +// Test vectors for a few instances of the Thing struct, and versions of it +#[test] +fn thing_struct() { + assert_eq!( + Thing { a: 19 }.digest32::(b"thing"), + [ + 129, 172, 63, 2, 11, 236, 144, 45, 86, 222, 142, 172, 125, 149, 244, 67, 141, 193, 126, + 52, 249, 50, 226, 15, 239, 255, 253, 28, 212, 67, 215, 138 + ] + ); + assert_eq!( + ThingV2 { a: 19, b: Some(11) }.digest32::(b"thing"), + [ + 215, 162, 90, 161, 25, 42, 100, 213, 214, 162, 132, 209, 46, 150, 200, 229, 152, 101, + 152, 177, 103, 24, 152, 188, 51, 4, 26, 117, 184, 235, 117, 189 + ] + ); + assert_eq!( + ThingV4 { + a: Some(19), + b: None, + c: vec![true, false] + } + .digest32::(b"thing"), + [ + 10, 34, 82, 129, 5, 30, 197, 99, 66, 246, 191, 25, 96, 23, 84, 249, 228, 156, 252, 247, + 30, 194, 152, 147, 221, 244, 220, 46, 23, 236, 213, 203 + ] + ); +} + +// Tests for struct_schema evolution +#[test] +fn struct_schema_evolution() { + assert_eq!( + calculate_digest_ast(b"test", &Thing { a: 7 }), + calculate_digest_ast(b"test", &ThingV2 { a: 7, b: 
None }) + ); + + assert_eq!( + calculate_digest_ast(b"test", &ThingV2 { a: 7, b: None }), + calculate_digest_ast( + b"test", + &ThingV3 { + a: Some(7), + b: None + } + ) + ); + + assert_eq!( + calculate_digest_ast(b"test", &ThingV2 { a: 7, b: Some(11) }), + calculate_digest_ast( + b"test", + &ThingV3 { + a: Some(7), + b: Some(11) + } + ) + ); + + assert_eq!( + calculate_digest_ast( + b"test", + &ThingV3 { + a: Some(7), + b: Some(11) + } + ), + calculate_digest_ast( + b"test", + &ThingV4 { + a: Some(7), + b: Some(11), + c: Default::default() + } + ) + ); + + assert_eq!( + Thing { a: 3 }.digest32::(b"test"), + ThingV2 { a: 3, b: None }.digest32::(b"test") + ); + + assert_eq!( + ThingV2 { a: 3, b: None }.digest32::(b"test"), + ThingV3 { + a: Some(3), + b: None + } + .digest32::(b"test") + ); + + assert_eq!( + Thing { a: 3 }.digest32::(b"test"), + ThingV4 { + a: Some(3), + b: None, + c: Default::default() + } + .digest32::(b"test") + ); + + assert_eq!( + ThingV2 { a: 14, b: Some(99) }.digest32::(b"test"), + ThingV3 { + a: Some(14), + b: Some(99) + } + .digest32::(b"test") + ); + + assert_eq!( + ThingV2 { a: 14, b: Some(99) }.digest32::(b"test"), + ThingV4 { + a: Some(14), + b: Some(99), + c: Default::default() + } + .digest32::(b"test") + ); +} + +// Tests for what happens in exotic cases, like Option