Skip to content

Commit 5fa3e80

Browse files
Branch-free varint size calculation. On android art, compiles to:
``` int com.google.android.test.Outer.computeUInt32SizeNoTag(int) [24 bytes] 0x000024d0 mov w0, #0x160 0x000024d4 clz w1, w1 0x000024d8 add w1, w1, w1, lsl #3 0x000024dc sub w0, w0, w1 0x000024e0 lsr w0, w0, #6 0x000024e4 ret ``` versus existing: ``` int com.google.android.test.Outer.computeUInt32SizeNoTag(int) [72 bytes] 0x000022a0 and w0, w1, #0xffffff80 0x000022a4 cbnz w0, #+0xc (addr 0x22b0) 0x000022a8 mov w0, #0x1 0x000022ac b #+0x38 (addr 0x22e4) 0x000022b0 and w0, w1, #0xffffc000 0x000022b4 cbnz w0, #+0xc (addr 0x22c0) 0x000022b8 mov w0, #0x2 0x000022bc b #+0x28 (addr 0x22e4) 0x000022c0 and w0, w1, #0xffe00000 0x000022c4 cbnz w0, #+0xc (addr 0x22d0) 0x000022c8 mov w0, #0x3 0x000022cc b #+0x18 (addr 0x22e4) 0x000022d0 mov w2, #0x5 0x000022d4 mov w0, #0x4 0x000022d8 and w1, w1, #0xf0000000 0x000022dc cmp w1, #0x0 (0) 0x000022e0 csel w0, w2, w0, ne 0x000022e4 ret ``` PiperOrigin-RevId: 584143825
1 parent e35cf10 commit 5fa3e80

File tree

2 files changed

+125
-44
lines changed

2 files changed

+125
-44
lines changed

java/core/src/main/java/com/google/protobuf/CodedOutputStream.java

Lines changed: 50 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ public abstract class CodedOutputStream extends ByteOutput {
4040
/** Used to adapt to the experimental {@link Writer} interface. */
4141
CodedOutputStreamWriter wrapper;
4242

43-
/** @deprecated Use {@link #computeFixed32SizeNoTag(int)} instead. */
43+
/**
44+
* @deprecated Use {@link #computeFixed32SizeNoTag(int)} instead.
45+
*/
4446
@Deprecated public static final int LITTLE_ENDIAN_32_SIZE = FIXED32_SIZE;
4547

4648
/** The buffer size used in {@link #newInstance(OutputStream)}. */
@@ -669,9 +671,8 @@ public static int computeRawMessageSetExtensionSize(
669671
}
670672

671673
/**
672-
* Compute the number of bytes that would be needed to encode a lazily parsed MessageSet
673-
* extension field to the stream. For historical reasons, the wire format differs from normal
674-
* fields.
674+
* Compute the number of bytes that would be needed to encode a lazily parsed MessageSet extension
675+
* field to the stream. For historical reasons, the wire format differs from normal fields.
675676
*/
676677
public static int computeLazyFieldMessageSetExtensionSize(
677678
final int fieldNumber, final LazyFieldLite value) {
@@ -692,29 +693,52 @@ public static int computeTagSize(final int fieldNumber) {
692693
* tag.
693694
*/
694695
public static int computeInt32SizeNoTag(final int value) {
695-
if (value >= 0) {
696-
return computeUInt32SizeNoTag(value);
697-
} else {
698-
// Must sign-extend.
699-
return MAX_VARINT_SIZE;
700-
}
696+
return computeUInt64SizeNoTag((long) value);
701697
}
702698

703699
/** Compute the number of bytes that would be needed to encode a {@code uint32} field. */
704700
public static int computeUInt32SizeNoTag(final int value) {
705-
if ((value & (~0 << 7)) == 0) {
706-
return 1;
707-
}
708-
if ((value & (~0 << 14)) == 0) {
709-
return 2;
710-
}
711-
if ((value & (~0 << 21)) == 0) {
712-
return 3;
713-
}
714-
if ((value & (~0 << 28)) == 0) {
715-
return 4;
716-
}
717-
return 5;
701+
/*
702+
This code is ported from the C++ varint implementation.
703+
Implementation notes:
704+
705+
To calculate varint size, we want to count the number of 7 bit chunks required. Rather than using
706+
division by 7 to accomplish this, we use multiplication by 9/64. This has a number of important
707+
properties:
708+
* It's roughly 1/7.111111. This makes the 0 bits set case have the same value as the 7 bits set
709+
case, so offsetting by 1 gives us the correct value we want for integers up to 448 bits.
710+
* Multiplying by 9 is special. x * 9 = (x << 3) + x, and so this multiplication can be done by a
711+
single shifted add on arm (add w0, w0, w0, lsl #3), or a single lea instruction
712+
(leal (%rax,%rax,8), %eax) on x86.
713+
* Dividing by 64 is a 6 bit right shift.
714+
715+
An explicit non-sign-extended right shift is used instead of the more obvious '/ 64' because
716+
that actually produces worse code on android arm64 at time of authoring because of sign
717+
extension. Rather than
718+
lsr w0, w0, #6
719+
It would emit:
720+
add w16, w0, #0x3f (63)
721+
cmp w0, #0x0 (0)
722+
csel w0, w16, w0, lt
723+
asr w0, w0, #6
724+
725+
Summarized:
726+
floor((Integer.SIZE - clz) / 7.1111) + 1
727+
((Integer.SIZE - clz) * 9) / 64 + 1
728+
(((Integer.SIZE - clz) * 9) >>> 6) + 1
729+
((Integer.SIZE - clz) * 9 + (1 << 6)) >>> 6
730+
(Integer.SIZE * 9 + (1 << 6) - clz * 9) >>> 6
731+
(352 - clz * 9) >>> 6
732+
on arm:
733+
(352 - clz - (clz << 3)) >>> 6
734+
on x86:
735+
(352 - lea(clz, clz, 8)) >>> 6
736+
737+
If you make changes here, please validate their compiled output on different architectures and
738+
runtimes.
739+
*/
740+
int clz = Integer.numberOfLeadingZeros(value);
741+
return ((Integer.SIZE * 9 + (1 << 6)) - (clz * 9)) >>> 6;
718742
}
719743

720744
/** Compute the number of bytes that would be needed to encode an {@code sint32} field. */
@@ -745,27 +769,9 @@ public static int computeInt64SizeNoTag(final long value) {
745769
* tag.
746770
*/
747771
public static int computeUInt64SizeNoTag(long value) {
748-
// handle two popular special cases up front ...
749-
if ((value & (~0L << 7)) == 0L) {
750-
return 1;
751-
}
752-
if (value < 0L) {
753-
return 10;
754-
}
755-
// ... leaving us with 8 remaining, which we can divide and conquer
756-
int n = 2;
757-
if ((value & (~0L << 35)) != 0L) {
758-
n += 4;
759-
value >>>= 28;
760-
}
761-
if ((value & (~0L << 21)) != 0L) {
762-
n += 2;
763-
value >>>= 14;
764-
}
765-
if ((value & (~0L << 14)) != 0L) {
766-
n += 1;
767-
}
768-
return n;
772+
int clz = Long.numberOfLeadingZeros(value);
773+
// See computeUInt32SizeNoTag for explanation
774+
return ((Long.SIZE * 9 + (1 << 6)) - (clz * 9)) >>> 6;
769775
}
770776

771777
/** Compute the number of bytes that would be needed to encode an {@code sint64} field. */

java/core/src/test/java/com/google/protobuf/CodedOutputStreamTest.java

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,81 @@ public void testEncodeZigZag() throws Exception {
327327
.isEqualTo(-75123905439571256L);
328328
}
329329

330+
@Test
331+
public void computeIntSize() {
332+
assertThat(CodedOutputStream.computeUInt32SizeNoTag(0)).isEqualTo(1);
333+
assertThat(CodedOutputStream.computeUInt64SizeNoTag(0)).isEqualTo(1);
334+
int i;
335+
for (i = 0; i < 7; i++) {
336+
assertThat(CodedOutputStream.computeInt32SizeNoTag(1 << i)).isEqualTo(1);
337+
assertThat(CodedOutputStream.computeUInt32SizeNoTag(1 << i)).isEqualTo(1);
338+
assertThat(CodedOutputStream.computeUInt64SizeNoTag(1L << i)).isEqualTo(1);
339+
}
340+
for (; i < 14; i++) {
341+
assertThat(CodedOutputStream.computeInt32SizeNoTag(1 << i)).isEqualTo(2);
342+
assertThat(CodedOutputStream.computeUInt32SizeNoTag(1 << i)).isEqualTo(2);
343+
assertThat(CodedOutputStream.computeUInt64SizeNoTag(1L << i)).isEqualTo(2);
344+
}
345+
for (; i < 21; i++) {
346+
assertThat(CodedOutputStream.computeInt32SizeNoTag(1 << i)).isEqualTo(3);
347+
assertThat(CodedOutputStream.computeUInt32SizeNoTag(1 << i)).isEqualTo(3);
348+
assertThat(CodedOutputStream.computeUInt64SizeNoTag(1L << i)).isEqualTo(3);
349+
}
350+
for (; i < 28; i++) {
351+
assertThat(CodedOutputStream.computeInt32SizeNoTag(1 << i)).isEqualTo(4);
352+
assertThat(CodedOutputStream.computeUInt32SizeNoTag(1 << i)).isEqualTo(4);
353+
assertThat(CodedOutputStream.computeUInt64SizeNoTag(1L << i)).isEqualTo(4);
354+
}
355+
for (; i < 31; i++) {
356+
assertThat(CodedOutputStream.computeInt32SizeNoTag(1 << i)).isEqualTo(5);
357+
assertThat(CodedOutputStream.computeUInt32SizeNoTag(1 << i)).isEqualTo(5);
358+
assertThat(CodedOutputStream.computeUInt64SizeNoTag(1L << i)).isEqualTo(5);
359+
}
360+
for (; i < 32; i++) {
361+
assertThat(CodedOutputStream.computeInt32SizeNoTag(1 << i)).isEqualTo(10);
362+
assertThat(CodedOutputStream.computeUInt32SizeNoTag(1 << i)).isEqualTo(5);
363+
assertThat(CodedOutputStream.computeUInt64SizeNoTag(1L << i)).isEqualTo(5);
364+
}
365+
for (; i < 35; i++) {
366+
assertThat(CodedOutputStream.computeUInt64SizeNoTag(1L << i)).isEqualTo(5);
367+
}
368+
for (; i < 42; i++) {
369+
assertThat(CodedOutputStream.computeUInt64SizeNoTag(1L << i)).isEqualTo(6);
370+
}
371+
for (; i < 49; i++) {
372+
assertThat(CodedOutputStream.computeUInt64SizeNoTag(1L << i)).isEqualTo(7);
373+
}
374+
for (; i < 56; i++) {
375+
assertThat(CodedOutputStream.computeUInt64SizeNoTag(1L << i)).isEqualTo(8);
376+
}
377+
for (; i < 63; i++) {
378+
assertThat(CodedOutputStream.computeUInt64SizeNoTag(1L << i)).isEqualTo(9);
379+
}
380+
}
381+
382+
@Test
383+
public void computeTagSize() {
384+
assertThat(CodedOutputStream.computeTagSize(0)).isEqualTo(1);
385+
int i;
386+
for (i = 0; i < 4; i++) {
387+
assertThat(CodedOutputStream.computeTagSize(1 << i)).isEqualTo(1);
388+
}
389+
for (; i < 11; i++) {
390+
assertThat(CodedOutputStream.computeTagSize(1 << i)).isEqualTo(2);
391+
}
392+
for (; i < 18; i++) {
393+
assertThat(CodedOutputStream.computeTagSize(1 << i)).isEqualTo(3);
394+
}
395+
for (; i < 25; i++) {
396+
assertThat(CodedOutputStream.computeTagSize(1 << i)).isEqualTo(4);
397+
}
398+
for (; i < 29; i++) {
399+
assertThat(CodedOutputStream.computeTagSize(1 << i)).isEqualTo(5);
400+
}
401+
// Invalid tags
402+
assertThat(CodedOutputStream.computeTagSize((1 << 30) + 1)).isEqualTo(1);
403+
}
404+
330405
/** Tests writing a whole message with every field type. */
331406
@Test
332407
public void testWriteWholeMessage() throws Exception {

0 commit comments

Comments
 (0)