Skip to content

Commit c778f96

Browse files
WOnder93 authored and herbertx committed
crypto: lrw - Optimize tweak computation
This patch rewrites the tweak computation to a slightly simpler method
that performs less bswaps. Based on performance measurements the new
code seems to provide slightly better performance than the old one.

PERFORMANCE MEASUREMENTS (x86_64)
Performed using: https://gitlab.com/omos/linux-crypto-bench
Crypto driver used: lrw(ecb-aes-aesni)

Before:
ALGORITHM   KEY (b)   DATA (B)   TIME ENC (ns)   TIME DEC (ns)
lrw(aes)        256         64             204             286
lrw(aes)        320         64             227             203
lrw(aes)        384         64             208             204
lrw(aes)        256        512             441             439
lrw(aes)        320        512             456             455
lrw(aes)        384        512             469             483
lrw(aes)        256       4096            2136            2190
lrw(aes)        320       4096            2161            2213
lrw(aes)        384       4096            2295            2369
lrw(aes)        256      16384            7692            7868
lrw(aes)        320      16384            8230            8691
lrw(aes)        384      16384            8971            8813
lrw(aes)        256      32768           15336           15560
lrw(aes)        320      32768           16410           16346
lrw(aes)        384      32768           18023           17465

After:
ALGORITHM   KEY (b)   DATA (B)   TIME ENC (ns)   TIME DEC (ns)
lrw(aes)        256         64             200             203
lrw(aes)        320         64             202             204
lrw(aes)        384         64             204             205
lrw(aes)        256        512             415             415
lrw(aes)        320        512             432             440
lrw(aes)        384        512             449             451
lrw(aes)        256       4096            1838            1995
lrw(aes)        320       4096            2123            1980
lrw(aes)        384       4096            2100            2119
lrw(aes)        256      16384            7183            6954
lrw(aes)        320      16384            7844            7631
lrw(aes)        384      16384            8256            8126
lrw(aes)        256      32768           14772           14484
lrw(aes)        320      32768           15281           15431
lrw(aes)        384      32768           16469           16293

Signed-off-by: Ondrej Mosnacek <[email protected]>
Signed-off-by: Herbert Xu <[email protected]>
1 parent dc6d6d5 commit c778f96

File tree

1 file changed

+37
-24
lines changed

1 file changed

+37
-24
lines changed

crypto/lrw.c

Lines changed: 37 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -120,27 +120,28 @@ static int setkey(struct crypto_skcipher *parent, const u8 *key,
120120
return 0;
121121
}
122122

123-
static inline void inc(be128 *iv)
124-
{
125-
be64_add_cpu(&iv->b, 1);
126-
if (!iv->b)
127-
be64_add_cpu(&iv->a, 1);
128-
}
129-
130-
/* this returns the number of consequative 1 bits starting
131-
* from the right, get_index128(00 00 00 00 00 00 ... 00 00 10 FB) = 2 */
132-
static inline int get_index128(be128 *block)
123+
/*
124+
* Returns the number of trailing '1' bits in the words of the counter, which is
125+
* represented by 4 32-bit words, arranged from least to most significant.
126+
* At the same time, increments the counter by one.
127+
*
128+
* For example:
129+
*
130+
* u32 counter[4] = { 0xFFFFFFFF, 0x1, 0x0, 0x0 };
131+
* int i = next_index(&counter);
132+
* // i == 33, counter == { 0x0, 0x2, 0x0, 0x0 }
133+
*/
134+
static int next_index(u32 *counter)
133135
{
134-
int x;
135-
__be32 *p = (__be32 *) block;
136+
int i, res = 0;
136137

137-
for (p += 3, x = 0; x < 128; p--, x += 32) {
138-
u32 val = be32_to_cpup(p);
139-
140-
if (!~val)
141-
continue;
142-
143-
return x + ffz(val);
138+
for (i = 0; i < 4; i++) {
139+
if (counter[i] + 1 != 0) {
140+
res += ffz(counter[i]++);
141+
break;
142+
}
143+
counter[i] = 0;
144+
res += 32;
144145
}
145146

146147
/*
@@ -214,8 +215,9 @@ static int pre_crypt(struct skcipher_request *req)
214215
struct scatterlist *sg;
215216
unsigned cryptlen;
216217
unsigned offset;
217-
be128 *iv;
218218
bool more;
219+
__be32 *iv;
220+
u32 counter[4];
219221
int err;
220222

221223
subreq = &rctx->subreq;
@@ -230,7 +232,12 @@ static int pre_crypt(struct skcipher_request *req)
230232
cryptlen, req->iv);
231233

232234
err = skcipher_walk_virt(&w, subreq, false);
233-
iv = w.iv;
235+
iv = (__be32 *)w.iv;
236+
237+
counter[0] = be32_to_cpu(iv[3]);
238+
counter[1] = be32_to_cpu(iv[2]);
239+
counter[2] = be32_to_cpu(iv[1]);
240+
counter[3] = be32_to_cpu(iv[0]);
234241

235242
while (w.nbytes) {
236243
unsigned int avail = w.nbytes;
@@ -247,10 +254,16 @@ static int pre_crypt(struct skcipher_request *req)
247254
/* T <- I*Key2, using the optimization
248255
* discussed in the specification */
249256
be128_xor(&rctx->t, &rctx->t,
250-
&ctx->mulinc[get_index128(iv)]);
251-
inc(iv);
257+
&ctx->mulinc[next_index(counter)]);
252258
} while ((avail -= bs) >= bs);
253259

260+
if (w.nbytes == w.total) {
261+
iv[0] = cpu_to_be32(counter[3]);
262+
iv[1] = cpu_to_be32(counter[2]);
263+
iv[2] = cpu_to_be32(counter[1]);
264+
iv[3] = cpu_to_be32(counter[0]);
265+
}
266+
254267
err = skcipher_walk_done(&w, avail);
255268
}
256269

@@ -548,7 +561,7 @@ static int create(struct crypto_template *tmpl, struct rtattr **tb)
548561
inst->alg.base.cra_priority = alg->base.cra_priority;
549562
inst->alg.base.cra_blocksize = LRW_BLOCK_SIZE;
550563
inst->alg.base.cra_alignmask = alg->base.cra_alignmask |
551-
(__alignof__(u64) - 1);
564+
(__alignof__(__be32) - 1);
552565

553566
inst->alg.ivsize = LRW_BLOCK_SIZE;
554567
inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg) +

0 commit comments

Comments
 (0)