platform/external/busybox.git - Unnamed repository; edit this file 'description' to name the repository.

1 /* vi: set sw=4 ts=4: */
2 /*
3  * Utility routines.
4  *
5  * Copyright (C) 2010 Denys Vlasenko
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9
10 #include "libbb.h"
11
12 /* gcc 4.2.1 optimizes rotr64 better with inline than with macro
13  * (for rotX32, there is no difference). Why? My guess is that
14  * macro requires clever common subexpression elimination heuristics
15  * in gcc, while inline basically forces it to happen.
16  */
17 //#define rotl32(x,n) (((x) << (n)) | ((x) >> (32 - (n))))
18 static ALWAYS_INLINE uint32_t rotl32(uint32_t x, unsigned n)
19 {
20 	return (x << n) | (x >> (32 - n));
21 }
22 //#define rotr32(x,n) (((x) >> (n)) | ((x) << (32 - (n))))
23 static ALWAYS_INLINE uint32_t rotr32(uint32_t x, unsigned n)
24 {
25 	return (x >> n) | (x << (32 - n));
26 }
27 /* rotr64 in needed for sha512 only: */
28 //#define rotr64(x,n) (((x) >> (n)) | ((x) << (64 - (n))))
29 static ALWAYS_INLINE uint64_t rotr64(uint64_t x, unsigned n)
30 {
31 	return (x >> n) | (x << (64 - n));
32 }
33
34 /* rotl64 only used for sha3 currently */
35 static ALWAYS_INLINE uint64_t rotl64(uint64_t x, unsigned n)
36 {
37 	return (x << n) | (x >> (64 - n));
38 }
39
40 /* Feed data through a temporary buffer.
41  * The internal buffer remembers previous data until it has 64
42  * bytes worth to pass on.
43  */
44 static void FAST_FUNC common64_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
45 {
46 	unsigned bufpos = ctx->total64 & 63;
47
48 	ctx->total64 += len;
49
50 	while (1) {
51 		unsigned remaining = 64 - bufpos;
52 		if (remaining > len)
53 			remaining = len;
54 		/* Copy data into aligned buffer */
55 		memcpy(ctx->wbuffer + bufpos, buffer, remaining);
56 		len -= remaining;
57 		buffer = (const char *)buffer + remaining;
58 		bufpos += remaining;
59 		/* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
60 		bufpos -= 64;
61 		if (bufpos != 0)
62 			break;
63 		/* Buffer is filled up, process it */
64 		ctx->process_block(ctx);
65 		/*bufpos = 0; - already is */
66 	}
67 }
68
69 /* Process the remaining bytes in the buffer */
70 static void FAST_FUNC common64_end(md5_ctx_t *ctx, int swap_needed)
71 {
72 	unsigned bufpos = ctx->total64 & 63;
73 	/* Pad the buffer to the next 64-byte boundary with 0x80,0,0,0... */
74 	ctx->wbuffer[bufpos++] = 0x80;
75
76 	/* This loop iterates either once or twice, no more, no less */
77 	while (1) {
78 		unsigned remaining = 64 - bufpos;
79 		memset(ctx->wbuffer + bufpos, 0, remaining);
80 		/* Do we have enough space for the length count? */
81 		if (remaining >= 8) {
82 			/* Store the 64-bit counter of bits in the buffer */
83 			uint64_t t = ctx->total64 << 3;
84 			if (swap_needed)
85 				t = bb_bswap_64(t);
86 			/* wbuffer is suitably aligned for this */
87 			*(bb__aliased_uint64_t *) (&ctx->wbuffer[64 - 8]) = t;
88 		}
89 		ctx->process_block(ctx);
90 		if (remaining >= 8)
91 			break;
92 		bufpos = 0;
93 	}
94 }
95
96
97 /*
98  * Compute MD5 checksum of strings according to the
99  * definition of MD5 in RFC 1321 from April 1992.
100  *
101  * Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.
102  *
103  * Copyright (C) 1995-1999 Free Software Foundation, Inc.
104  * Copyright (C) 2001 Manuel Novoa III
105  * Copyright (C) 2003 Glenn L. McGrath
106  * Copyright (C) 2003 Erik Andersen
107  *
108  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
109  */
110
111 /* 0: fastest, 3: smallest */
112 #if CONFIG_MD5_SMALL < 0
113 # define MD5_SMALL 0
114 #elif CONFIG_MD5_SMALL > 3
115 # define MD5_SMALL 3
116 #else
117 # define MD5_SMALL CONFIG_MD5_SMALL
118 #endif
119
120 /* These are the four functions used in the four steps of the MD5 algorithm
121  * and defined in the RFC 1321.  The first function is a little bit optimized
122  * (as found in Colin Plumbs public domain implementation).
123  * #define FF(b, c, d) ((b & c) | (~b & d))
124  */
125 #undef FF
126 #undef FG
127 #undef FH
128 #undef FI
129 #define FF(b, c, d) (d ^ (b & (c ^ d)))
130 #define FG(b, c, d) FF(d, b, c)
131 #define FH(b, c, d) (b ^ c ^ d)
132 #define FI(b, c, d) (c ^ (b | ~d))
133
134 /* Hash a single block, 64 bytes long and 4-byte aligned */
135 static void FAST_FUNC md5_process_block64(md5_ctx_t *ctx)
136 {
137 #if MD5_SMALL > 0
138 	/* Before we start, one word to the strange constants.
139 	   They are defined in RFC 1321 as
140 	   T[i] = (int)(4294967296.0 * fabs(sin(i))), i=1..64
141 	 */
142 	static const uint32_t C_array[] = {
143 		/* round 1 */
144 		0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
145 		0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
146 		0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
147 		0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
148 		/* round 2 */
149 		0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
150 		0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
151 		0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
152 		0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
153 		/* round 3 */
154 		0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
155 		0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
156 		0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x4881d05,
157 		0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
158 		/* round 4 */
159 		0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
160 		0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
161 		0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
162 		0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391
163 	};
164 	static const char P_array[] ALIGN1 = {
165 # if MD5_SMALL > 1
166 		0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, /* 1 */
167 # endif
168 		1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, /* 2 */
169 		5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2, /* 3 */
170 		0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9  /* 4 */
171 	};
172 #endif
173 	uint32_t *words = (void*) ctx->wbuffer;
174 	uint32_t A = ctx->hash[0];
175 	uint32_t B = ctx->hash[1];
176 	uint32_t C = ctx->hash[2];
177 	uint32_t D = ctx->hash[3];
178
179 #if MD5_SMALL >= 2  /* 2 or 3 */
180
181 	static const char S_array[] ALIGN1 = {
182 		7, 12, 17, 22,
183 		5, 9, 14, 20,
184 		4, 11, 16, 23,
185 		6, 10, 15, 21
186 	};
187 	const uint32_t *pc;
188 	const char *pp;
189 	const char *ps;
190 	int i;
191 	uint32_t temp;
192
193 	if (BB_BIG_ENDIAN)
194 		for (i = 0; i < 16; i++)
195 			words[i] = SWAP_LE32(words[i]);
196
197 # if MD5_SMALL == 3
198 	pc = C_array;
199 	pp = P_array;
200 	ps = S_array - 4;
201
202 	for (i = 0; i < 64; i++) {
203 		if ((i & 0x0f) == 0)
204 			ps += 4;
205 		temp = A;
206 		switch (i >> 4) {
207 		case 0:
208 			temp += FF(B, C, D);
209 			break;
210 		case 1:
211 			temp += FG(B, C, D);
212 			break;
213 		case 2:
214 			temp += FH(B, C, D);
215 			break;
216 		case 3:
217 			temp += FI(B, C, D);
218 		}
219 		temp += words[(int) (*pp++)] + *pc++;
220 		temp = rotl32(temp, ps[i & 3]);
221 		temp += B;
222 		A = D;
223 		D = C;
224 		C = B;
225 		B = temp;
226 	}
227 # else  /* MD5_SMALL == 2 */
228 	pc = C_array;
229 	pp = P_array;
230 	ps = S_array;
231
232 	for (i = 0; i < 16; i++) {
233 		temp = A + FF(B, C, D) + words[(int) (*pp++)] + *pc++;
234 		temp = rotl32(temp, ps[i & 3]);
235 		temp += B;
236 		A = D;
237 		D = C;
238 		C = B;
239 		B = temp;
240 	}
241 	ps += 4;
242 	for (i = 0; i < 16; i++) {
243 		temp = A + FG(B, C, D) + words[(int) (*pp++)] + *pc++;
244 		temp = rotl32(temp, ps[i & 3]);
245 		temp += B;
246 		A = D;
247 		D = C;
248 		C = B;
249 		B = temp;
250 	}
251 	ps += 4;
252 	for (i = 0; i < 16; i++) {
253 		temp = A + FH(B, C, D) + words[(int) (*pp++)] + *pc++;
254 		temp = rotl32(temp, ps[i & 3]);
255 		temp += B;
256 		A = D;
257 		D = C;
258 		C = B;
259 		B = temp;
260 	}
261 	ps += 4;
262 	for (i = 0; i < 16; i++) {
263 		temp = A + FI(B, C, D) + words[(int) (*pp++)] + *pc++;
264 		temp = rotl32(temp, ps[i & 3]);
265 		temp += B;
266 		A = D;
267 		D = C;
268 		C = B;
269 		B = temp;
270 	}
271 # endif
272 	/* Add checksum to the starting values */
273 	ctx->hash[0] += A;
274 	ctx->hash[1] += B;
275 	ctx->hash[2] += C;
276 	ctx->hash[3] += D;
277
278 #else  /* MD5_SMALL == 0 or 1 */
279
280 	uint32_t A_save = A;
281 	uint32_t B_save = B;
282 	uint32_t C_save = C;
283 	uint32_t D_save = D;
284 # if MD5_SMALL == 1
285 	const uint32_t *pc;
286 	const char *pp;
287 	int i;
288 # endif
289
290 	/* First round: using the given function, the context and a constant
291 	   the next context is computed.  Because the algorithm's processing
292 	   unit is a 32-bit word and it is determined to work on words in
293 	   little endian byte order we perhaps have to change the byte order
294 	   before the computation.  To reduce the work for the next steps
295 	   we save swapped words in WORDS array.  */
296 # undef OP
297 # define OP(a, b, c, d, s, T) \
298 	do { \
299 		a += FF(b, c, d) + (*words IF_BIG_ENDIAN(= SWAP_LE32(*words))) + T; \
300 		words++; \
301 		a = rotl32(a, s); \
302 		a += b; \
303 	} while (0)
304
305 	/* Round 1 */
306 # if MD5_SMALL == 1
307 	pc = C_array;
308 	for (i = 0; i < 4; i++) {
309 		OP(A, B, C, D, 7, *pc++);
310 		OP(D, A, B, C, 12, *pc++);
311 		OP(C, D, A, B, 17, *pc++);
312 		OP(B, C, D, A, 22, *pc++);
313 	}
314 # else
315 	OP(A, B, C, D, 7, 0xd76aa478);
316 	OP(D, A, B, C, 12, 0xe8c7b756);
317 	OP(C, D, A, B, 17, 0x242070db);
318 	OP(B, C, D, A, 22, 0xc1bdceee);
319 	OP(A, B, C, D, 7, 0xf57c0faf);
320 	OP(D, A, B, C, 12, 0x4787c62a);
321 	OP(C, D, A, B, 17, 0xa8304613);
322 	OP(B, C, D, A, 22, 0xfd469501);
323 	OP(A, B, C, D, 7, 0x698098d8);
324 	OP(D, A, B, C, 12, 0x8b44f7af);
325 	OP(C, D, A, B, 17, 0xffff5bb1);
326 	OP(B, C, D, A, 22, 0x895cd7be);
327 	OP(A, B, C, D, 7, 0x6b901122);
328 	OP(D, A, B, C, 12, 0xfd987193);
329 	OP(C, D, A, B, 17, 0xa679438e);
330 	OP(B, C, D, A, 22, 0x49b40821);
331 # endif
332 	words -= 16;
333
334 	/* For the second to fourth round we have the possibly swapped words
335 	   in WORDS.  Redefine the macro to take an additional first
336 	   argument specifying the function to use.  */
337 # undef OP
338 # define OP(f, a, b, c, d, k, s, T) \
339 	do { \
340 		a += f(b, c, d) + words[k] + T; \
341 		a = rotl32(a, s); \
342 		a += b; \
343 	} while (0)
344
345 	/* Round 2 */
346 # if MD5_SMALL == 1
347 	pp = P_array;
348 	for (i = 0; i < 4; i++) {
349 		OP(FG, A, B, C, D, (int) (*pp++), 5, *pc++);
350 		OP(FG, D, A, B, C, (int) (*pp++), 9, *pc++);
351 		OP(FG, C, D, A, B, (int) (*pp++), 14, *pc++);
352 		OP(FG, B, C, D, A, (int) (*pp++), 20, *pc++);
353 	}
354 # else
355 	OP(FG, A, B, C, D, 1, 5, 0xf61e2562);
356 	OP(FG, D, A, B, C, 6, 9, 0xc040b340);
357 	OP(FG, C, D, A, B, 11, 14, 0x265e5a51);
358 	OP(FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
359 	OP(FG, A, B, C, D, 5, 5, 0xd62f105d);
360 	OP(FG, D, A, B, C, 10, 9, 0x02441453);
361 	OP(FG, C, D, A, B, 15, 14, 0xd8a1e681);
362 	OP(FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
363 	OP(FG, A, B, C, D, 9, 5, 0x21e1cde6);
364 	OP(FG, D, A, B, C, 14, 9, 0xc33707d6);
365 	OP(FG, C, D, A, B, 3, 14, 0xf4d50d87);
366 	OP(FG, B, C, D, A, 8, 20, 0x455a14ed);
367 	OP(FG, A, B, C, D, 13, 5, 0xa9e3e905);
368 	OP(FG, D, A, B, C, 2, 9, 0xfcefa3f8);
369 	OP(FG, C, D, A, B, 7, 14, 0x676f02d9);
370 	OP(FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
371 # endif
372
373 	/* Round 3 */
374 # if MD5_SMALL == 1
375 	for (i = 0; i < 4; i++) {
376 		OP(FH, A, B, C, D, (int) (*pp++), 4, *pc++);
377 		OP(FH, D, A, B, C, (int) (*pp++), 11, *pc++);
378 		OP(FH, C, D, A, B, (int) (*pp++), 16, *pc++);
379 		OP(FH, B, C, D, A, (int) (*pp++), 23, *pc++);
380 	}
381 # else
382 	OP(FH, A, B, C, D, 5, 4, 0xfffa3942);
383 	OP(FH, D, A, B, C, 8, 11, 0x8771f681);
384 	OP(FH, C, D, A, B, 11, 16, 0x6d9d6122);
385 	OP(FH, B, C, D, A, 14, 23, 0xfde5380c);
386 	OP(FH, A, B, C, D, 1, 4, 0xa4beea44);
387 	OP(FH, D, A, B, C, 4, 11, 0x4bdecfa9);
388 	OP(FH, C, D, A, B, 7, 16, 0xf6bb4b60);
389 	OP(FH, B, C, D, A, 10, 23, 0xbebfbc70);
390 	OP(FH, A, B, C, D, 13, 4, 0x289b7ec6);
391 	OP(FH, D, A, B, C, 0, 11, 0xeaa127fa);
392 	OP(FH, C, D, A, B, 3, 16, 0xd4ef3085);
393 	OP(FH, B, C, D, A, 6, 23, 0x04881d05);
394 	OP(FH, A, B, C, D, 9, 4, 0xd9d4d039);
395 	OP(FH, D, A, B, C, 12, 11, 0xe6db99e5);
396 	OP(FH, C, D, A, B, 15, 16, 0x1fa27cf8);
397 	OP(FH, B, C, D, A, 2, 23, 0xc4ac5665);
398 # endif
399
400 	/* Round 4 */
401 # if MD5_SMALL == 1
402 	for (i = 0; i < 4; i++) {
403 		OP(FI, A, B, C, D, (int) (*pp++), 6, *pc++);
404 		OP(FI, D, A, B, C, (int) (*pp++), 10, *pc++);
405 		OP(FI, C, D, A, B, (int) (*pp++), 15, *pc++);
406 		OP(FI, B, C, D, A, (int) (*pp++), 21, *pc++);
407 	}
408 # else
409 	OP(FI, A, B, C, D, 0, 6, 0xf4292244);
410 	OP(FI, D, A, B, C, 7, 10, 0x432aff97);
411 	OP(FI, C, D, A, B, 14, 15, 0xab9423a7);
412 	OP(FI, B, C, D, A, 5, 21, 0xfc93a039);
413 	OP(FI, A, B, C, D, 12, 6, 0x655b59c3);
414 	OP(FI, D, A, B, C, 3, 10, 0x8f0ccc92);
415 	OP(FI, C, D, A, B, 10, 15, 0xffeff47d);
416 	OP(FI, B, C, D, A, 1, 21, 0x85845dd1);
417 	OP(FI, A, B, C, D, 8, 6, 0x6fa87e4f);
418 	OP(FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
419 	OP(FI, C, D, A, B, 6, 15, 0xa3014314);
420 	OP(FI, B, C, D, A, 13, 21, 0x4e0811a1);
421 	OP(FI, A, B, C, D, 4, 6, 0xf7537e82);
422 	OP(FI, D, A, B, C, 11, 10, 0xbd3af235);
423 	OP(FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
424 	OP(FI, B, C, D, A, 9, 21, 0xeb86d391);
425 # undef OP
426 # endif
427 	/* Add checksum to the starting values */
428 	ctx->hash[0] = A_save + A;
429 	ctx->hash[1] = B_save + B;
430 	ctx->hash[2] = C_save + C;
431 	ctx->hash[3] = D_save + D;
432 #endif
433 }
434 #undef FF
435 #undef FG
436 #undef FH
437 #undef FI
438
439 /* Initialize structure containing state of computation.
440  * (RFC 1321, 3.3: Step 3)
441  */
442 void FAST_FUNC md5_begin(md5_ctx_t *ctx)
443 {
444 	ctx->hash[0] = 0x67452301;
445 	ctx->hash[1] = 0xefcdab89;
446 	ctx->hash[2] = 0x98badcfe;
447 	ctx->hash[3] = 0x10325476;
448 	ctx->total64 = 0;
449 	ctx->process_block = md5_process_block64;
450 }
451
452 /* Used also for sha1 and sha256 */
453 void FAST_FUNC md5_hash(md5_ctx_t *ctx, const void *buffer, size_t len)
454 {
455 	common64_hash(ctx, buffer, len);
456 }
457
458 /* Process the remaining bytes in the buffer and put result from CTX
459  * in first 16 bytes following RESBUF.  The result is always in little
460  * endian byte order, so that a byte-wise output yields to the wanted
461  * ASCII representation of the message digest.
462  */
463 void FAST_FUNC md5_end(md5_ctx_t *ctx, void *resbuf)
464 {
465 	/* MD5 stores total in LE, need to swap on BE arches: */
466 	common64_end(ctx, /*swap_needed:*/ BB_BIG_ENDIAN);
467
468 	/* The MD5 result is in little endian byte order */
469 	if (BB_BIG_ENDIAN) {
470 		ctx->hash[0] = SWAP_LE32(ctx->hash[0]);
471 		ctx->hash[1] = SWAP_LE32(ctx->hash[1]);
472 		ctx->hash[2] = SWAP_LE32(ctx->hash[2]);
473 		ctx->hash[3] = SWAP_LE32(ctx->hash[3]);
474 	}
475
476 	memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * 4);
477 }
478
479
480 /*
481  * SHA1 part is:
482  * Copyright 2007 Rob Landley <rob@landley.net>
483  *
484  * Based on the public domain SHA-1 in C by Steve Reid <steve@edmweb.com>
485  * from http://www.mirrors.wiretapped.net/security/cryptography/hashes/sha1/
486  *
487  * Licensed under GPLv2, see file LICENSE in this source tree.
488  *
489  * ---------------------------------------------------------------------------
490  *
491  * SHA256 and SHA512 parts are:
492  * Released into the Public Domain by Ulrich Drepper <drepper@redhat.com>.
493  * Shrank by Denys Vlasenko.
494  *
495  * ---------------------------------------------------------------------------
496  *
497  * The best way to test random blocksizes is to go to coreutils/md5_sha1_sum.c
498  * and replace "4096" with something like "2000 + time(NULL) % 2097",
499  * then rebuild and compare "shaNNNsum bigfile" results.
500  */
501
502 static void FAST_FUNC sha1_process_block64(sha1_ctx_t *ctx)
503 {
504 	static const uint32_t rconsts[] = {
505 		0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6
506 	};
507 	int i, j;
508 	int cnt;
509 	uint32_t W[16+16];
510 	uint32_t a, b, c, d, e;
511
512 	/* On-stack work buffer frees up one register in the main loop
513 	 * which otherwise will be needed to hold ctx pointer */
514 	for (i = 0; i < 16; i++)
515 		W[i] = W[i+16] = SWAP_BE32(((uint32_t*)ctx->wbuffer)[i]);
516
517 	a = ctx->hash[0];
518 	b = ctx->hash[1];
519 	c = ctx->hash[2];
520 	d = ctx->hash[3];
521 	e = ctx->hash[4];
522
523 	/* 4 rounds of 20 operations each */
524 	cnt = 0;
525 	for (i = 0; i < 4; i++) {
526 		j = 19;
527 		do {
528 			uint32_t work;
529
530 			work = c ^ d;
531 			if (i == 0) {
532 				work = (work & b) ^ d;
533 				if (j <= 3)
534 					goto ge16;
535 				/* Used to do SWAP_BE32 here, but this
536 				 * requires ctx (see comment above) */
537 				work += W[cnt];
538 			} else {
539 				if (i == 2)
540 					work = ((b | c) & d) | (b & c);
541 				else /* i = 1 or 3 */
542 					work ^= b;
543  ge16:
544 				W[cnt] = W[cnt+16] = rotl32(W[cnt+13] ^ W[cnt+8] ^ W[cnt+2] ^ W[cnt], 1);
545 				work += W[cnt];
546 			}
547 			work += e + rotl32(a, 5) + rconsts[i];
548
549 			/* Rotate by one for next time */
550 			e = d;
551 			d = c;
552 			c = /* b = */ rotl32(b, 30);
553 			b = a;
554 			a = work;
555 			cnt = (cnt + 1) & 15;
556 		} while (--j >= 0);
557 	}
558
559 	ctx->hash[0] += a;
560 	ctx->hash[1] += b;
561 	ctx->hash[2] += c;
562 	ctx->hash[3] += d;
563 	ctx->hash[4] += e;
564 }
565
566 /* Constants for SHA512 from FIPS 180-2:4.2.3.
567  * SHA256 constants from FIPS 180-2:4.2.2
568  * are the most significant half of first 64 elements
569  * of the same array.
570  */
571 static const uint64_t sha_K[80] = {
572 	0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
573 	0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
574 	0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
575 	0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
576 	0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
577 	0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
578 	0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
579 	0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
580 	0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
581 	0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
582 	0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
583 	0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
584 	0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
585 	0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
586 	0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
587 	0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
588 	0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
589 	0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
590 	0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
591 	0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
592 	0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
593 	0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
594 	0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
595 	0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
596 	0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
597 	0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
598 	0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
599 	0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
600 	0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
601 	0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
602 	0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
603 	0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
604 	0xca273eceea26619cULL, 0xd186b8c721c0c207ULL, /* [64]+ are used for sha512 only */
605 	0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
606 	0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
607 	0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
608 	0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
609 	0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
610 	0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
611 	0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
612 };
613
614 #undef Ch
615 #undef Maj
616 #undef S0
617 #undef S1
618 #undef R0
619 #undef R1
620
621 static void FAST_FUNC sha256_process_block64(sha256_ctx_t *ctx)
622 {
623 	unsigned t;
624 	uint32_t W[64], a, b, c, d, e, f, g, h;
625 	const uint32_t *words = (uint32_t*) ctx->wbuffer;
626
627 	/* Operators defined in FIPS 180-2:4.1.2.  */
628 #define Ch(x, y, z) ((x & y) ^ (~x & z))
629 #define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
630 #define S0(x) (rotr32(x, 2) ^ rotr32(x, 13) ^ rotr32(x, 22))
631 #define S1(x) (rotr32(x, 6) ^ rotr32(x, 11) ^ rotr32(x, 25))
632 #define R0(x) (rotr32(x, 7) ^ rotr32(x, 18) ^ (x >> 3))
633 #define R1(x) (rotr32(x, 17) ^ rotr32(x, 19) ^ (x >> 10))
634
635 	/* Compute the message schedule according to FIPS 180-2:6.2.2 step 2.  */
636 	for (t = 0; t < 16; ++t)
637 		W[t] = SWAP_BE32(words[t]);
638 	for (/*t = 16*/; t < 64; ++t)
639 		W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
640
641 	a = ctx->hash[0];
642 	b = ctx->hash[1];
643 	c = ctx->hash[2];
644 	d = ctx->hash[3];
645 	e = ctx->hash[4];
646 	f = ctx->hash[5];
647 	g = ctx->hash[6];
648 	h = ctx->hash[7];
649
650 	/* The actual computation according to FIPS 180-2:6.2.2 step 3.  */
651 	for (t = 0; t < 64; ++t) {
652 		/* Need to fetch upper half of sha_K[t]
653 		 * (I hope compiler is clever enough to just fetch
654 		 * upper half)
655 		 */
656 		uint32_t K_t = sha_K[t] >> 32;
657 		uint32_t T1 = h + S1(e) + Ch(e, f, g) + K_t + W[t];
658 		uint32_t T2 = S0(a) + Maj(a, b, c);
659 		h = g;
660 		g = f;
661 		f = e;
662 		e = d + T1;
663 		d = c;
664 		c = b;
665 		b = a;
666 		a = T1 + T2;
667 	}
668 #undef Ch
669 #undef Maj
670 #undef S0
671 #undef S1
672 #undef R0
673 #undef R1
674 	/* Add the starting values of the context according to FIPS 180-2:6.2.2
675 	   step 4.  */
676 	ctx->hash[0] += a;
677 	ctx->hash[1] += b;
678 	ctx->hash[2] += c;
679 	ctx->hash[3] += d;
680 	ctx->hash[4] += e;
681 	ctx->hash[5] += f;
682 	ctx->hash[6] += g;
683 	ctx->hash[7] += h;
684 }
685
686 static void FAST_FUNC sha512_process_block128(sha512_ctx_t *ctx)
687 {
688 	unsigned t;
689 	uint64_t W[80];
690 	/* On i386, having assignments here (not later as sha256 does)
691 	 * produces 99 bytes smaller code with gcc 4.3.1
692 	 */
693 	uint64_t a = ctx->hash[0];
694 	uint64_t b = ctx->hash[1];
695 	uint64_t c = ctx->hash[2];
696 	uint64_t d = ctx->hash[3];
697 	uint64_t e = ctx->hash[4];
698 	uint64_t f = ctx->hash[5];
699 	uint64_t g = ctx->hash[6];
700 	uint64_t h = ctx->hash[7];
701 	const uint64_t *words = (uint64_t*) ctx->wbuffer;
702
703 	/* Operators defined in FIPS 180-2:4.1.2.  */
704 #define Ch(x, y, z) ((x & y) ^ (~x & z))
705 #define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
706 #define S0(x) (rotr64(x, 28) ^ rotr64(x, 34) ^ rotr64(x, 39))
707 #define S1(x) (rotr64(x, 14) ^ rotr64(x, 18) ^ rotr64(x, 41))
708 #define R0(x) (rotr64(x, 1) ^ rotr64(x, 8) ^ (x >> 7))
709 #define R1(x) (rotr64(x, 19) ^ rotr64(x, 61) ^ (x >> 6))
710
711 	/* Compute the message schedule according to FIPS 180-2:6.3.2 step 2.  */
712 	for (t = 0; t < 16; ++t)
713 		W[t] = SWAP_BE64(words[t]);
714 	for (/*t = 16*/; t < 80; ++t)
715 		W[t] = R1(W[t - 2]) + W[t - 7] + R0(W[t - 15]) + W[t - 16];
716
717 	/* The actual computation according to FIPS 180-2:6.3.2 step 3.  */
718 	for (t = 0; t < 80; ++t) {
719 		uint64_t T1 = h + S1(e) + Ch(e, f, g) + sha_K[t] + W[t];
720 		uint64_t T2 = S0(a) + Maj(a, b, c);
721 		h = g;
722 		g = f;
723 		f = e;
724 		e = d + T1;
725 		d = c;
726 		c = b;
727 		b = a;
728 		a = T1 + T2;
729 	}
730 #undef Ch
731 #undef Maj
732 #undef S0
733 #undef S1
734 #undef R0
735 #undef R1
736 	/* Add the starting values of the context according to FIPS 180-2:6.3.2
737 	   step 4.  */
738 	ctx->hash[0] += a;
739 	ctx->hash[1] += b;
740 	ctx->hash[2] += c;
741 	ctx->hash[3] += d;
742 	ctx->hash[4] += e;
743 	ctx->hash[5] += f;
744 	ctx->hash[6] += g;
745 	ctx->hash[7] += h;
746 }
747
748
749 void FAST_FUNC sha1_begin(sha1_ctx_t *ctx)
750 {
751 	ctx->hash[0] = 0x67452301;
752 	ctx->hash[1] = 0xefcdab89;
753 	ctx->hash[2] = 0x98badcfe;
754 	ctx->hash[3] = 0x10325476;
755 	ctx->hash[4] = 0xc3d2e1f0;
756 	ctx->total64 = 0;
757 	ctx->process_block = sha1_process_block64;
758 }
759
760 static const uint32_t init256[] = {
761 	0,
762 	0,
763 	0x6a09e667,
764 	0xbb67ae85,
765 	0x3c6ef372,
766 	0xa54ff53a,
767 	0x510e527f,
768 	0x9b05688c,
769 	0x1f83d9ab,
770 	0x5be0cd19,
771 };
772 static const uint32_t init512_lo[] = {
773 	0,
774 	0,
775 	0xf3bcc908,
776 	0x84caa73b,
777 	0xfe94f82b,
778 	0x5f1d36f1,
779 	0xade682d1,
780 	0x2b3e6c1f,
781 	0xfb41bd6b,
782 	0x137e2179,
783 };
784
785 /* Initialize structure containing state of computation.
786    (FIPS 180-2:5.3.2)  */
787 void FAST_FUNC sha256_begin(sha256_ctx_t *ctx)
788 {
789 	memcpy(&ctx->total64, init256, sizeof(init256));
790 	/*ctx->total64 = 0; - done by prepending two 32-bit zeros to init256 */
791 	ctx->process_block = sha256_process_block64;
792 }
793
794 /* Initialize structure containing state of computation.
795    (FIPS 180-2:5.3.3)  */
796 void FAST_FUNC sha512_begin(sha512_ctx_t *ctx)
797 {
798 	int i;
799 	/* Two extra iterations zero out ctx->total64[2] */
800 	uint64_t *tp = ctx->total64;
801 	for (i = 0; i < 2+8; i++)
802 		tp[i] = ((uint64_t)(init256[i]) << 32) + init512_lo[i];
803 	/*ctx->total64[0] = ctx->total64[1] = 0; - already done */
804 }
805
806 void FAST_FUNC sha512_hash(sha512_ctx_t *ctx, const void *buffer, size_t len)
807 {
808 	unsigned bufpos = ctx->total64[0] & 127;
809 	unsigned remaining;
810
811 	/* First increment the byte count.  FIPS 180-2 specifies the possible
812 	   length of the file up to 2^128 _bits_.
813 	   We compute the number of _bytes_ and convert to bits later.  */
814 	ctx->total64[0] += len;
815 	if (ctx->total64[0] < len)
816 		ctx->total64[1]++;
817 #if 0
818 	remaining = 128 - bufpos;
819
820 	/* Hash whole blocks */
821 	while (len >= remaining) {
822 		memcpy(ctx->wbuffer + bufpos, buffer, remaining);
823 		buffer = (const char *)buffer + remaining;
824 		len -= remaining;
825 		remaining = 128;
826 		bufpos = 0;
827 		sha512_process_block128(ctx);
828 	}
829
830 	/* Save last, partial blosk */
831 	memcpy(ctx->wbuffer + bufpos, buffer, len);
832 #else
833 	while (1) {
834 		remaining = 128 - bufpos;
835 		if (remaining > len)
836 			remaining = len;
837 		/* Copy data into aligned buffer */
838 		memcpy(ctx->wbuffer + bufpos, buffer, remaining);
839 		len -= remaining;
840 		buffer = (const char *)buffer + remaining;
841 		bufpos += remaining;
842 		/* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
843 		bufpos -= 128;
844 		if (bufpos != 0)
845 			break;
846 		/* Buffer is filled up, process it */
847 		sha512_process_block128(ctx);
848 		/*bufpos = 0; - already is */
849 	}
850 #endif
851 }
852
853 /* Used also for sha256 */
854 void FAST_FUNC sha1_end(sha1_ctx_t *ctx, void *resbuf)
855 {
856 	unsigned hash_size;
857
858 	/* SHA stores total in BE, need to swap on LE arches: */
859 	common64_end(ctx, /*swap_needed:*/ BB_LITTLE_ENDIAN);
860
861 	hash_size = (ctx->process_block == sha1_process_block64) ? 5 : 8;
862 	/* This way we do not impose alignment constraints on resbuf: */
863 	if (BB_LITTLE_ENDIAN) {
864 		unsigned i;
865 		for (i = 0; i < hash_size; ++i)
866 			ctx->hash[i] = SWAP_BE32(ctx->hash[i]);
867 	}
868 	memcpy(resbuf, ctx->hash, sizeof(ctx->hash[0]) * hash_size);
869 }
870
871 void FAST_FUNC sha512_end(sha512_ctx_t *ctx, void *resbuf)
872 {
873 	unsigned bufpos = ctx->total64[0] & 127;
874
875 	/* Pad the buffer to the next 128-byte boundary with 0x80,0,0,0... */
876 	ctx->wbuffer[bufpos++] = 0x80;
877
878 	while (1) {
879 		unsigned remaining = 128 - bufpos;
880 		memset(ctx->wbuffer + bufpos, 0, remaining);
881 		if (remaining >= 16) {
882 			/* Store the 128-bit counter of bits in the buffer in BE format */
883 			uint64_t t;
884 			t = ctx->total64[0] << 3;
885 			t = SWAP_BE64(t);
886 			*(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 8]) = t;
887 			t = (ctx->total64[1] << 3) | (ctx->total64[0] >> 61);
888 			t = SWAP_BE64(t);
889 			*(bb__aliased_uint64_t *) (&ctx->wbuffer[128 - 16]) = t;
890 		}
891 		sha512_process_block128(ctx);
892 		if (remaining >= 16)
893 			break;
894 		bufpos = 0;
895 	}
896
897 	if (BB_LITTLE_ENDIAN) {
898 		unsigned i;
899 		for (i = 0; i < ARRAY_SIZE(ctx->hash); ++i)
900 			ctx->hash[i] = SWAP_BE64(ctx->hash[i]);
901 	}
902 	memcpy(resbuf, ctx->hash, sizeof(ctx->hash));
903 }
904
905
906 /*
907  * The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
908  * Michael Peeters and Gilles Van Assche. For more information, feedback or
909  * questions, please refer to our website: http://keccak.noekeon.org/
910  *
911  * Implementation by Ronny Van Keer,
912  * hereby denoted as "the implementer".
913  *
914  * To the extent possible under law, the implementer has waived all copyright
915  * and related or neighboring rights to the source code in this file.
916  * http://creativecommons.org/publicdomain/zero/1.0/
917  *
918  * Busybox modifications (C) Lauri Kasanen, under the GPLv2.
919  */
920
921 #if CONFIG_SHA3_SMALL < 0
922 # define SHA3_SMALL 0
923 #elif CONFIG_SHA3_SMALL > 1
924 # define SHA3_SMALL 1
925 #else
926 # define SHA3_SMALL CONFIG_SHA3_SMALL
927 #endif
928
929 enum {
930 	SHA3_IBLK_BYTES = 72, /* 576 bits / 8 */
931 };
932
933 /*
934  * In the crypto literature this function is usually called Keccak-f().
935  */
936 static void sha3_process_block72(uint64_t *state)
937 {
938 	enum { NROUNDS = 24 };
939
940 	/* Elements should be 64-bit, but top half is always zero or 0x80000000.
941 	 * We encode 63rd bits in a separate word below.
942 	 * Same is true for 31th bits, which lets us use 16-bit table instead of 64-bit.
943 	 * The speed penalty is lost in the noise.
944 	 */
945 	static const uint16_t IOTA_CONST[NROUNDS] = {
946 		0x0001,
947 		0x8082,
948 		0x808a,
949 		0x8000,
950 		0x808b,
951 		0x0001,
952 		0x8081,
953 		0x8009,
954 		0x008a,
955 		0x0088,
956 		0x8009,
957 		0x000a,
958 		0x808b,
959 		0x008b,
960 		0x8089,
961 		0x8003,
962 		0x8002,
963 		0x0080,
964 		0x800a,
965 		0x000a,
966 		0x8081,
967 		0x8080,
968 		0x0001,
969 		0x8008,
970 	};
971 	/* bit for CONST[0] is in msb: 0011 0011 0000 0111 1101 1101 */
972 	const uint32_t IOTA_CONST_bit63 = (uint32_t)(0x3307dd00);
973 	/* bit for CONST[0] is in msb: 0001 0110 0011 1000 0001 1011 */
974 	const uint32_t IOTA_CONST_bit31 = (uint32_t)(0x16381b00);
975
976 	static const uint8_t ROT_CONST[24] = {
977 		1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
978 		27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44,
979 	};
980 	static const uint8_t PI_LANE[24] = {
981 		10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
982 		15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1,
983 	};
984 	/*static const uint8_t MOD5[10] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, };*/
985
986 	unsigned x, y;
987 	unsigned round;
988
989 	if (BB_BIG_ENDIAN) {
990 		for (x = 0; x < 25; x++) {
991 			state[x] = SWAP_LE64(state[x]);
992 		}
993 	}
994
995 	for (round = 0; round < NROUNDS; ++round) {
996 		/* Theta */
997 		{
998 			uint64_t BC[10];
999 			for (x = 0; x < 5; ++x) {
1000 				BC[x + 5] = BC[x] = state[x]
1001 					^ state[x + 5] ^ state[x + 10]
1002 					^ state[x + 15]	^ state[x + 20];
1003 			}
1004 			/* Using 2x5 vector above eliminates the need to use
1005 			 * BC[MOD5[x+N]] trick below to fetch BC[(x+N) % 5],
1006 			 * and the code is a bit _smaller_.
1007 			 */
1008 			for (x = 0; x < 5; ++x) {
1009 				uint64_t temp = BC[x + 4] ^ rotl64(BC[x + 1], 1);
1010 				state[x] ^= temp;
1011 				state[x + 5] ^= temp;
1012 				state[x + 10] ^= temp;
1013 				state[x + 15] ^= temp;
1014 				state[x + 20] ^= temp;
1015 			}
1016 		}
1017
1018 		/* Rho Pi */
1019 		if (SHA3_SMALL) {
1020 			uint64_t t1 = state[1];
1021 			for (x = 0; x < 24; ++x) {
1022 				uint64_t t0 = state[PI_LANE[x]];
1023 				state[PI_LANE[x]] = rotl64(t1, ROT_CONST[x]);
1024 				t1 = t0;
1025 			}
1026 		} else {
1027 			/* Especially large benefit for 32-bit arch (75% faster):
1028 			 * 64-bit rotations by non-constant usually are SLOW on those.
1029 			 * We resort to unrolling here.
1030 			 * This optimizes out PI_LANE[] and ROT_CONST[],
1031 			 * but generates 300-500 more bytes of code.
1032 			 */
1033 			uint64_t t0;
1034 			uint64_t t1 = state[1];
1035 #define RhoPi_twice(x) \
1036 	t0 = state[PI_LANE[x  ]]; \
1037 	state[PI_LANE[x  ]] = rotl64(t1, ROT_CONST[x  ]); \
1038 	t1 = state[PI_LANE[x+1]]; \
1039 	state[PI_LANE[x+1]] = rotl64(t0, ROT_CONST[x+1]);
1040 			RhoPi_twice(0); RhoPi_twice(2);
1041 			RhoPi_twice(4); RhoPi_twice(6);
1042 			RhoPi_twice(8); RhoPi_twice(10);
1043 			RhoPi_twice(12); RhoPi_twice(14);
1044 			RhoPi_twice(16); RhoPi_twice(18);
1045 			RhoPi_twice(20); RhoPi_twice(22);
1046 #undef RhoPi_twice
1047 		}
1048
1049 		/* Chi */
1050 		for (y = 0; y <= 20; y += 5) {
1051 			uint64_t BC0, BC1, BC2, BC3, BC4;
1052 			BC0 = state[y + 0];
1053 			BC1 = state[y + 1];
1054 			BC2 = state[y + 2];
1055 			state[y + 0] = BC0 ^ ((~BC1) & BC2);
1056 			BC3 = state[y + 3];
1057 			state[y + 1] = BC1 ^ ((~BC2) & BC3);
1058 			BC4 = state[y + 4];
1059 			state[y + 2] = BC2 ^ ((~BC3) & BC4);
1060 			state[y + 3] = BC3 ^ ((~BC4) & BC0);
1061 			state[y + 4] = BC4 ^ ((~BC0) & BC1);
1062 		}
1063
1064 		/* Iota */
1065 		state[0] ^= IOTA_CONST[round]
1066 			| (uint32_t)((IOTA_CONST_bit31 << round) & 0x80000000)
1067 			| (uint64_t)((IOTA_CONST_bit63 << round) & 0x80000000) << 32;
1068 	}
1069
1070 	if (BB_BIG_ENDIAN) {
1071 		for (x = 0; x < 25; x++) {
1072 			state[x] = SWAP_LE64(state[x]);
1073 		}
1074 	}
1075 }
1076
1077 void FAST_FUNC sha3_begin(sha3_ctx_t *ctx)
1078 {
1079 	memset(ctx, 0, sizeof(*ctx));
1080 }
1081
1082 void FAST_FUNC sha3_hash(sha3_ctx_t *ctx, const void *buffer, size_t len)
1083 {
1084 #if SHA3_SMALL
1085 	const uint8_t *data = buffer;
1086 	unsigned bufpos = ctx->bytes_queued;
1087
1088 	while (1) {
1089 		unsigned remaining = SHA3_IBLK_BYTES - bufpos;
1090 		if (remaining > len)
1091 			remaining = len;
1092 		len -= remaining;
1093 		/* XOR data into buffer */
1094 		while (remaining != 0) {
1095 			uint8_t *buf = (uint8_t*)ctx->state;
1096 			buf[bufpos] ^= *data++;
1097 			bufpos++;
1098 			remaining--;
1099 		}
1100 		/* Clever way to do "if (bufpos != N) break; ... ; bufpos = 0;" */
1101 		bufpos -= SHA3_IBLK_BYTES;
1102 		if (bufpos != 0)
1103 			break;
1104 		/* Buffer is filled up, process it */
1105 		sha3_process_block72(ctx->state);
1106 		/*bufpos = 0; - already is */
1107 	}
1108 	ctx->bytes_queued = bufpos + SHA3_IBLK_BYTES;
1109 #else
1110 	/* +50 bytes code size, but a bit faster because of long-sized XORs */
1111 	const uint8_t *data = buffer;
1112 	unsigned bufpos = ctx->bytes_queued;
1113
1114 	/* If already data in queue, continue queuing first */
1115 	while (len != 0 && bufpos != 0) {
1116 		uint8_t *buf = (uint8_t*)ctx->state;
1117 		buf[bufpos] ^= *data++;
1118 		len--;
1119 		bufpos++;
1120 		if (bufpos == SHA3_IBLK_BYTES) {
1121 			bufpos = 0;
1122 			goto do_block;
1123 		}
1124 	}
1125
1126 	/* Absorb complete blocks */
1127 	while (len >= SHA3_IBLK_BYTES) {
1128 		/* XOR data onto beginning of state[].
1129 		 * We try to be efficient - operate one word at a time, not byte.
1130 		 * Careful wrt unaligned access: can't just use "*(long*)data"!
1131 		 */
1132 		unsigned count = SHA3_IBLK_BYTES / sizeof(long);
1133 		long *buf = (long*)ctx->state;
1134 		do {
1135 			long v;
1136 			move_from_unaligned_long(v, (long*)data);
1137 			*buf++ ^= v;
1138 			data += sizeof(long);
1139 		} while (--count);
1140 		len -= SHA3_IBLK_BYTES;
1141  do_block:
1142 		sha3_process_block72(ctx->state);
1143 	}
1144
1145 	/* Queue remaining data bytes */
1146 	while (len != 0) {
1147 		uint8_t *buf = (uint8_t*)ctx->state;
1148 		buf[bufpos] ^= *data++;
1149 		bufpos++;
1150 		len--;
1151 	}
1152
1153 	ctx->bytes_queued = bufpos;
1154 #endif
1155 }
1156
1157 void FAST_FUNC sha3_end(sha3_ctx_t *ctx, void *resbuf)
1158 {
1159 	/* Padding */
1160 	uint8_t *buf = (uint8_t*)ctx->state;
1161 	buf[ctx->bytes_queued]   ^= 1;
1162 	buf[SHA3_IBLK_BYTES - 1] ^= 0x80;
1163
1164 	sha3_process_block72(ctx->state);
1165
1166 	/* Output */
1167 	memcpy(resbuf, ctx->state, 64);
1168 }
1169