Projects : keksum : genesis

keccak.c

Dir - Raw

1/* KECCAK permutation and sponge construction
2 * J. Welsh, May 2019
3 *
4 * Based on Guido Bertoni, Joan Daemen, Michael Peeters and Gilles van Assche
5 * (2011): The KECCAK reference, Version 3.0.
6 */
7
8#include <stdint.h>
9#include "io.h"
10
11/* State is arranged in a 5 x 5 x W bit array, indexed as (x,y,z).
12 * The 1D section for a given (x,y) is called a lane and packed in a lane_t.
13 * The 1D section for a given (x,z) is called a column.
14 * The 2D section for a given z is called a slice.
15 */
16
17typedef uint64_t lane_t;
18#define L 6
19#define W (1 << L) /* lane size = 64 bits */
20#define B 25*W /* permutation width = 1600 bits */
21/* we require L be at least 3 (W=8, B=200) to avoid sub-byte lane size */
22
23#define ROUNDS (12 + 2*L)
24
25#define mask(x) (((unsigned) (x)) & (W-1))
26#define rot(v,r) (v = v<<mask(r) | v>>mask(W-mask(r)))
27
28static lane_t state[5][5];
29/* physical storage order can be swapped as a performance tweak */
30#define a(x,y) (state[x][y])
31
32/* Galois linear feedback shift register, output in low bit:
33 * (x^t mod x^8 + x^6 + x^5 + x^4 + 1) mod x in GF(2)(x)
34 * (I'd forgotten my grade-school algebra but the Handbook of Applied
35 * Cryptography plus http://archive.is/hhvKKJ (nayuki.io) and some written
36 * exercises cleared this up for me.) */
37static uint8_t lfsr_step(uint8_t rstate) {
38 /* polynomial xor'd in when high (overflowing) bit is set */
39 uint8_t gate = ((int8_t) rstate) >> 7;
40 return (rstate << 1) ^ (gate & 0x71); /* bits 6,5,4,0 */
41}
42
43/* Column parity diffusion */
44static void theta(void) {
45 /* LUT for x+1 mod 5 (not used on secret bits!) */
46 uint8_t const inc_mod5[8] = {1,2,3,4,0,1,2,3};
47 unsigned x, y;
48 lane_t c[5], d;
49
50 for (x=0; x<5; ++x)
51 c[x] = a(x,0) ^ a(x,1) ^ a(x,2) ^ a(x,3) ^ a(x,4);
52
53 for (x=0; x<5; ++x) {
54 d = c[inc_mod5[x]];
55 rot(d,1);
56 d ^= c[inc_mod5[x+3]]; /* x+3+1 == x-1 mod 5 */
57 for (y=0; y<5; ++y)
58 a(x,y) ^= d;
59 }
60}
61
62/* Inter-slice dispersion */
63static void rho(void) {
64 /* precomputed offsets from Table 2.1 */
65 rot(a(0,1), 36);
66 rot(a(0,2), 3);
67 rot(a(0,3), 105);
68 rot(a(0,4), 210);
69
70 rot(a(1,0), 1);
71 rot(a(1,1), 300);
72 rot(a(1,2), 10);
73 rot(a(1,3), 45);
74 rot(a(1,4), 66);
75
76 rot(a(2,0), 190);
77 rot(a(2,1), 6);
78 rot(a(2,2), 171);
79 rot(a(2,3), 15);
80 rot(a(2,4), 253);
81
82 rot(a(3,0), 28);
83 rot(a(3,1), 55);
84 rot(a(3,2), 153);
85 rot(a(3,3), 21);
86 rot(a(3,4), 120);
87
88 rot(a(4,0), 91);
89 rot(a(4,1), 276);
90 rot(a(4,2), 231);
91 rot(a(4,3), 136);
92 rot(a(4,4), 78);
93}
94
95/* Slicewise transposition by matrix
96 * [ 0 1 ]
97 * [ 2 3 ]
98 * For all indices x, y, z:
99 * a'(y, 2x+3y mod 5, z) = a(x, y, z)
100 */
101static void pi(void) {
102 /* shuffle in-place by swapping alternating temporaries with successive
103 * destinations */
104 lane_t c, d;
105#define even(x,y) (c=a(x,y), a(x,y)=d)
106#define odd(x,y) (d=a(x,y), a(x,y)=c)
107 /* a[0][0] doesn't move */
108 d=a(0,1); /* 2*0 + 3*1 = 3 -> 3 */
109 even(1,3); /* 2+9=11 -> 1 */
110 odd (3,1); /* 6+3=9 -> 4 */
111 even(1,4); /* 2+12=14 */
112 odd (4,4); /* 8+12=20 */
113 even(4,0); /* 8+0=8 */
114 odd (0,3); /* 0+9=9 */
115 even(3,4); /* 6+12=18 */
116 odd (4,3); /* 8+9=17 */
117 even(3,2); /* 6+6=12 */
118 odd (2,2); /* 4+6=10 */
119 even(2,0); /* 4+0=4 */
120 odd (0,4); /* 0+12=12 */
121 even(4,2); /* 8+6=14 */
122 odd (2,4); /* 4+12=16 */
123 even(4,1); /* 8+3=11 */
124 odd (1,1); /* 2+3=5 */
125 even(1,0); /* 2+0=2 */
126 odd (0,2); /* 0+6=6 */
127 even(2,1); /* 4+3=7 */
128 odd (1,2); /* 2+6=8 */
129 even(2,3); /* 4+9=13 */
130 odd (3,3); /* 6+9=15 */
131 even(3,0); /* 6+0=6 */
132 a(0,1)=c; /* And we're back home in 24 steps, lovely! */
133}
134
135/* Nonlinear row mapping */
136static void chi(void) {
137 unsigned y;
138 for (y=0; y<5; ++y) {
139 lane_t c0=a(0,y), c1=a(1,y), c2=a(2,y), c3=a(3,y), c4=a(4,y);
140
141 a(0,y) = c0 ^ (~c1 & c2);
142 a(1,y) = c1 ^ (~c2 & c3);
143 a(2,y) = c2 ^ (~c3 & c4);
144 a(3,y) = c3 ^ (~c4 & c0);
145 a(4,y) = c4 ^ (~c0 & c1);
146 }
147}
148
149/* Round constants for disrupting z symmetry
150 *
151 * Mix LFSR bitstream sequentially into first lane with z index given by
152 * 2^j - 1
153 * LFSR advances 7 steps per round regardless of how many "j" are needed to
154 * fill the lane, corresponding to the maximum 64-bit lane size.
155 */
156static uint8_t iota(uint8_t rstate) {
157 unsigned j;
158 for (j=0; j<=L; ++j) {
159 a(0,0) ^= ((lane_t) (rstate & 1)) << ((1 << j) - 1);
160 rstate = lfsr_step(rstate);
161 }
162 for (; j<7; ++j)
163 rstate = lfsr_step(rstate);
164 return rstate;
165}
166
167#ifdef TEST
168
169#include <stdio.h>
170#include "testvectors.h"
171
172static int test_fail;
173static unsigned test, test_step;
174
175static void checkpoint(unsigned round, char const *step) {
176 int fail = 0;
177 unsigned x, y, i=0;
178 printf("test=%u round=%u %s ", test, round, step);
179 for (y=0; y<5; ++y)
180 for (x=0; x<5; ++x, ++i) {
181 assert(i<25);
182 assert(test_step<TEST_STEPS);
183 if (a(x,y) != tests[test][test_step][i]) {
184 test_fail = fail = 1;
185 printf("FAIL x=%u y=%u (0x%lx != 0x%lx)\n",
186 x, y, a(x,y),
187 tests[test][test_step][i]);
188 }
189 }
190 if (!fail) puts("PASS");
191 ++test_step;
192}
193#else
194#define checkpoint(r,s)
195#endif
196
197static void keccakf(void) {
198 unsigned round;
199 uint8_t rstate=1;
200 for (round=0; round<ROUNDS; ++round) {
201 theta(); checkpoint(round,"theta");
202 rho(); checkpoint(round,"rho");
203 pi(); checkpoint(round,"pi");
204 chi(); checkpoint(round,"chi");
205 rstate = iota(rstate); checkpoint(round,"iota");
206 }
207}
208
209static void reset(void) {
210 unsigned x, y;
211 for (x=0; x<5; ++x)
212 for (y=0; y<5; ++y)
213 a(x,y) = 0;
214}
215
216static void load(uint8_t const *block, unsigned len) {
217 /* Keeping bytes in lane little-endian, independent of machine. This
218 * harmonizes with (rumored) Keccak convention for interpretation of
219 * octet stream as bitstream. For a big-endian interpretation we could
220 * flip the byte load/store order and the direction of rotations.
221 *
222 * Ordering of lanes is x-major due to the specified mapping of
223 * bitstring to state coordinates: s[w(5y+x)+z] = a[x][y][z] */
224 unsigned x, y, z, i=0;
225 for (y=0; y<5; ++y)
226 for (x=0; x<5; ++x)
227 for (z=0; z!=W; z+=8, ++i)
228 if (i==len) return;
229 else a(x,y) ^= ((lane_t) block[i]) << z;
230}
231
232static void store_hex(char *buf, unsigned block_len) {
233 static char const hex[] = "0123456789abcdef";
234 unsigned x, y, z, len = 2*block_len, i = 0;
235 for (y=0; y<5; ++y)
236 for (x=0; x<5; ++x)
237 for (z=0; z!=W; z+=8) {
238 uint8_t byte;
239 if (i==len) return;
240 byte = a(x,y) >> z;
241 /* Digits within the byte rendered big-endian,
242 * matching common practice but unfortunately
243 * not bitstream order. */
244 buf[i++] = hex[byte>>4];
245 buf[i++] = hex[byte&15];
246 }
247}
248
249static void extract(unsigned len) {
250 static char buf[2*B/8];
251 assert(2*len <= sizeof buf);
252 store_hex(buf,len);
253 write_all(1,buf,2*len);
254}
255
256/* Stream octets from stdin until EOF, then write hex to stdout. */
257void sponge(unsigned capacity, size_t out_bits) {
258 static uint8_t buf[B/8];
259 unsigned rate = B - capacity,
260 len = rate/8,
261 n;
262 size_t out_len = out_bits/8;
263 assert(W == 8*sizeof(lane_t));
264 assert(0 < rate && rate < B);
265 assert(!(rate%8));
266 assert(len <= sizeof buf);
267
268 /* absorb */
269 reset();
270 while ((n = read_all(0,buf,len)) == len) {
271 load(buf,len);
272 keccakf();
273 }
274
275 /* pad (1000...1) */
276 if (n < len-1) {
277 buf[n] = 0x01; /* first bit (little-endian) */
278 for (++n; n < len-1; ++n)
279 buf[n] = 0;
280 buf[n] = 0x80; /* last bit (little-endian) */
281 }
282 else {
283 assert(n == len-1);
284 buf[n] = 0x81; /* first and last bits */
285 }
286 load(buf,len);
287 keccakf();
288
289 /* squeeze */
290 for (; out_len > len; out_len -= len) {
291 extract(len);
292 keccakf();
293 }
294 extract(out_len);
295}
296
297#ifdef TEST
298int main() {
299 for (test=0; test<NTESTS; ++test) {
300 unsigned x, y, i=0;
301 /* initial state */
302 for (y=0; y<5; ++y)
303 for (x=0; x<5; ++x, ++i) {
304 assert(i<25);
305 a(x,y) = tests[test][0][i];
306 }
307 test_step = 1;
308 keccakf();
309 }
310 return test_fail;
311}
312#endif