Grain128-AEAD - Light-weight cipher. Grain is a light-weight stream cipher designed by Martin Hell, Thomas Johansson and Willi Meier. It has a relatively low gate count, power consumption and memory footprint. In its AEAD form it has a 128-bit key and two shift registers (an LFSR - Linear Feedback Shift Register - and an NFSR - Non-linear Feedback Shift Register) together with a nonlinear output function. The gate equivalent (GE) count depends on the chosen level of parallelization. For the lowest level of parallelization (s=1), we can achieve a GE of 3,638, and with the maximum parallelization level (s=32), we have a GE of 12,110. When running at maximum speed for the lowest level of parallelization we get a throughput of 560 Mbps, a power drain of 3.6 mW and an area of 5,258 \(\mu m^2\). With the highest level of parallelization we can achieve a throughput of 10.59 Gbps.
Outline
Overall we have two C files and a few header files. To compile we can use the gcc compiler:
gcc main.c keccak.c -o elephant.exe
An outline of the C code is [1]:
/*
 * The reference implementation of Grain128-AEAD.
 *
 * Note that this implementation is *not* meant to be run in software.
 * It is written to be as close to a hardware implementation as possible,
 * hence it should not be used in benchmarks due to bad performance.
 *
 * Since the optimized version reads the LSB first of a byte, we here perform
 * that transformation of all user provided buffers in order to generate
 * the same test vectors.
 *
 * Jonathan Sönnerup
 * 2019
 */

#include "grain128aead.h"
#include "api.h"

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

void string2ByteArray(char* input, unsigned char *output);
unsigned char grain_round;
unsigned char swapsb(unsigned char n);

#define CRYPTO_BYTES 64

/* The tag appended to the ciphertext is 64 accumulator bits, i.e. 8 bytes. */
#define GRAIN_TAG_BITS  64
#define GRAIN_TAG_BYTES 8

void string2hexString(unsigned char* input, int clen, char* output);
void *hextobyte(char *hexstring, unsigned char* bytearray );

/* next_z() is called by init_grain() before its definition further down. */
unsigned char next_z(grain_state *grain, unsigned char keybit);

/* Copy a NUL-terminated argument into dst; returns -1 (dst untouched) if it does not fit. */
static int copy_arg(char *dst, size_t dst_size, const char *src)
{
    size_t len = strlen(src);

    if (len >= dst_size) {
        return -1;
    }
    memcpy(dst, src, len + 1);
    return 0;
}

/* Return bit `pos` of `byte`, counting from the most significant bit (pos 0). */
static unsigned char msb_bit(unsigned char byte, int pos)
{
    return (unsigned char)((byte >> (7 - pos)) & 1u);
}

/*
 * Demo driver: encrypts a message, prints the ciphertext as hex, decrypts it
 * again and reports whether the tag verified.
 *
 * Usage: prog [plaintext [keyhex [noncehex [associated-data]]]]
 *
 * Returns 0 on success, 1 on invalid arguments or on an encryption or
 * decryption failure.
 */
int main(int argc, char *argv[])
{
    unsigned long long mlen = 0;
    unsigned long long clen = 0;
    unsigned char plaintext[CRYPTO_BYTES];
    /* the ciphertext is the message followed by the tag */
    unsigned char cipher[CRYPTO_BYTES + GRAIN_TAG_BYTES];
    unsigned char npub[CRYPTO_NPUBBYTES] = "";
    unsigned char ad[CRYPTO_ABYTES] = "";
    unsigned char nsec[CRYPTO_ABYTES] = "";
    unsigned char key[CRYPTO_KEYBYTES] = {0};
    char pl[CRYPTO_BYTES] = "hello";
    /* two hex digits per ciphertext byte plus the terminator */
    char chex[2 * (CRYPTO_BYTES + GRAIN_TAG_BYTES) + 1] = "";
    char keyhex[2 * CRYPTO_KEYBYTES + 1] = "0123456789ABCDEF0123456789ABCDEF";
    char nonce[2 * CRYPTO_NPUBBYTES + 1] = "000000000000111111111111";
    char add[CRYPTO_ABYTES] = "";

    /* Bounded copies: command-line arguments are untrusted and may be longer
     * than the fixed-size buffers. */
    if (argc > 1 && copy_arg(pl, sizeof pl, argv[1]) != 0) {
        printf("Plaintext must be at most %zu characters\n", sizeof pl - 1);
        return 1;
    }
    if (argc > 2 && copy_arg(keyhex, sizeof keyhex, argv[2]) != 0) {
        printf("Key length needs to be 16 bytes\n");
        return 1;
    }
    if (argc > 3 && copy_arg(nonce, sizeof nonce, argv[3]) != 0) {
        printf("Nonce must be at most %zu hex characters\n", sizeof nonce - 1);
        return 1;
    }
    if (argc > 4 && copy_arg(add, sizeof add, argv[4]) != 0) {
        printf("Additional information must be at most %zu characters\n", sizeof add - 1);
        return 1;
    }

    if (strlen(keyhex) != 32) {
        printf("Key length needs to be 16 bytes\n");
        return 1;
    }

    size_t pl_len = strlen(pl);
    size_t ad_len = strlen(add);

    memcpy(plaintext, pl, pl_len + 1);
    memcpy(ad, add, ad_len + 1);
    hextobyte(keyhex, key);
    hextobyte(nonce, npub);

    printf("Grain128-AEAD light-weight cipher\n");
    printf("Plaintext: %s\n", (const char *)plaintext);
    printf("Key: %s\n", keyhex);
    printf("Nonce: %s\n", nonce);
    printf("Additional Information: %s\n\n", (const char *)ad);
    printf("Plaintext: %s\n", (const char *)plaintext);

    int ret = crypto_aead_encrypt(cipher, &clen, plaintext, pl_len, ad, ad_len, nsec, npub, key);
    if (ret != 0) {
        printf("Encryption failed\n");
        return 1;
    }

    string2hexString(cipher, (int)clen, chex);
    printf("Cipher: %s, Len: %llu\n", chex, clen);

    ret = crypto_aead_decrypt(plaintext, &mlen, nsec, cipher, clen, ad, ad_len, npub, key);
    /* mlen is at most pl_len, which is strictly less than sizeof plaintext */
    plaintext[mlen] = '\0';
    printf("Plaintext: %s, Len: %llu\n", (const char *)plaintext, mlen);

    if (ret != 0) {
        printf("Decryption failed\n");
        return 1;
    }
    printf("Success!\n");
    return 0;
}

/*
 * Load key and IV into the registers (one bit per array cell, like a flip
 * flop in HW), run the 256 initialization rounds and then fill the
 * authentication accumulator and shift register with the next 128 output
 * bits while feeding the key bits back.
 *
 * key: 16 bytes, iv: 12 bytes. Leaves grain_round set to NORMAL.
 */
void init_grain(grain_state *grain, const unsigned char *key, const unsigned char *iv)
{
    /* LFSR: 96 IV bits, then 31 ones and a final zero */
    for (int i = 0; i < 12; i++) {
        for (int j = 0; j < 8; j++) {
            grain->lfsr[8 * i + j] = msb_bit(iv[i], j);
        }
    }
    for (int i = 96; i < 127; i++) {
        grain->lfsr[i] = 1;
    }
    grain->lfsr[127] = 0;

    /* NFSR: the 128 key bits */
    for (int i = 0; i < 16; i++) {
        for (int j = 0; j < 8; j++) {
            grain->nfsr[8 * i + j] = msb_bit(key[i], j);
        }
    }

    for (int i = 0; i < GRAIN_TAG_BITS; i++) {
        grain->auth_acc[i] = 0;
        grain->auth_sr[i] = 0;
    }

    /* initialize grain and skip output */
    grain_round = INIT;
    for (int i = 0; i < 256; i++) {
        next_z(grain, 0);
    }

    /* initialize the accumulator with 64 output bits (key bytes 0..7 fed back)
     * and the shift register with the following 64 (key bytes 8..15) */
    grain_round = ADDKEY;
    for (int i = 0; i < GRAIN_TAG_BITS; i++) {
        grain->auth_acc[i] = next_z(grain, msb_bit(key[i / 8], i % 8));
    }
    for (int i = 0; i < GRAIN_TAG_BITS; i++) {
        grain->auth_sr[i] = next_z(grain, msb_bit(key[8 + i / 8], i % 8));
    }

    grain_round = NORMAL;
}

/*
 * Expand msg into one bit per byte (MSB first) and append the padding bit 1.
 *
 * On success data->message owns a buffer of 8 * msg_len + 1 bytes that the
 * caller must free(), and data->msg_len holds that length. On allocation
 * failure (or if the size would overflow) data->message is NULL and
 * data->msg_len is 0.
 */
void init_data(grain_data *data, const unsigned char *msg, unsigned long long msg_len)
{
    data->message = NULL;
    data->msg_len = 0;

    /* 8 * msg_len + 1 must fit in size_t */
    if (msg_len > (SIZE_MAX - 1) / 8) {
        return;
    }

    unsigned char *bits = calloc((size_t)(8 * msg_len + 1), 1);
    if (bits == NULL) {
        return;
    }

    for (unsigned long long i = 0; i < msg_len; i++) {
        for (int j = 0; j < 8; j++) {
            bits[8 * i + j] = msb_bit(msg[i], j);
        }
    }
    /* always pad data with the bit 1 (byte 0x80) */
    bits[8 * msg_len] = 1;

    data->message = bits;
    data->msg_len = 8 * msg_len + 1;
}

/* LFSR feedback bit: f(x) = 1 + x^32 + x^47 + x^58 + x^90 + x^121 + x^128 */
unsigned char next_lfsr_fb(grain_state *grain)
{
    return grain->lfsr[96] ^ grain->lfsr[81] ^ grain->lfsr[70]
         ^ grain->lfsr[38] ^ grain->lfsr[7] ^ grain->lfsr[0];
}

/* NFSR feedback bit: XOR of the linear, quadratic, cubic and quartic terms below. */
unsigned char next_nfsr_fb(grain_state *grain)
{
    const unsigned char *b = grain->nfsr;

    return b[96] ^ b[91] ^ b[56] ^ b[26] ^ b[0]
         ^ (b[84] & b[68])
         ^ (b[67] & b[3])
         ^ (b[65] & b[61])
         ^ (b[59] & b[27])
         ^ (b[48] & b[40])
         ^ (b[18] & b[17])
         ^ (b[13] & b[11])
         ^ (b[82] & b[78] & b[70])
         ^ (b[25] & b[24] & b[22])
         ^ (b[95] & b[93] & b[92] & b[88]);
}

/* Output filter h(x) = x0x1 + x2x3 + x4x5 + x6x7 + x0x4x8 */
unsigned char next_h(grain_state *grain)
{
    /* locals instead of #defines so the short names do not leak into the
     * rest of the translation unit */
    const unsigned char x0 = grain->nfsr[12];   /* bi+12 */
    const unsigned char x1 = grain->lfsr[8];    /* si+8  */
    const unsigned char x2 = grain->lfsr[13];   /* si+13 */
    const unsigned char x3 = grain->lfsr[20];   /* si+20 */
    const unsigned char x4 = grain->nfsr[95];   /* bi+95 */
    const unsigned char x5 = grain->lfsr[42];   /* si+42 */
    const unsigned char x6 = grain->lfsr[60];   /* si+60 */
    const unsigned char x7 = grain->lfsr[79];   /* si+79 */
    const unsigned char x8 = grain->lfsr[94];   /* si+94 */

    return (unsigned char)((x0 & x1) ^ (x2 & x3) ^ (x4 & x5) ^ (x6 & x7) ^ (x0 & x4 & x8));
}

/* Shift a 128-cell register one step towards index 0, insert fb at index 127
 * and return the bit that fell out of index 0. */
unsigned char shift(unsigned char fsr[128], unsigned char fb)
{
    unsigned char out = fsr[0];

    memmove(fsr, fsr + 1, 127);
    fsr[127] = fb;
    return out;
}

/* Shift the 64-cell authentication register one step and insert fb at index 63. */
void auth_shift(unsigned char sr[64], unsigned char fb)
{
    memmove(sr, sr + 1, 63);
    sr[63] = fb;
}

/* XOR the authentication shift register into the accumulator. */
void accumulate(grain_state *grain)
{
    for (int i = 0; i < GRAIN_TAG_BITS; i++) {
        grain->auth_acc[i] ^= grain->auth_sr[i];
    }
}

/*
 * Clock the cipher once and return the pre-output bit y computed from the
 * state before the shift. How the registers are fed depends on grain_round:
 *   INIT   - y is fed back into both registers,
 *   ADDKEY - keybit is XORed into the LFSR feedback,
 *   NORMAL - plain feedback.
 * For any other grain_round value the registers are left untouched.
 */
unsigned char next_z(grain_state *grain, unsigned char keybit)
{
    /* y = h + s_{i+93} + sum(b_{i+j}), j \in A */
    static const unsigned char A[] = {2, 15, 36, 45, 64, 73, 89};

    unsigned char lfsr_fb = next_lfsr_fb(grain);
    unsigned char nfsr_fb = next_nfsr_fb(grain);
    unsigned char h_out = next_h(grain);

    unsigned char nfsr_tmp = 0;
    for (size_t i = 0; i < sizeof A / sizeof A[0]; i++) {
        nfsr_tmp ^= grain->nfsr[A[i]];
    }

    unsigned char y = h_out ^ grain->lfsr[93] ^ nfsr_tmp;
    unsigned char lfsr_out;

    if (grain_round == INIT) {
        /* feedback y if we are in the initialization instance */
        lfsr_out = shift(grain->lfsr, lfsr_fb ^ y);
        shift(grain->nfsr, nfsr_fb ^ lfsr_out ^ y);
    } else if (grain_round == ADDKEY) {
        lfsr_out = shift(grain->lfsr, lfsr_fb ^ keybit);
        shift(grain->nfsr, nfsr_fb ^ lfsr_out);
    } else if (grain_round == NORMAL) {
        lfsr_out = shift(grain->lfsr, lfsr_fb);
        shift(grain->nfsr, nfsr_fb ^ lfsr_out);
    }

    return y;
}

/*
 * DER-encode len into a freshly allocated buffer stored in *der; every byte
 * is passed through swapsb().
 *
 * Returns the number of bytes written (>= 1); the caller owns *der and must
 * free() it. On allocation failure returns 0 and sets *der to NULL.
 */
int encode_der(unsigned long long len, unsigned char **der)
{
    /* short form: a single byte holding the length itself */
    if (len < 128) {
        *der = malloc(1);
        if (*der == NULL) {
            return 0;
        }
        (*der)[0] = swapsb((unsigned char)len);
        return 1;
    }

    /* long form: count the bytes needed for len (at least one, as len >= 128) */
    int der_len = 0;
    for (unsigned long long rest = len; rest != 0; rest >>= 8) {
        der_len++;
    }

    /* one extra byte to describe the number of bytes used */
    *der = malloc((size_t)der_len + 1);
    if (*der == NULL) {
        return 0;
    }
    (*der)[0] = swapsb((unsigned char)(0x80 | der_len));

    /* big-endian length bytes, filled from the last one backwards */
    unsigned long long rest = len;
    for (int i = der_len; i > 0; i--) {
        (*der)[i] = swapsb((unsigned char)(rest & 0xff));
        rest >>= 8;
    }

    return der_len + 1;
}

/* Reverse the bit order of a byte (bit 0 <-> bit 7, bit 1 <-> bit 6, ...). */
unsigned char swapsb(unsigned char n)
{
    unsigned char val = 0;

    for (int i = 0; i < 8; i++) {
        val |= (unsigned char)(((n >> i) & 1) << (7 - i));
    }
    return val;
}

/*
 * Return a malloc'd copy of src[0..len) with every byte bit-reversed, or NULL
 * on allocation failure. At least one byte is always allocated so that a
 * zero-length input still yields a non-NULL pointer.
 */
static unsigned char *swapped_copy(const unsigned char *src, unsigned long long len)
{
    if (len >= SIZE_MAX) {
        return NULL;
    }

    unsigned char *dst = malloc(len != 0 ? (size_t)len : 1);
    if (dst == NULL) {
        return NULL;
    }
    for (unsigned long long i = 0; i < len; i++) {
        dst[i] = swapsb(src[i]);
    }
    return dst;
}

/*
 * Accumulate the tag over the DER-encoded AD length followed by the
 * (already bit-swapped) associated data. Returns 0, or -1 on allocation
 * failure.
 */
static int authenticate_ad(grain_state *grain, const unsigned char *ad, unsigned long long adlen)
{
    /* authenticate adlen by prepending it to ad, using DER encoding */
    unsigned char *ader = NULL;
    int aderlen = encode_der(adlen, &ader);
    if (aderlen <= 0 || ader == NULL) {
        return -1;
    }

    unsigned long long total = (unsigned long long)aderlen + adlen;
    if (total < adlen || total > SIZE_MAX) {
        free(ader);
        return -1;
    }

    /* grow via a temporary so the original buffer is not leaked on failure */
    unsigned char *grown = realloc(ader, (size_t)total);
    if (grown == NULL) {
        free(ader);
        return -1;
    }
    ader = grown;
    if (adlen > 0) {
        memcpy(ader + aderlen, ad, (size_t)adlen);
    }

    for (unsigned long long i = 0; i < total; i++) {
        for (int j = 0; j < 8; j++) {
            /* every second bit is used for keystream, the others for MAC;
             * the keystream bit is discarded because AD is not encrypted */
            (void)next_z(grain, 0);
            unsigned char mac_bit = next_z(grain, 0);

            if (msb_bit(ader[i], j)) {
                accumulate(grain);
            }
            auth_shift(grain->auth_sr, mac_bit);
        }
    }

    free(ader);
    return 0;
}

/*
 * Encrypt mlen bytes from mp and authenticate them together with adlen bytes
 * from adp. Writes mlen + 8 bytes (ciphertext followed by the tag) to c and
 * stores that count in *clen. nsec is unused.
 *
 * Returns 0 on success, -1 on allocation failure (*clen is then 0).
 */
int crypto_aead_encrypt(unsigned char *c, unsigned long long *clen,
                        const unsigned char *mp, unsigned long long mlen,
                        const unsigned char *adp, unsigned long long adlen,
                        const unsigned char *nsec,
                        const unsigned char *npubp,
                        const unsigned char *kp
                       )
{
    /* This implementation assumes that the most significant bit is processed first,
     * in a byte. The optimized version however, processes the lsb first.
     * In order to give the same test vectors, we here change the interpretation of the bits.
     */
    int rc = -1;
    unsigned char npub[12];
    unsigned char k[16];
    grain_state grain;
    grain_data data;
    unsigned char *m = NULL;
    unsigned char *ad = NULL;

    (void)nsec;
    data.message = NULL;
    *clen = 0;

    m = swapped_copy(mp, mlen);
    ad = swapped_copy(adp, adlen);
    if (m == NULL || ad == NULL) {
        goto cleanup;
    }
    for (int i = 0; i < 12; i++) {
        npub[i] = swapsb(npubp[i]);
    }
    for (int i = 0; i < 16; i++) {
        k[i] = swapsb(kp[i]);
    }

    init_grain(&grain, k, npub);
    init_data(&data, m, mlen);
    if (data.message == NULL) {
        goto cleanup;
    }

    /* accumulate tag for associated data only */
    if (authenticate_ad(&grain, ad, adlen) != 0) {
        goto cleanup;
    }

    /* generate keystream for message */
    for (unsigned long long i = 0; i < mlen; i++) {
        unsigned char cc = 0;

        for (int j = 0; j < 8; j++) {
            /* every second bit is used for keystream, the others for MAC */
            unsigned char msg_bit = data.message[8 * i + j];
            unsigned char ks_bit = next_z(&grain, 0);
            unsigned char mac_bit = next_z(&grain, 0);

            /* transform it back to 8 bits per byte */
            cc |= (unsigned char)((msg_bit ^ ks_bit) << (7 - j));

            if (msg_bit == 1) {
                accumulate(&grain);
            }
            auth_shift(grain.auth_sr, mac_bit);
        }
        c[i] = swapsb(cc);
    }

    /* generate unused keystream bit */
    next_z(&grain, 0);
    /* the 1 in the padding means accumulation */
    accumulate(&grain);

    /* append MAC to ciphertext */
    for (int t = 0; t < GRAIN_TAG_BYTES; t++) {
        unsigned char acc = 0;

        /* transform back to 8 bits per byte */
        for (int j = 0; j < 8; j++) {
            acc |= (unsigned char)(grain.auth_acc[8 * t + j] << (7 - j));
        }
        c[mlen + (unsigned long long)t] = swapsb(acc);
    }

    *clen = mlen + GRAIN_TAG_BYTES;
    rc = 0;

cleanup:
    free(data.message);
    free(m);
    free(ad);
    return rc;
}

/*
 * Decrypt clen bytes from cp (ciphertext followed by the 8-byte tag) and
 * verify the tag over the message and the adlen bytes at adp. nsec is unused.
 *
 * On success writes clen - 8 bytes to m, stores that count in *mlen and
 * returns 0. Returns -1 if clen is shorter than the tag, if the tag does not
 * verify or if an allocation fails; *mlen is then 0 and, on a tag mismatch,
 * the bytes already written to m are zeroed so that unauthenticated
 * plaintext is not handed to the caller.
 */
int crypto_aead_decrypt(
    unsigned char *m, unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *cp, unsigned long long clen,
    const unsigned char *adp, unsigned long long adlen,
    const unsigned char *npubp,
    const unsigned char *kp
)
{
    /* This implementation assumes that the most significant bit is processed first,
     * in a byte. The optimized version however, processes the lsb first.
     * In order to give the same test vectors, we here change the interpretation of the bits.
     */
    int rc = -1;
    unsigned char npub[12];
    unsigned char k[16];
    grain_state grain;
    grain_data data;
    unsigned char *c = NULL;
    unsigned char *ad = NULL;

    (void)nsec;
    data.message = NULL;
    *mlen = 0;

    /* a valid ciphertext always carries the tag; without this check
     * clen - 8 would wrap around to a huge loop bound */
    if (clen < GRAIN_TAG_BYTES) {
        return -1;
    }
    const unsigned long long msg_len = clen - GRAIN_TAG_BYTES;

    c = swapped_copy(cp, clen);
    ad = swapped_copy(adp, adlen);
    if (c == NULL || ad == NULL) {
        goto cleanup;
    }
    for (int i = 0; i < 12; i++) {
        npub[i] = swapsb(npubp[i]);
    }
    for (int i = 0; i < 16; i++) {
        k[i] = swapsb(kp[i]);
    }

    init_grain(&grain, k, npub);
    init_data(&data, c, clen);
    if (data.message == NULL) {
        goto cleanup;
    }

    /* accumulate tag for associated data only */
    if (authenticate_ad(&grain, ad, adlen) != 0) {
        goto cleanup;
    }

    /* generate keystream for message, skipping tag */
    for (unsigned long long i = 0; i < msg_len; i++) {
        unsigned char msgbyte = 0;

        for (int j = 0; j < 8; j++) {
            /* every second bit is used for keystream, the others for MAC */
            unsigned char ks_bit = next_z(&grain, 0);
            unsigned char mac_bit = next_z(&grain, 0);

            /* decrypt ciphertext and transform it back to 8 bits per byte */
            unsigned char msgbit = data.message[8 * i + j] ^ ks_bit;
            msgbyte |= (unsigned char)(msgbit << (7 - j));

            /* use the decrypted message bit to control accumulator */
            if (msgbit == 1) {
                accumulate(&grain);
            }
            auth_shift(grain.auth_sr, mac_bit);
        }
        m[i] = swapsb(msgbyte);
    }

    /* generate unused keystream bit */
    next_z(&grain, 0);
    /* the 1 in the padding means accumulation */
    accumulate(&grain);

    /* check MAC: OR all differences together instead of using memcmp so the
     * comparison does not stop at the first mismatching bit */
    const unsigned char *tag_bits = &data.message[8 * msg_len];
    unsigned char diff = 0;
    for (int i = 0; i < GRAIN_TAG_BITS; i++) {
        diff |= (unsigned char)(grain.auth_acc[i] ^ tag_bits[i]);
    }

    if (diff != 0) {
        if (msg_len > 0) {
            memset(m, 0, (size_t)msg_len);
        }
        goto cleanup;
    }

    *mlen = msg_len;
    rc = 0;

cleanup:
    free(data.message);
    free(c);
    free(ad);
    return rc;
}
A sample run is:
Grain128-AEAD light-weight cipher Plaintext: abc Key: 1123456989ABCDEF0123456989ABCDFF Nonce: AABB000000000111111111111 Additional Information: abc Plaintext: abc Cipher: FEC07BC59803, Len: 11 Plaintext: abc, Len: 3 Success!