32 #ifndef __Random123_aes_dot_hpp__
33 #define __Random123_aes_dot_hpp__
/* AES_128_ASSIST: folds temp2 into temp1 to produce the next value of a
 * running 128-bit key word.  The visible callers pass the current key as
 * temp1 and the result of _mm_aeskeygenassist_si128 as temp2.
 * NOTE(review): the declaration of temp3, the return statement and the
 * closing brace are not present in this excerpt. */
52 R123_STATIC_INLINE __m128i AES_128_ASSIST (__m128i temp1, __m128i temp2) {
/* Shuffle mask 0xff selects the highest 32-bit lane of temp2 for all four lanes. */
54 temp2 = _mm_shuffle_epi32 (temp2 ,0xff);
/* XOR temp1 with its original value shifted left by 4, 8 and 12 bytes:
 * temp3 is shifted a further 4 bytes before each XOR. */
55 temp3 = _mm_slli_si128 (temp1, 0x4);
56 temp1 = _mm_xor_si128 (temp1, temp3);
57 temp3 = _mm_slli_si128 (temp3, 0x4);
58 temp1 = _mm_xor_si128 (temp1, temp3);
59 temp3 = _mm_slli_si128 (temp3, 0x4);
60 temp1 = _mm_xor_si128 (temp1, temp3);
/* Finally mix in the broadcast lane taken from temp2. */
61 temp1 = _mm_xor_si128 (temp1, temp2);
/* Key-schedule body: starting from the user key uk.v[0].m, performs ten
 * steps, each calling _mm_aeskeygenassist_si128 on the current rkey with
 * the next constant and then updating rkey through AES_128_ASSIST.
 * NOTE(review): presumably this is the body of aesni1xm128iexpand (called
 * further down); the function header, the declaration of tmp2 and whatever
 * stores each intermediate rkey are not visible here - confirm. */
67 __m128i rkey = uk.
v[0].
m;
/* Constants used, in order: 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
 * 0x1b, 0x36 (these match the AES-128 round constants). */
71 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x1);
72 rkey = AES_128_ASSIST(rkey, tmp2);
75 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x2);
76 rkey = AES_128_ASSIST(rkey, tmp2);
79 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x4);
80 rkey = AES_128_ASSIST(rkey, tmp2);
83 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x8);
84 rkey = AES_128_ASSIST(rkey, tmp2);
87 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x10);
88 rkey = AES_128_ASSIST(rkey, tmp2);
91 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x20);
92 rkey = AES_128_ASSIST(rkey, tmp2);
95 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x40);
96 rkey = AES_128_ASSIST(rkey, tmp2);
99 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x80);
100 rkey = AES_128_ASSIST(rkey, tmp2);
103 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x1b);
104 rkey = AES_128_ASSIST(rkey, tmp2);
107 tmp2 = _mm_aeskeygenassist_si128(rkey, 0x36);
108 rkey = AES_128_ASSIST(rkey, tmp2);
/* Fragments of five routines that fill the round-key array k by calling
 * aesni1xm128iexpand.
 * NOTE(review): presumably these are the constructors and assignment
 * operators of the AES-NI key type; their headers, local declarations and
 * return statements are not visible in this excerpt - confirm. */
/* Variant 1: expand an all-zero user key. */
119 uk.
v[0].
m = _mm_setzero_si128();
120 aesni1xm128iexpand(uk,
k);
/* Variant 2: expand the supplied user key uk directly. */
123 aesni1xm128iexpand(uk,
k);
/* Variant 3: pack four 32-bit words into one 128-bit value first.
 * _mm_set_epi32 takes its arguments from the highest lane to the lowest,
 * so uk.v[0] lands in the lowest lane. */
127 uk128.
v[0].
m = _mm_set_epi32(uk.
v[3], uk.
v[2], uk.
v[1], uk.
v[0]);
128 aesni1xm128iexpand(uk128,
k);
/* Variant 4: same expansion as variant 2. */
131 aesni1xm128iexpand(uk,
k);
/* Variant 5: same packing and expansion as variant 3. */
136 uk128.
v[0].
m = _mm_set_epi32(uk.
v[3], uk.
v[2], uk.
v[1], uk.
v[0]);
137 aesni1xm128iexpand(uk128,
k);
/* Fragments of comparison and loop code; the enclosing function headers
 * and most loop bodies are not visible in this excerpt. */
/* Walks 11 entries and returns false at the first pair with li != ri.
 * NOTE(review): how li and ri are obtained is not shown here. */
141 for(
int i=0; i<11; ++i){
145 if( li != ri )
return false;
/* Inequality is defined as the negation of operator==. */
150 return !(*
this == rhs);
/* Loop over the first 10 entries (body not visible).
 * NOTE(review): presumably the 11th entry is handled separately - confirm. */
154 for(
int i=0; i<10; ++i){
/* Loop over all 11 entries (body not visible). */
163 for(
int i=0; i<11; ++i){
/* Expands user key uk into the round-key array ret.k.
 * NOTE(review): the enclosing function header, the declaration of ret and
 * the return statement are not visible in this excerpt. */
178 aesni1xm128iexpand(uk, ret.
k);
/* Encrypts one 128-bit block with the 11 round keys in k.k:
 * XOR the input in.v[0].m with k.k[0], apply _mm_aesenc_si128 with
 * k.k[1] .. k.k[9], then _mm_aesenclast_si128 with k.k[10].
 * NOTE(review): the function header and the code that returns x are not
 * visible in this excerpt. */
185 __m128i x = _mm_xor_si128(k.
k[0], in.
v[0].
m);
186 x = _mm_aesenc_si128(x, k.
k[1]);
187 x = _mm_aesenc_si128(x, k.
k[2]);
188 x = _mm_aesenc_si128(x, k.
k[3]);
189 x = _mm_aesenc_si128(x, k.
k[4]);
190 x = _mm_aesenc_si128(x, k.
k[5]);
191 x = _mm_aesenc_si128(x, k.
k[6]);
192 x = _mm_aesenc_si128(x, k.
k[7]);
193 x = _mm_aesenc_si128(x, k.
k[8]);
194 x = _mm_aesenc_si128(x, k.
k[9]);
/* The final step uses the "last round" instruction. */
195 x = _mm_aesenclast_si128(x, k.
k[10]);
/* Packs the four 32-bit words of uk into one 128-bit value (uk.v[0] in the
 * lowest lane, since _mm_set_epi32 lists lanes from highest to lowest) and
 * expands it into ret.k.
 * NOTE(review): the enclosing function header, the declarations of uk128
 * and ret, and the return statement are not visible in this excerpt. */
220 uk128.
v[0].
m = _mm_set_epi32(uk.
v[3], uk.
v[2], uk.
v[1], uk.
v[0]);
221 aesni1xm128iexpand(uk128, ret.
k);
/* Packs the four 32-bit counter words of c into c128 (c.v[0] in the lowest
 * lane), then later writes c128.v[0].m back over c.v with an unaligned
 * 128-bit store.
 * NOTE(review): the statement between the pack and the store (presumably
 * the encryption of c128) and the enclosing function header are not visible
 * in this excerpt - confirm. */
229 c128.
v[0].
m = _mm_set_epi32(c.
v[3], c.
v[2], c.
v[1], c.
v[0]);
/* Unaligned store: &c.v[0] is only cast to __m128i*, not assumed aligned. */
231 _mm_storeu_si128((__m128i*)&c.
v[0], c128.
v[0].
m);
/* The 4x32 interface uses the same round count as the 1xm128i interface. */
235 #define aesni4x32_rounds aesni1xm128i_rounds
/* Convenience form: calls aesni4x32_R with the default round count,
 * forwarding counter c and key k. */
239 #define aesni4x32(c,k) aesni4x32_R(aesni4x32_rounds, c, k)
/* Template taking a ROUNDS parameter that defaults to 10; the static
 * assertion rejects any other value at compile time.
 * NOTE(review): the struct declarations these templates introduce are not
 * visible in this excerpt; the names in the assertion messages suggest
 * AESNI1xm128i_R and AESNI4x32_R. */
300 template <
unsigned ROUNDS=10>
302 R123_STATIC_ASSERT(ROUNDS==10,
"AESNI1xm128i_R<R> is only valid with R=10");
/* Same constraint for the 4x32 variant. */
306 template <
unsigned ROUNDS=10>
308 R123_STATIC_ASSERT(ROUNDS==10,
"AESNI4x32_R<R> is only valid with R=10");
315 #if R123_USE_AES_OPENSSL
317 #include <openssl/aes.h>
/* Key type for the OpenSSL-backed generator.  Every constructor and the
 * assignment operator fill the member k through AES_set_encrypt_key with a
 * 128-bit key length.
 * NOTE(review): the declaration of member k, closing braces and several
 * return statements are not present in this excerpt. */
321 struct aesopenssl16x8_key_t{
/* Default construction: schedule a value-initialized ({{}}) user key.
 * The return value of AES_set_encrypt_key is not checked. */
323 aesopenssl16x8_key_t(){
324 aesopenssl16x8_ukey_t ukey={{}};
325 AES_set_encrypt_key((
const unsigned char *)&ukey.v[0], 128, &k);
/* Construct from a user key: the bytes starting at ukey.v[0] are the key. */
327 aesopenssl16x8_key_t(
const aesopenssl16x8_ukey_t& ukey){
328 AES_set_encrypt_key((
const unsigned char *)&ukey.v[0], 128, &k);
/* Assign from a user key: re-runs the key schedule into k. */
330 aesopenssl16x8_key_t& operator=(
const aesopenssl16x8_ukey_t& ukey){
331 AES_set_encrypt_key((
const unsigned char *)&ukey.v[0], 128, &k);
/* Equal when the round counts match and the first (rounds+1)*4 32-bit
 * words of rd_key compare byte-for-byte equal. */
334 bool operator==(
const aesopenssl16x8_key_t& rhs)
const{
335 return (k.rounds == rhs.k.rounds) && 0==::memcmp(&k.rd_key[0], &rhs.k.rd_key[0], (k.rounds+1) * 4 *
sizeof(uint32_t));
/* Inequality is the negation of operator==. */
337 bool operator!=(
const aesopenssl16x8_key_t& rhs)
const{
338 return !(*
this == rhs);
/* Stream output: (rounds+1) iterations, each writing four words from p,
 * every word preceded by a single space.
 * NOTE(review): the statement that advances p between iterations and the
 * return are not visible in this excerpt. */
340 friend std::ostream&
operator<<(std::ostream& os,
const aesopenssl16x8_key_t& v){
342 const unsigned int *p = &v.k.rd_key[0];
343 for(
int i=0; i<(v.k.rounds+1); ++i){
344 os <<
" " << p[0] <<
" " << p[1] <<
" " << p[2] <<
" " << p[3];
/* Stream input: mirrors operator<<, reading four words per iteration
 * into rd_key.  The loop bound reads v.k.rounds.
 * NOTE(review): where v.k.rounds is set before this loop, and the
 * advance of p, are not visible in this excerpt. */
349 friend std::istream&
operator>>(std::istream& is, aesopenssl16x8_key_t& v){
351 unsigned int *p = &v.k.rd_key[0];
352 for(
int i=0; i<(v.k.rounds+1); ++i){
353 is >> p[0] >> p[1] >> p[2] >> p[3];
/* Plain-struct definition of aesopenssl16x8_key_t.
 * NOTE(review): presumably the C alternative to the C++ struct of the same
 * name, selected by a preprocessor branch not visible here; the member
 * declaration between the braces is also missing from this excerpt. */
360 typedef struct aesopenssl16x8_key_t{
362 }aesopenssl16x8_key_t;
/* aesopenssl16x8keyinit: builds a key object from user key uk by running
 * AES_set_encrypt_key (128-bit key length) on the bytes starting at
 * uk.v[0], writing the schedule into ret.k.  The return value of
 * AES_set_encrypt_key is not checked.
 * NOTE(review): the return statement and closing brace are not visible in
 * this excerpt. */
363 R123_STATIC_INLINE
struct aesopenssl16x8_key_t aesopenssl16x8keyinit(aesopenssl16x8_ukey_t uk){
364 aesopenssl16x8_key_t ret;
365 AES_set_encrypt_key((
const unsigned char *)&uk.v[0], 128, &ret.k);
/* aesopenssl16x8_R: encrypts the counter block ctr under key and returns
 * the result.  Takes exactly (ctr, key) - there is no round-count parameter.
 * The first line is a forward declaration wrapped in R123_FORCE_INLINE. */
370 R123_STATIC_INLINE R123_FORCE_INLINE(aesopenssl16x8_ctr_t aesopenssl16x8_R(aesopenssl16x8_ctr_t ctr, aesopenssl16x8_key_t key));
372 aesopenssl16x8_ctr_t aesopenssl16x8_R(aesopenssl16x8_ctr_t ctr, aesopenssl16x8_key_t key){
373 aesopenssl16x8_ctr_t ret;
/* AES_encrypt reads the block at ctr.v[0] and writes the block at ret.v[0].
 * NOTE(review): the return statement and closing brace are not visible in
 * this excerpt. */
374 AES_encrypt((
const unsigned char*)&ctr.v[0], (
unsigned char *)&ret.v[0], &key.k);
/* Round count for the OpenSSL-backed generator; aliases the AES-NI 4x32
 * round count. */
#define aesopenssl16x8_rounds aesni4x32_rounds
/* Convenience form: encrypt counter c under key k.
 * aesopenssl16x8_R takes (ctr, key) and no round count, so both arguments
 * are forwarded as-is.  The previous expansion passed only
 * aesopenssl16x8_rounds and dropped c and k, which could not compile at any
 * use site. */
#define aesopenssl16x8(c,k) aesopenssl16x8_R(c, k)
383 struct AESOpenSSL16x8{
384 typedef aesopenssl16x8_ctr_t ctr_type;
385 typedef aesopenssl16x8_key_t key_type;
386 typedef aesopenssl16x8_ukey_t ukey_type;
387 static const unsigned int rounds=10;
388 ctr_type operator()(
const ctr_type& in,
const key_type& k){
390 AES_encrypt((
const unsigned char *)&in[0], (
unsigned char *)&out[0], &k.k);