Random123
array.h
Go to the documentation of this file.
1 /*
2 Copyright 2010-2011, D. E. Shaw Research.
3 All rights reserved.
4 
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are
7 met:
8 
9 * Redistributions of source code must retain the above copyright
10  notice, this list of conditions, and the following disclaimer.
11 
12 * Redistributions in binary form must reproduce the above copyright
13  notice, this list of conditions, and the following disclaimer in the
14  documentation and/or other materials provided with the distribution.
15 
16 * Neither the name of D. E. Shaw Research nor the names of its
17  contributors may be used to endorse or promote products derived from
18  this software without specific prior written permission.
19 
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32 #ifndef _r123array_dot_h__
33 #define _r123array_dot_h__
35 #include "features/sse.h"
36 
37 #if !defined(__cplusplus) || defined(__METAL_MACOS__)
38 #define CXXMETHODS(_N, W, T)
39 #define CXXOVERLOADS(_N, W, T)
40 #define CXXMETHODS_REQUIRING_STL
41 #else
42 
43 #include <stddef.h>
44 #include <algorithm>
45 #include <stdexcept>
46 #include <iterator>
47 #include <limits>
48 #include <iostream>
49 
74 template <typename value_type>
75 inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){
76  value_type v=0;
77  for(size_t i=0; i<(3+sizeof(value_type))/4; ++i)
78  v |= ((value_type)(*p32++)) << (32*i);
79  return v;
80 }
81 
/* CXXMETHODS_REQUIRING_STL supplies the reverse-iterator typedefs and
   accessors of an r123array struct.  It relies on the iterator and
   const_iterator typedefs and on begin()/end()/cbegin()/cend() being
   declared earlier in the same struct, and it starts with "public:" so
   that its members are public whatever access section precedes it.

   When compiling device code (__CUDA_ARCH__ defined) the macro expands to
   nothing: CUDA can't handle std::reverse_iterator.  We *could* implement
   it ourselves, but let's not bother until somebody really feels a need
   to reverse-iterate through an r123array. */
#ifndef __CUDA_ARCH__
#define CXXMETHODS_REQUIRING_STL \
public: \
    typedef std::reverse_iterator<iterator> reverse_iterator; \
    typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \
    /* mutable traversal */ \
    R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); } \
    R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); } \
    /* read-only traversal */ \
    R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \
    R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \
    R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \
    R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); }
#else
#define CXXMETHODS_REQUIRING_STL
#endif
101 
/* CXXMETHODS(_N, W, T) expands to the member typedefs and member functions
   placed inside struct r123array<_N>x<W>.  Every member operates on the
   struct's data member v, which is indexed here from 0 to _N-1 and holds
   elements of type T.

   Summary of what the expansion provides:
   - container typedefs, static_size, operator[] (unchecked) and at()
     (passes std::out_of_range to R123_THROW when i >= _N);
   - size/max_size/empty, begin/end/cbegin/cend, data, front, back;
   - operator==, operator!=, fill and swap, written as explicit loops;
   - incr(n): adds n to the array treated as a multi-word counter whose
     least significant word is v[0]; a carry out of v[_N-1] is discarded;
   - static seed(ss): fills a new array from ss.generate();
   - a trailing "protected:" section holding incr_carefully(), so anything
     that follows the expansion inside the struct is protected until
     another access specifier appears. */
102 // Work-alike methods and typedefs modeled on std::array:
103 #define CXXMETHODS(_N, W, T) \
104  typedef T value_type; \
105  typedef T* iterator; \
106  typedef const T* const_iterator; \
107  typedef value_type& reference; \
108  typedef const value_type& const_reference; \
109  typedef size_t size_type; \
110  typedef ptrdiff_t difference_type; \
111  typedef T* pointer; \
112  typedef const T* const_pointer; \
113  /* Boost.array has static_size. C++11 specializes tuple_size */ \
114  enum {static_size = _N}; \
115  R123_CUDA_DEVICE reference operator[](size_type i){return v[i];} \
116  R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} \
117  R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
118  R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
119  R123_CUDA_DEVICE size_type size() const { return _N; } \
120  R123_CUDA_DEVICE size_type max_size() const { return _N; } \
121  R123_CUDA_DEVICE bool empty() const { return _N==0; }; \
122  R123_CUDA_DEVICE iterator begin() { return &v[0]; } \
123  R123_CUDA_DEVICE iterator end() { return &v[_N]; } \
124  R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; } \
125  R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; } \
126  R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; } \
127  R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; } \
128  R123_CUDA_DEVICE pointer data(){ return &v[0]; } \
129  R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; } \
130  R123_CUDA_DEVICE reference front(){ return v[0]; } \
131  R123_CUDA_DEVICE const_reference front() const{ return v[0]; } \
132  R123_CUDA_DEVICE reference back(){ return v[_N-1]; } \
133  R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; } \
134  R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ \
135  /* CUDA3 does not have std::equal */ \
136  for (size_t i = 0; i < _N; ++i) \
137  if (v[i] != rhs.v[i]) return false; \
138  return true; \
139  } \
140  R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } \
141  /* CUDA3 does not have std::fill_n */ \
142  R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } \
143  R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ \
144  /* CUDA3 does not have std::swap_ranges */ \
145  for (size_t i = 0; i < _N; ++i) { \
146  T tmp = v[i]; \
147  v[i] = rhs.v[i]; \
148  rhs.v[i] = tmp; \
149  } \
150  } \
  /* incr(n): add n (default 1) to the counter and return *this. */ \
151  R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n=1){ \
152  /* This test is tricky because we're trying to avoid spurious \
153  complaints about illegal shifts, yet still be compile-time \
154  evaluated. */ \
  /* If n has bits above the width of T, take the general path. */ \
155  if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) \
156  return incr_carefully(n); \
157  if(n==1){ \
158  ++v[0]; \
  /* v[0] is nonzero after ++ unless it wrapped around */ \
159  if(_N==1 || R123_BUILTIN_EXPECT(!!v[0], 1)) return *this; \
160  }else{ \
161  v[0] += n; \
  /* after the addition, v[0] < n means the sum wrapped around */ \
162  if(_N==1 || R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this; \
163  } \
164  /* We expect that the N==?? tests will be \
165  constant-folded/optimized away by the compiler, so only the \
166  overflow tests (!!v[i]) remain to be done at runtime. For \
167  small values of N, it would be better to do this as an \
168  unconditional sequence of adc. An experiment/optimization \
169  for another day... \
170  N.B. The weird subscripting: v[_N>3?3:0] is to silence \
171  a spurious error from icpc \
172  */ \
173  ++v[_N>1?1:0]; \
174  if(_N==2 || R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; \
175  ++v[_N>2?2:0]; \
176  if(_N==3 || R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this; \
177  ++v[_N>3?3:0]; \
  /* keep carrying into v[4..] only while the previous word wrapped to 0 */ \
178  for(size_t i=4; i<_N; ++i){ \
179  if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this; \
180  ++v[i]; \
181  } \
182  return *this; \
183  } \
184  /* seed(SeedSeq) would be a constructor if having a constructor */ \
185  /* didn't cause headaches with defaults */ \
  /* Requests _N*ceil(sizeof(value_type)/4) 32-bit words from */ \
  /* ss.generate() and assembles each element with assemble_from_u32. */ \
186  template <typename SeedSeq> \
187  R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){ \
188  r123array##_N##x##W ret; \
189  const size_t Ngen = _N*((3+sizeof(value_type))/4); \
190  uint32_t u32[Ngen]; \
191  uint32_t *p32 = &u32[0]; \
192  ss.generate(&u32[0], &u32[Ngen]); \
193  for(size_t i=0; i<_N; ++i){ \
194  ret.v[i] = assemble_from_u32<value_type>(p32); \
195  p32 += (3+sizeof(value_type))/4; \
196  } \
197  return ret; \
198  } \
199 protected: \
  /* General path of incr(): adds n one value_type-sized piece at a time. */ \
200  R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(R123_ULONG_LONG n){ \
201  /* n may be greater than the maximum value of a single value_type */ \
202  value_type vtn; \
203  vtn = n; \
204  v[0] += n; \
  /* rshift is the bit width of value_type when n is wider, otherwise 0 */ \
205  const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); \
206  for(size_t i=1; i<_N; ++i){ \
207  if(rshift){ \
208  n >>= rshift; \
209  }else{ \
210  n=0; \
211  } \
  /* vtn is the amount just added to v[i-1]; a smaller result means */ \
  /* that addition wrapped, so carry one into the next piece */ \
212  if( v[i-1] < vtn ) \
213  ++n; \
214  if( n==0 ) break; \
215  vtn = n; \
216  v[i] += n; \
217  } \
218  return *this; \
219  } \
220 
221 
223 // There are several tricky considerations for the insertion and extraction
224 // operators:
225 // - we would like to be able to print r123array16x8 as a sequence of 16 integers,
226 // not as 16 bytes.
227 // - we would like to be able to print r123array1xm128i.
228 // - we do not want an int conversion operator in r123m128i because it causes
229 // lots of ambiguity problems with automatic promotions.
230 // Solution: r123arrayinsertable and r123arrayextractable
231 
// Output wrapper for one array element.  It holds a const reference to the
// element (so it must not outlive it) and lets operator<< be customised per
// element type.  The primary template forwards to the element type's own
// stream insertion operator.
template<typename T>
struct r123arrayinsertable{
    const T& v;
    r123arrayinsertable(const T& elem) : v(elem) {}
    friend std::ostream& operator<<(std::ostream& out, const r123arrayinsertable<T>& item){
        out << item.v;
        return out;
    }
};

// uint8_t elements are widened to int before insertion so that they are
// written as numbers rather than as single characters.
template<>
struct r123arrayinsertable<uint8_t>{
    const uint8_t& v;
    r123arrayinsertable(const uint8_t& elem) : v(elem) {}
    friend std::ostream& operator<<(std::ostream& out, const r123arrayinsertable<uint8_t>& item){
        const int as_number = static_cast<int>(item.v);
        out << as_number;
        return out;
    }
};
249 
// Input wrapper for one array element.  It holds a reference to the element
// (so it must not outlive it) and lets operator>> be customised per element
// type.  The primary template forwards to the element type's own stream
// extraction operator.
template<typename T>
struct r123arrayextractable{
    T& v;  // element that receives the extracted value
    r123arrayextractable(T& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<T>& t){
        return is >> t.v;
    }
};

// uint8_t elements are read as numbers: the text is extracted into an int
// and then converted to uint8_t (values outside 0..255 are reduced modulo
// 256 by the conversion; the stream state is not touched for that).
template<>
struct r123arrayextractable<uint8_t>{
    uint8_t& v;  // element that receives the extracted value
    r123arrayextractable(uint8_t& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){
        // Must be initialized: when the stream is already in a failed state
        // the extraction below does not write to i at all, and storing an
        // indeterminate int into the element would be undefined behaviour.
        // With the initializer the element is set to 0 in that case.
        int i = 0;
        is >> i;
        t.v = static_cast<uint8_t>(i);
        return is;
    }
};
/* CXXOVERLOADS(_N, W, T) declares the free functions and the typedef that
   accompany struct r123array<_N>x<W>:
   - operator<< writes the _N elements separated by single spaces, each
     through r123arrayinsertable<T>;
   - operator>> reads the _N elements in order, each through
     r123arrayextractable<T>;
   - r123::Array<_N>x<W> is a typedef for r123array<_N>x<W>. */
#define CXXOVERLOADS(_N, W, T) \
\
inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){ \
    for(size_t k=0; k<_N; ++k){ \
        if(k != 0) \
            os << " "; \
        os << r123arrayinsertable<T>(a.v[k]); \
    } \
    return os; \
} \
\
inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){ \
    for(size_t k=0; k<_N; ++k){ \
        r123arrayextractable<T> slot(a.v[k]); \
        is >> slot; \
    } \
    return is; \
} \
\
namespace r123{ \
    typedef r123array##_N##x##W Array##_N##x##W; \
}
292 
293 #endif /* __cplusplus */
294 
295 /* _r123array_tpl expands to a declaration of struct r123arrayNxW.
296 
297  In C, it's nothing more than a struct containing an array of N
298  objects of type T.
299 
300  In C++ it's the same, but endowed with an assortment of member
301  functions and typedefs, plus free stream operators. In C++, r123arrayNxW
302  looks a lot like std::array<T,N>, has most of the capabilities of a container,
303  and satisfies the requirements outlined in compat/Engine.hpp for
304  counter and key types. ArrayNxW, in the r123 namespace, is
305  a typedef equivalent to r123arrayNxW.

 Arguments: _N is the number of elements, W is the token pasted after
 the 'x' in the struct name, and T is the element type.

 The order of the pieces inside the struct is significant: the data
 member v comes first, CXXMETHODS (which ends in a protected: section)
 comes next, and CXXMETHODS_REQUIRING_STL comes last.  CXXOVERLOADS is
 expanded after the struct is complete because it declares free
 functions that take the struct as a parameter.
306 */
307 
308 #define _r123array_tpl(_N, W, T) \
309  \
310  \
311 struct r123array##_N##x##W{ \
312  T v[_N]; \
313  CXXMETHODS(_N, W, T) \
314  CXXMETHODS_REQUIRING_STL \
315 }; \
316  \
317 CXXOVERLOADS(_N, W, T)
318 
319 
/* The concrete array types.  Each _r123array_tpl(N, W, T) line below
   declares struct r123array<N>x<W> with N elements of type T, together
   with whatever CXXOVERLOADS expands to for that type. */
320 #if defined(__CUDACC__)
321 /* Disable complaints from CUDA8 and C++ */
/* The suppression covers the 32-bit and 64-bit arrays declared below and
   is switched back to the default right after them. */
322 #pragma diag_suppress = code_is_unreachable
323 #endif
324 _r123array_tpl(1, 32, uint32_t) /* r123array1x32 */
325 _r123array_tpl(2, 32, uint32_t) /* r123array2x32 */
326 _r123array_tpl(4, 32, uint32_t) /* r123array4x32 */
327 _r123array_tpl(8, 32, uint32_t) /* r123array8x32 */
328 
/* 64-bit element arrays are declared only when R123_USE_64BIT is nonzero. */
329 #if R123_USE_64BIT
330 _r123array_tpl(1, 64, uint64_t) /* r123array1x64 */
331 _r123array_tpl(2, 64, uint64_t) /* r123array2x64 */
332 _r123array_tpl(4, 64, uint64_t) /* r123array4x64 */
333 #endif
334 #if defined(__CUDACC__)
/* Restore the default handling of the diagnostic suppressed above. */
335 #pragma diag_default = code_is_unreachable
336 #endif
337 
338 _r123array_tpl(16, 8, uint8_t) /* r123array16x8 for ARSsw, AESsw */
339 
/* The r123m128i-based array is declared only when R123_USE_SSE is nonzero. */
340 #if R123_USE_SSE
341 _r123array_tpl(1, m128i, r123m128i) /* r123array1x128i for ARSni, AESni */
342 #endif
343 
/* R123_W(a): width in bits of one element of the array type a, computed
   as 8 times the size in bytes of a's member v[0].

   In C++ one would naturally write sizeof(a::value_type), but C has no
   such spelling, so the element is named through a null pointer of type
   a*.  The pointer is never dereferenced at run time because sizeof does
   not evaluate its operand.
*/
#define R123_W(a) (sizeof(((a *)0)->v[0])*8)
349 
354 #endif
355 
operator>>
std::istream & operator>>(std::istream &is, r123array1x32 &a)
Definition: array.h:314
_r123array_tpl
#define _r123array_tpl(_N, W, T)
Definition: array.h:298
operator<<
std::ostream & operator<<(std::ostream &os, const r123array1x32 &a)
Definition: array.h:314
assemble_from_u32
T assemble_from_u32(uint32_t *p32)
compilerfeatures.h
r123m128i
Definition: sse.h:148
sse.h