32 #ifndef _r123array_dot_h__
33 #define _r123array_dot_h__
/* When the translation unit is not C++, or when __METAL_MACOS__ is defined,
   the three helper macros below are defined with empty replacement lists,
   so every use of them expands to nothing. */
37 #if !defined(__cplusplus) || defined(__METAL_MACOS__)
38 #define CXXMETHODS(_N, W, T)
39 #define CXXOVERLOADS(_N, W, T)
40 #define CXXMETHODS_REQUIRING_STL
/* Fragment of a function template parameterized on value_type; its signature
   and the declaration of v are not visible in this excerpt.
   The loop runs (3+sizeof(value_type))/4 times, i.e. once per 32-bit word
   needed to cover value_type, rounding up.  Iteration i reads the next word
   through p32, converts it to value_type and ORs it into v at bit offset
   32*i, so the first word read supplies the least-significant bits.
   NOTE(review): presumably this is assemble_from_u32, which seed() calls
   with a pointer into its uint32_t buffer -- confirm against the full file. */
74 template <
typename value_type>
77 for(
size_t i=0; i<(3+
sizeof(value_type))/4; ++i)
78 v |= ((value_type)(*p32++)) << (32*i);
/* Empty variant of CXXMETHODS_REQUIRING_STL: expands to nothing.
   NOTE(review): the preprocessor condition that selects this variant is not
   visible in this excerpt. */
88 #define CXXMETHODS_REQUIRING_STL
/* Full variant: reverse-iteration members built on std::reverse_iterator.
   The expansion refers to iterator, const_iterator, begin(), end(), cbegin()
   and cend(), so it has to appear in a scope where those are declared;
   CXXMETHODS declares them and _r123array_tpl expands both macros in the
   same struct.
   rbegin()/crbegin() wrap end()/cend(); rend()/crend() wrap begin()/cbegin().
   Every member is qualified with R123_CUDA_DEVICE. */
90 #define CXXMETHODS_REQUIRING_STL \
92 typedef std::reverse_iterator<iterator> reverse_iterator; \
93 typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \
94 R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); } \
95 R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \
96 R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); } \
97 R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \
98 R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \
99 R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); }
/* CXXMETHODS(_N, W, T): member declarations for the array struct
   r123array<_N>x<W>.
     _N  number of elements (also exposed as static_size)
     W   in the visible lines it is used only when pasting the struct name
         r123array##_N##x##W
     T   element type
   The macro opens with the container-style member typedefs: iterators and
   pointers are plain T* / const T*, size_type is size_t and difference_type
   is ptrdiff_t.  The members that follow operate on an array member named v,
   whose declaration is not part of this macro's visible lines. */
103 #define CXXMETHODS(_N, W, T) \
104 typedef T value_type; \
105 typedef T* iterator; \
106 typedef const T* const_iterator; \
107 typedef value_type& reference; \
108 typedef const value_type& const_reference; \
109 typedef size_t size_type; \
110 typedef ptrdiff_t difference_type; \
111 typedef T* pointer; \
112 typedef const T* const_pointer; \
114 enum {static_size = _N}; \
115 R123_CUDA_DEVICE reference operator[](size_type i){return v[i];} /* unchecked element access */ \
116 R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} /* unchecked element access, const */ \
117 R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } /* checked: i >= _N goes through R123_THROW with std::out_of_range */ \
118 R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } /* checked access, const */ \
119 R123_CUDA_DEVICE size_type size() const { return _N; } /* always _N */ \
120 R123_CUDA_DEVICE size_type max_size() const { return _N; } /* also _N */ \
121 R123_CUDA_DEVICE bool empty() const { return _N==0; } /* true only when _N is 0 */ \
122 R123_CUDA_DEVICE iterator begin() { return &v[0]; } /* pointer to the first element */ \
123 R123_CUDA_DEVICE iterator end() { return &v[_N]; } /* address one past the last element */ \
124 R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; } \
125 R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; } \
126 R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; } /* same address as the const begin() */ \
127 R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; } /* same address as the const end() */ \
128 R123_CUDA_DEVICE pointer data(){ return &v[0]; } /* raw pointer to the element storage */ \
129 R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; } \
130 R123_CUDA_DEVICE reference front(){ return v[0]; } /* first element, no check */ \
131 R123_CUDA_DEVICE const_reference front() const{ return v[0]; } \
132 R123_CUDA_DEVICE reference back(){ return v[_N-1]; } /* last element: indexes v[_N-1] with no check */ \
133 R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; } \
134 R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ /* element-wise comparison; NOTE(review): some body lines are missing from this excerpt */ \
136 for (size_t i = 0; i < _N; ++i) \
137 if (v[i] != rhs.v[i]) return false; /* the first differing element decides */ \
140 R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } /* negation of operator== */ \
142 R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } /* assigns val to every element */ \
143 R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ /* NOTE(review): loop body not visible here; presumably exchanges v[i] with rhs.v[i] -- confirm */ \
145 for (size_t i = 0; i < _N; ++i) { \
151 R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n=1){ /* n defaults to 1; NOTE(review): several statements of this body are missing from the excerpt */ \
155 if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) /* T is narrower than n and n has bits above T's width; the inner ?: makes the shift count 0 when T is not narrower */ \
156 return incr_carefully(n); \
159 if(_N==1 || R123_BUILTIN_EXPECT(!!v[0], 1)) return *this; /* done when there is one element or v[0] is nonzero */ \
162 if(_N==1 || R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this; /* done when there is one element or n<=v[0]; presumably v[0] did not wrap -- confirm */ \
174 if(_N==2 || R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; /* the subscript falls back to 0 when _N<=1, so it stays in range for every _N */ \
176 if(_N==3 || R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this; \
178 for(size_t i=4; i<_N; ++i){ /* elements from index 4 upward */ \
179 if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this; /* stop as soon as the previous element is nonzero */ \
186 template <typename SeedSeq> \
187 R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){ /* builds an array from words produced by ss.generate(); the end of the body is not visible in this excerpt */ \
188 r123array##_N##x##W ret; \
189 const size_t Ngen = _N*((3+sizeof(value_type))/4); /* 32-bit words needed: _N elements times words per element, rounded up */ \
190 uint32_t u32[Ngen]; \
191 uint32_t *p32 = &u32[0]; \
192 ss.generate(&u32[0], &u32[Ngen]); /* ss receives the begin and one-past-the-end pointers of the buffer */ \
193 for(size_t i=0; i<_N; ++i){ \
194 ret.v[i] = assemble_from_u32<value_type>(p32); /* element i is built from the words starting at p32 */ \
195 p32 += (3+sizeof(value_type))/4; /* advance by the number of words per element */ \
200 R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(R123_ULONG_LONG n){ /* path taken by incr() when n has bits above T's width; NOTE(review): most of the body is missing from this excerpt */ \
205 const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); /* value_type's width in bits when n is wider than value_type, otherwise 0 */ \
206 for(size_t i=1; i<_N; ++i){ /* starts at element 1 */ \
/* Output wrapper used by CXXOVERLOADS, which constructs one per element
   before streaming it.  The constructor stores t_ in member v, and a friend
   operator<< taking the wrapper is declared.
   NOTE(review): the template header, the declaration of v and the body of
   operator<< are not visible in this excerpt. */
233 struct r123arrayinsertable{
235 r123arrayinsertable(
const T& t_) : v(t_) {}
236 friend std::ostream&
operator<<(std::ostream& os,
const r123arrayinsertable<T>& t){
/* uint8_t specialization of the output wrapper: operator<< converts the
   stored value to int before inserting it into the stream.
   NOTE(review): presumably so that the value is written as a number rather
   than as a character -- confirm. */
242 struct r123arrayinsertable<uint8_t>{
244 r123arrayinsertable(
const uint8_t& t_) : v(t_) {}
245 friend std::ostream&
operator<<(std::ostream& os,
const r123arrayinsertable<uint8_t>& t){
246 return os << (int)t.v;
/* Input wrapper used by CXXOVERLOADS, which binds one to each element while
   reading.  The constructor takes a non-const T& and stores it in member v;
   a friend operator>> taking the wrapper is declared.
   NOTE(review): the template header, the declaration of v and the body of
   operator>> are not visible in this excerpt. */
251 struct r123arrayextractable{
253 r123arrayextractable(T& t_) : v(t_) {}
254 friend std::istream&
operator>>(std::istream& is, r123arrayextractable<T>& t){
/* uint8_t specialization of the input wrapper.
   NOTE(review): the body of operator>> is not visible in this excerpt;
   presumably it reads through a wider integer type, mirroring the uint8_t
   output wrapper -- confirm. */
260 struct r123arrayextractable<uint8_t>{
262 r123arrayextractable(uint8_t& t_) : v(t_) {}
263 friend std::istream&
operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){
/* CXXOVERLOADS(_N, W, T): stream operators and a type alias for
   r123array<_N>x<W>.
   operator<< writes element 0, then every remaining element preceded by a
   single space; each element is wrapped in r123arrayinsertable<T>.
   operator>> visits the elements in index order and binds each one to an
   r123arrayextractable<T>.
   The typedef makes Array<_N>x<W> another name for the array type.
   NOTE(review): some lines of this macro (the return statements, the
   extraction itself and whatever scope encloses the typedef) are not visible
   in this excerpt. */
272 #define CXXOVERLOADS(_N, W, T) \
274 inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){ \
275 os << r123arrayinsertable<T>(a.v[0]); \
276 for(size_t i=1; i<_N; ++i) \
277 os << " " << r123arrayinsertable<T>(a.v[i]); \
281 inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){ \
282 for(size_t i=0; i<_N; ++i){ \
283 r123arrayextractable<T> x(a.v[i]); \
290 typedef r123array##_N##x##W Array##_N##x##W; \
/* _r123array_tpl(_N, W, T): defines struct r123array<_N>x<W> and expands
   CXXMETHODS and CXXMETHODS_REQUIRING_STL inside it, followed by
   CXXOVERLOADS.
   NOTE(review): the data member and the closing of the struct are not
   visible in this excerpt; presumably the struct holds an array v of _N
   elements of type T and CXXOVERLOADS is expanded after the struct is
   closed -- confirm. */
308 #define _r123array_tpl(_N, W, T) \
311 struct r123array##_N##x##W{ \
313 CXXMETHODS(_N, W, T) \
314 CXXMETHODS_REQUIRING_STL \
317 CXXOVERLOADS(_N, W, T)
/* When __CUDACC__ is defined, turn off the code_is_unreachable diagnostic.
   NOTE(review): presumably needed because the array methods contain branches
   that are decided at compile time by _N -- confirm. */
320 #if defined(__CUDACC__)
322 #pragma diag_suppress = code_is_unreachable
/* When __CUDACC__ is defined, put the code_is_unreachable diagnostic back to
   its default setting. */
334 #if defined(__CUDACC__)
335 #pragma diag_default = code_is_unreachable
/* R123_W(a): 8 times the size in bytes of one element of a's member v, where
   a is a type name.  The null pointer cast appears only as the operand of
   sizeof, which is not evaluated, so nothing is dereferenced.  The factor is
   the literal 8, i.e. the result is a bit width only on platforms with 8-bit
   bytes. */
348 #define R123_W(a) (8*sizeof(((a *)0)->v[0]))