IAP GITLAB

Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • AirShowerPhysics/corsika
  • rulrich/corsika
  • AAAlvesJr/corsika
  • Andre/corsika
  • arrabito/corsika
  • Nikos/corsika
  • olheiser73/corsika
  • AirShowerPhysics/papers/corsika
  • pranav/corsika
9 results
Show changes
Showing
with 4136 additions and 0 deletions
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _r123array_dot_h__
#define _r123array_dot_h__
#include "features/compilerfeatures.h"
#include "features/sse.h"
#if !defined(__cplusplus) || defined(__METAL_MACOS__)
#define CXXMETHODS(_N, W, T)
#define CXXOVERLOADS(_N, W, T)
#define CXXMETHODS_REQUIRING_STL
#else
#include <stddef.h>
#include <algorithm>
#include <stdexcept>
#include <iterator>
#include <limits>
#include <iostream>
/** @defgroup arrayNxW The r123arrayNxW classes
Each of the r123arrayNxW is a fixed size array of N W-bit unsigned integers.
It is functionally equivalent to the C++11 std::array<uintW_t, N>,
but does not require C++11 features or libraries.
In addition to meeting most of the requirements of a Container,
it also has a member function, incr(), which increments the zero-th
element and carries overflows into higher indexed elements. Thus,
by using incr(), sequences of up to 2^(N*W) distinct values
can be produced.
If SSE is supported by the compiler, then the class
r123array1xm128i is also defined, in which the data member is an
array of one r123m128i object.
When compiling with __CUDA_ARCH__ defined, the reverse iterator
methods (rbegin, rend, crbegin, crend) are not defined because
CUDA does not support std::reverse_iterator.
*/
/** @cond HIDDEN_FROM_DOXYGEN */
// Build one value_type from consecutive 32-bit words starting at p32.
// Word 0 supplies the least-significant 32 bits; ceil(sizeof(value_type)/4)
// words are consumed in total.
template <typename value_type>
inline RANDOM_ITERATOR_R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){
    const size_t nwords = (3+sizeof(value_type))/4;
    value_type result=0;
    size_t word=0;
    while(word<nwords){
        result |= ((value_type)(p32[word])) << (32*word);
        ++word;
    }
    return result;
}
/** @endcond */
#ifdef __CUDA_ARCH__
/* CUDA can't handle std::reverse_iterator. We *could* implement it
ourselves, but let's not bother until somebody really feels a need
to reverse-iterate through an r123array */
#define CXXMETHODS_REQUIRING_STL
#else
/* Reverse-iteration support for the r123arrayNxW structs: the
reverse_iterator/const_reverse_iterator typedefs plus rbegin, rend,
crbegin and crend, each wrapping the forward begin()/end()/cbegin()/cend()
members in a std::reverse_iterator. The expansion starts with "public:". */
#define CXXMETHODS_REQUIRING_STL \
public: \
typedef std::reverse_iterator<iterator> reverse_iterator; \
typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \
RANDOM_ITERATOR_R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \
RANDOM_ITERATOR_R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); }
#endif
// Work-alike methods and typedefs modeled on std::array:
// CXXMETHODS(_N, W, T) expands to the member typedefs and member functions
// of struct r123array##_N##x##W, whose storage is the array member `v`
// holding _N elements of type T:
//  - std::array-style members (operator[], at, size, begin/end, data,
//    front/back, fill, swap) and element-wise operator==/operator!=;
//  - incr(n): adds n to the array treated as one multi-word counter with
//    v[0] as the least-significant word, carrying overflow into v[1], v[2]...
//  - seed(ss): static factory that fills every element from 32-bit words
//    produced by ss.generate();
//  - incr_carefully(n): protected helper that incr() falls back to when n
//    has bits set above the width of a single element.
#define CXXMETHODS(_N, W, T) \
typedef T value_type; \
typedef T* iterator; \
typedef const T* const_iterator; \
typedef value_type& reference; \
typedef const value_type& const_reference; \
typedef size_t size_type; \
typedef ptrdiff_t difference_type; \
typedef T* pointer; \
typedef const T* const_pointer; \
/* Boost.array has static_size. C++11 specializes tuple_size */ \
enum {static_size = _N}; \
RANDOM_ITERATOR_R123_CUDA_DEVICE reference operator[](size_type i){return v[i];} \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} \
RANDOM_ITERATOR_R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) RANDOM_ITERATOR_R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) RANDOM_ITERATOR_R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE size_type size() const { return _N; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE size_type max_size() const { return _N; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE bool empty() const { return _N==0; }; \
RANDOM_ITERATOR_R123_CUDA_DEVICE iterator begin() { return &v[0]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE iterator end() { return &v[_N]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE pointer data(){ return &v[0]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE reference front(){ return v[0]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reference front() const{ return v[0]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE reference back(){ return v[_N-1]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ \
/* CUDA3 does not have std::equal */ \
for (size_t i = 0; i < _N; ++i) \
if (v[i] != rhs.v[i]) return false; \
return true; \
} \
RANDOM_ITERATOR_R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } \
/* CUDA3 does not have std::fill_n */ \
RANDOM_ITERATOR_R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ \
/* CUDA3 does not have std::swap_ranges */ \
for (size_t i = 0; i < _N; ++i) { \
T tmp = v[i]; \
v[i] = rhs.v[i]; \
rhs.v[i] = tmp; \
} \
} \
RANDOM_ITERATOR_R123_CUDA_DEVICE r123array##_N##x##W& incr(RANDOM_ITERATOR_R123_ULONG_LONG n=1){ \
/* This test is tricky because we're trying to avoid spurious \
complaints about illegal shifts, yet still be compile-time \
evaluated. */ \
if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) \
return incr_carefully(n); \
if(n==1){ \
++v[0]; \
if(_N==1 || RANDOM_ITERATOR_R123_BUILTIN_EXPECT(!!v[0], 1)) return *this; \
}else{ \
v[0] += n; \
if(_N==1 || RANDOM_ITERATOR_R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this; \
} \
/* Reaching here means v[0] wrapped around, so a carry of one must be \
propagated. We expect that the N==?? tests will be \
constant-folded/optimized away by the compiler, so only the \
overflow tests (!!v[i]) remain to be done at runtime. For \
small values of N, it would be better to do this as an \
unconditional sequence of adc. An experiment/optimization \
for another day... \
N.B. The weird subscripting: v[_N>3?3:0] is to silence \
a spurious error from icpc \
*/ \
++v[_N>1?1:0]; \
if(_N==2 || RANDOM_ITERATOR_R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; \
++v[_N>2?2:0]; \
if(_N==3 || RANDOM_ITERATOR_R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this; \
++v[_N>3?3:0]; \
for(size_t i=4; i<_N; ++i){ \
if( RANDOM_ITERATOR_R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this; \
++v[i]; \
} \
return *this; \
} \
/* seed(SeedSeq) would be a constructor if having a constructor */ \
/* didn't cause headaches with defaults */ \
template <typename SeedSeq> \
RANDOM_ITERATOR_R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){ \
r123array##_N##x##W ret; \
const size_t Ngen = _N*((3+sizeof(value_type))/4); \
uint32_t u32[Ngen]; \
uint32_t *p32 = &u32[0]; \
ss.generate(&u32[0], &u32[Ngen]); \
for(size_t i=0; i<_N; ++i){ \
ret.v[i] = assemble_from_u32<value_type>(p32); \
p32 += (3+sizeof(value_type))/4; \
} \
return ret; \
} \
protected: \
RANDOM_ITERATOR_R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(RANDOM_ITERATOR_R123_ULONG_LONG n){ \
/* n may be greater than the maximum value of a single value_type. \
vtn holds the (truncated) amount just added to the previous element; \
v[i-1] < vtn afterwards means that addition wrapped around. */ \
value_type vtn; \
vtn = n; \
v[0] += n; \
const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); \
for(size_t i=1; i<_N; ++i){ \
if(rshift){ \
n >>= rshift; \
}else{ \
n=0; \
} \
if( v[i-1] < vtn ) \
++n; \
if( n==0 ) break; \
vtn = n; \
v[i] += n; \
} \
return *this; \
} \
/** @cond HIDDEN_FROM_DOXYGEN */
// There are several tricky considerations for the insertion and extraction
// operators:
// - we would like to be able to print r123array16x8 as a sequence of 16 integers,
// not as 16 bytes.
// - we would like to be able to print r123array1xm128i.
// - we do not want an int conversion operator in r123m128i because it causes
// lots of ambiguity problems with automatic promotions.
// Solution: r123arrayinsertable and r123arrayextractable
// Read-only wrapper around one array element. Inserting the wrapper into a
// stream inserts the referenced value with the element type's own operator<<.
template<typename T>
struct r123arrayinsertable{
    const T& v;
    r123arrayinsertable(const T& ref) : v(ref) {}
    friend std::ostream& operator<<(std::ostream& out, const r123arrayinsertable<T>& item){
        out << item.v;
        return out;
    }
};
// uint8_t elements are widened to int first so that they print as numbers
// rather than as characters.
template<>
struct r123arrayinsertable<uint8_t>{
    const uint8_t& v;
    r123arrayinsertable(const uint8_t& ref) : v(ref) {}
    friend std::ostream& operator<<(std::ostream& out, const r123arrayinsertable<uint8_t>& item){
        const int widened = (int)item.v;
        out << widened;
        return out;
    }
};
// Mutable wrapper around one array element. Extracting into the wrapper
// extracts into the referenced value with the element type's own operator>>.
template<typename T>
struct r123arrayextractable{
    T& v;
    r123arrayextractable(T& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<T>& t){
        return is >> t.v;
    }
};
// uint8_t elements are read as an integer rather than as a single character.
template<>
struct r123arrayextractable<uint8_t>{
    uint8_t& v;
    r123arrayextractable(uint8_t& t_) : v(t_) {}
    friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){
        // Start from the current value: when the stream is already in a
        // failed state (or is empty) the extraction below writes nothing,
        // and the element must then stay unchanged instead of receiving an
        // uninitialized int.
        int i = t.v;
        is >> i;
        t.v = static_cast<uint8_t>(i);
        return is;
    }
};
/** @endcond */
// CXXOVERLOADS(_N, W, T) expands to the free functions that accompany
// struct r123array##_N##x##W:
//  - operator<< writes the _N elements separated by single spaces;
//  - operator>> reads _N elements in order;
//  - a typedef Array##_N##x##W for the struct inside namespace
//    random_iterator_r123.
// Elements go through r123arrayinsertable / r123arrayextractable.
#define CXXOVERLOADS(_N, W, T) \
\
inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){ \
os << r123arrayinsertable<T>(a.v[0]); \
for(size_t i=1; i<_N; ++i) \
os << " " << r123arrayinsertable<T>(a.v[i]); \
return os; \
} \
\
inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){ \
for(size_t i=0; i<_N; ++i){ \
r123arrayextractable<T> x(a.v[i]); \
is >> x; \
} \
return is; \
} \
\
namespace random_iterator_r123{ \
typedef r123array##_N##x##W Array##_N##x##W; \
}
#endif /* __cplusplus */
/* _r123array_tpl expands to a declaration of struct r123arrayNxW.
In C, it's nothing more than a struct containing an array of N
objects of type T.
In C++ it's the same, but endowed with an assortment of member
functions, typedefs and friends. In C++, r123arrayNxW looks a lot
like std::array<T,N>, has most of the capabilities of a container,
and satisfies the requirements outlined in compat/Engine.hpp for
counter and key types. ArrayNxW, in the random_iterator_r123 namespace,
is a typedef equivalent to r123arrayNxW.
*/
/* Parameters: _N is the element count, W is the width token pasted into
the struct name (r123array##_N##x##W), and T is the element type of the
data member v. */
#define _r123array_tpl(_N, W, T) \
/** @ingroup arrayNxW */ \
/** @see arrayNxW */ \
struct r123array##_N##x##W{ \
T v[_N]; \
CXXMETHODS(_N, W, T) \
CXXMETHODS_REQUIRING_STL \
}; \
\
CXXOVERLOADS(_N, W, T)
/* The concrete array types. The 64-bit and m128i variants are only
declared when the corresponding feature macro is true. */
_r123array_tpl(1, 32, uint32_t) /* r123array1x32 */
_r123array_tpl(2, 32, uint32_t) /* r123array2x32 */
_r123array_tpl(4, 32, uint32_t) /* r123array4x32 */
_r123array_tpl(8, 32, uint32_t) /* r123array8x32 */
#if RANDOM_ITERATOR_R123_USE_64BIT
_r123array_tpl(1, 64, uint64_t) /* r123array1x64 */
_r123array_tpl(2, 64, uint64_t) /* r123array2x64 */
_r123array_tpl(4, 64, uint64_t) /* r123array4x64 */
#endif
_r123array_tpl(16, 8, uint8_t) /* r123array16x8 for ARSsw, AESsw */
#if RANDOM_ITERATOR_R123_USE_SSE
_r123array_tpl(1, m128i, r123m128i) /* r123array1xm128i for ARSni, AESni */
#endif
/* In C++, it's natural to use sizeof(a::value_type), but in C it's
pretty convoluted to figure out the width of the value_type of an
r123arrayNxW:
*/
/* Width in bits of one element of array type `a`. The null pointer is only
used as an operand of sizeof, which does not evaluate it. */
#define RANDOM_ITERATOR_R123_W(a) (8*sizeof(((a *)0)->v[0]))
/** @namespace random_iterator_r123
Most of the Random123 C++ API is contained in the random_iterator_r123 namespace.
*/
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __Random123_ars_dot_hpp__
#define __Random123_ars_dot_hpp__
#include "features/compilerfeatures.h"
#include "array.h"
#if RANDOM_ITERATOR_R123_USE_AES_NI
/* Default round count for the ARS generators. It is 7 unless
ARS1xm128i_DEFAULT_ROUNDS is already defined when this header is read. */
#ifndef ARS1xm128i_DEFAULT_ROUNDS
#define ARS1xm128i_DEFAULT_ROUNDS 7
#endif
/** @ingroup AESNI */
enum r123_enum_ars1xm128i {ars1xm128i_rounds = ARS1xm128i_DEFAULT_ROUNDS};
/* ARS1xm128i with Weyl keys. Fast, and Crush-resistant, but NOT CRYPTO. */
/* The counter, key and user-key types are all aliases of the same
struct r123array1xm128i. */
/** @ingroup AESNI */
typedef struct r123array1xm128i ars1xm128i_ctr_t;
/** @ingroup AESNI */
typedef struct r123array1xm128i ars1xm128i_key_t;
/** @ingroup AESNI */
typedef struct r123array1xm128i ars1xm128i_ukey_t;
/** @ingroup AESNI
    Key initialization for ARS1xm128i: the user key is returned unchanged
    as the key. */
RANDOM_ITERATOR_R123_STATIC_INLINE ars1xm128i_key_t ars1xm128ikeyinit(ars1xm128i_ukey_t uk) {
    ars1xm128i_key_t key = uk;
    return key;
}
/** @ingroup AESNI
    ARS round function on one 128-bit block.

    The counter is xor-ed with the key, then up to nine AES encryption
    rounds are applied (one for each r in 1..9 with r < Nrounds), followed by
    one final aesenclast round. Before every round the round key is advanced
    by a fixed Weyl increment using 64-bit lane additions.

    @param Nrounds number of rounds; asserted to be at most 10
    @param in      input counter block
    @param k       key block
    @return the scrambled block */
RANDOM_ITERATOR_R123_STATIC_INLINE ars1xm128i_ctr_t ars1xm128i_R(unsigned int Nrounds, ars1xm128i_ctr_t in, ars1xm128i_key_t k){
    __m128i weyl = _mm_set_epi64x(RANDOM_ITERATOR_R123_64BIT(0xBB67AE8584CAA73B), /* sqrt(3) - 1.0 */
                                  RANDOM_ITERATOR_R123_64BIT(0x9E3779B97F4A7C15)); /* golden ratio */
    __m128i roundkey = k.v[0].m;
    /* N.B. the aesenc instructions do the xor *after*
       so if we want to follow the AES pattern, we
       have to do the initial xor explicitly */
    __m128i state = _mm_xor_si128(in.v[0].m, roundkey);
    ars1xm128i_ctr_t out;
    unsigned int r;
    RANDOM_ITERATOR_R123_ASSERT(Nrounds<=10);
    /* Middle rounds: never more than nine, whatever Nrounds is. */
    for(r = 1; r < Nrounds && r <= 9; ++r){
        roundkey = _mm_add_epi64(roundkey, weyl);
        state = _mm_aesenc_si128(state, roundkey);
    }
    /* The last round is always performed. */
    roundkey = _mm_add_epi64(roundkey, weyl);
    state = _mm_aesenclast_si128(state, roundkey);
    out.v[0].m = state;
    return out;
}
/** @def ars1xm128i
@ingroup AESNI
The ars1xm128i macro provides a C API interface to the @ref AESNI "ARS" CBRNG with the default number of rounds i.e. \c ars1xm128i_rounds **/
#define ars1xm128i(c,k) ars1xm128i_R(ars1xm128i_rounds, c, k)
/* Counter, key and user-key types for the 4x32 and 2x64 front ends. */
/** @ingroup AESNI */
typedef struct r123array4x32 ars4x32_ctr_t;
/** @ingroup AESNI */
typedef struct r123array4x32 ars4x32_key_t;
/** @ingroup AESNI */
typedef struct r123array4x32 ars4x32_ukey_t;
/** @ingroup AESNI */
typedef struct r123array2x64 ars2x64_ctr_t;
/** @ingroup AESNI */
typedef struct r123array2x64 ars2x64_key_t;
/** @ingroup AESNI */
typedef struct r123array2x64 ars2x64_ukey_t;
/** @ingroup AESNI */
enum r123_enum_ars4x32 {ars4x32_rounds = ARS1xm128i_DEFAULT_ROUNDS};
/** @ingroup AESNI
    Key initialization for ARS4x32: the user key is returned unchanged as
    the key. */
RANDOM_ITERATOR_R123_STATIC_INLINE ars4x32_key_t ars4x32keyinit(ars4x32_ukey_t uk) {
    ars4x32_key_t key = uk;
    return key;
}
/** @ingroup AESNI
    ARS on a counter and key given as four 32-bit words each.

    The words are packed into 128-bit blocks with element 0 in the lowest
    lane, run through ars1xm128i_R, and the result is stored back over the
    four words of \c c.

    @param Nrounds forwarded to ars1xm128i_R
    @param c       counter words
    @param k       key words
    @return the scrambled counter words */
RANDOM_ITERATOR_R123_STATIC_INLINE ars4x32_ctr_t ars4x32_R(unsigned int Nrounds, ars4x32_ctr_t c, ars4x32_key_t k){
    ars1xm128i_key_t packed_key;
    ars1xm128i_ctr_t packed_ctr;
    ars1xm128i_ctr_t scrambled;
    packed_ctr.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]);
    packed_key.v[0].m = _mm_set_epi32(k.v[3], k.v[2], k.v[1], k.v[0]);
    scrambled = ars1xm128i_R(Nrounds, packed_ctr, packed_key);
    /* Unaligned store: c.v is only guaranteed uint32_t alignment. */
    _mm_storeu_si128((__m128i*)&c.v[0], scrambled.v[0].m);
    return c;
}
/** @def ars4x32
@ingroup AESNI
The ars4x32 macro provides a C API interface to the @ref AESNI "ARS" CBRNG with the default number of rounds i.e. \c ars4x32_rounds.
It expands to a call of ars4x32_R with \c ars4x32_rounds as the first argument. **/
#define ars4x32(c,k) ars4x32_R(ars4x32_rounds, c, k)
#ifdef __cplusplus
namespace random_iterator_r123{
/**
@ingroup AESNI
ARS1xm128i_R exports the member functions, typedefs and operator overloads required by a @ref CBRNG class.
ARS1xm128i uses the cryptographic AES round function, but a @b non-cryptographic key schedule
to save time and space.
ARS1xm128i is only available when the feature-test macro RANDOM_ITERATOR_R123_USE_AES_NI is true, which
should occur only when the compiler is configured to generate AES-NI instructions (or
when defaults are overridden by compile-time, compiler-command-line options).
The template argument, ROUNDS, is the number of times the ARS round
functions will be applied.
As of September 2011, the authors know of no statistical flaws with
ROUNDS=5 or more.
@class ARS1xm128i_R
*/
template<unsigned int ROUNDS>
struct ARS1xm128i_R{
    static const unsigned int rounds=ROUNDS;
    typedef ars1xm128i_key_t key_type;
    typedef ars1xm128i_key_t ukey_type;
    typedef ars1xm128i_ctr_t ctr_type;
    /** Forwards to the C function ars1xm128i_R with Nrounds = ROUNDS. */
    RANDOM_ITERATOR_R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
        ctr_type scrambled = ars1xm128i_R(ROUNDS, ctr, key);
        return scrambled;
    }
};
/** @class ARS4x32_R
@ingroup AESNI
CBRNG-style wrapper whose counter and key are arrays of four 32-bit words.
The template argument, ROUNDS, is passed to ars4x32_R as the round count.
*/
template<unsigned int ROUNDS>
struct ARS4x32_R{
    static const unsigned int rounds=ROUNDS;
    typedef ars4x32_key_t key_type;
    typedef ars4x32_key_t ukey_type;
    typedef ars4x32_ctr_t ctr_type;
    /** Forwards to the C function ars4x32_R with Nrounds = ROUNDS. */
    RANDOM_ITERATOR_R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
        ctr_type scrambled = ars4x32_R(ROUNDS, ctr, key);
        return scrambled;
    }
};
/** @class ARS2x64_R
@ingroup AESNI
CBRNG-style wrapper whose counter and key are arrays of two 64-bit words.
Each 64-bit word is split into its low and high 32-bit halves (low half
first), the resulting four words are run through ars4x32_R with ROUNDS
rounds, and the four output words are reassembled into two 64-bit words
in the same layout.
*/
template<unsigned int ROUNDS>
struct ARS2x64_R{
    typedef ars2x64_ctr_t ctr_type;
    typedef ars2x64_key_t key_type;
    typedef ars2x64_key_t ukey_type;
    static const unsigned int rounds=ROUNDS;
    RANDOM_ITERATOR_R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
        ars4x32_ctr_t ctr_;
        ars4x32_key_t key_;
        // Explicit casts: the 64 -> 32 bit truncation is intended, and an
        // implicit narrowing inside a braced initializer is ill-formed.
        for(unsigned int i=0; i<2; ++i){
            ctr_.v[2*i]   = static_cast<uint32_t>(ctr.v[i]);
            ctr_.v[2*i+1] = static_cast<uint32_t>(ctr.v[i]>>32);
            key_.v[2*i]   = static_cast<uint32_t>(key.v[i]);
            key_.v[2*i+1] = static_cast<uint32_t>(key.v[i]>>32);
        }
        const ars4x32_ctr_t res_ = ars4x32_R(ROUNDS, ctr_, key_);
        ctr_type res;
        res.v[0] = (static_cast<uint64_t>(res_.v[1])<<32) | res_.v[0];
        res.v[1] = (static_cast<uint64_t>(res_.v[3])<<32) | res_.v[2];
        return res;
    }
};
/**
@ingroup AESNI
@class ARS1xm128i_R
ARS1xm128i is ARS1xm128i_R<ars1xm128i_rounds>, i.e. ARS1xm128i_R<7> unless
ARS1xm128i_DEFAULT_ROUNDS has been overridden. With 7 rounds,
the ARS1xm128i CBRNG has a considerable safety margin over the minimum number
of rounds with no known statistical flaws, but still has excellent
performance. */
typedef ARS1xm128i_R<ars1xm128i_rounds> ARS1xm128i;
/** ARS4x32 and ARS2x64 both use \c ars4x32_rounds as their round count. */
typedef ARS4x32_R<ars4x32_rounds> ARS4x32;
typedef ARS2x64_R<ars4x32_rounds> ARS2x64;
} // namespace random_iterator_r123
#endif /* __cplusplus */
#endif /* RANDOM_ITERATOR_R123_USE_AES_NI */
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// This file implements the Box-Muller method for generating gaussian
// random variables (GRVs). Box-Muller has the advantage of
// deterministically requiring exactly two uniform random variables as
// input and producing exactly two GRVs as output, which makes it
// especially well-suited to the counter-based generators in
// Random123. Other methods (e.g., Ziggurat, polar) require an
// indeterminate number of inputs for each output and so require a
// 'MicroURNG' to be used with Random123. The down side of Box-Muller
// is that it calls sincos, log and sqrt, which may be slow. However,
// on GPUs, these functions are remarkably fast, which makes
// Box-Muller the fastest GRV generator we know of on GPUs.
//
// This file exports two structs and one overloaded function,
// all in the random_iterator_r123 namespace:
// struct random_iterator_r123::float2{ float x,y; }
// struct random_iterator_r123::double2{ double x,y; }
//
// random_iterator_r123::float2 random_iterator_r123::boxmuller(uint32_t u0, uint32_t u1);
// random_iterator_r123::double2 random_iterator_r123::boxmuller(uint64_t u0, uint64_t u1);
//
// float2 and double2 are identical to their synonymous global-
// namespace structures in CUDA.
//
// This file may not be as portable, and has not been tested as
// rigorously as other files in the library, e.g., the generators.
// Nevertheless, we hope it is useful and we encourage developers to
// copy it and modify it for their own use. We invite comments and
// improvements.
#ifndef _r123_BOXMULLER_HPP__
#define _r123_BOXMULLER_HPP__
#include <Random123/features/compilerfeatures.h>
#include <Random123/uniform.hpp>
#include <math.h>
namespace random_iterator_r123 {
// Pair types returned by boxmuller(). Without __CUDACC__ they are plain
// structs with members x and y; with __CUDACC__ they alias the
// global-namespace ::float2 and ::double2.
#if !defined(__CUDACC__)
typedef struct {
float x, y;
} float2;
typedef struct {
double x, y;
} double2;
#else
typedef ::float2 float2;
typedef ::double2 double2;
#endif
#if !defined(RANDOM_ITERATOR_R123_NO_SINCOS) && defined(__APPLE__)
/* MacOS X 10.10.5 (2015) doesn't have sincosf */
#define RANDOM_ITERATOR_R123_NO_SINCOS 1
#endif
#if RANDOM_ITERATOR_R123_NO_SINCOS /* enable this if sincos and sincosf are not in the math library */
/* Single-precision fallback: store sinf(x) in *s and cosf(x) in *c. */
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE void sincosf(
    float x, float* s, float* c) {
  const float sine = sinf(x);
  const float cosine = cosf(x);
  *s = sine;
  *c = cosine;
}
/* Double-precision fallback: store sin(x) in *s and cos(x) in *c. */
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE void sincos(
    double x, double* s, double* c) {
  const double sine = sin(x);
  const double cosine = cos(x);
  *s = sine;
  *c = cosine;
}
#endif /* sincos is not in the math library */
#if !defined(CUDART_VERSION) || \
    CUDART_VERSION < 5000 /* enabled if sincospi and sincospif are not in math lib */
/* Single-precision fallback: sine and cosine of pi*x via sincosf. */
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE void sincospif(
    float x, float* s, float* c) {
  const float PIf = 3.1415926535897932f;
  const float angle = PIf * x;
  sincosf(angle, s, c);
}
/* Double-precision fallback: sine and cosine of pi*x via sincos. */
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE void sincospi(
    double x, double* s, double* c) {
  const double PI = 3.1415926535897932;
  const double angle = PI * x;
  sincos(angle, s, c);
}
#endif /* sincospi is not in math lib */
/*
 * Box-Muller transform, single precision.
 *
 * u0 selects the angle: sincospif is applied to uneg11<float>(u0).
 * u1 selects the radius: sqrtf(-2 * logf(u01<float>(u1))).
 * Returns a float2 holding radius*sin in x and radius*cos in y.
 */
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE float2
boxmuller(uint32_t u0, uint32_t u1) {
  float2 out;
  const float angle = uneg11<float>(u0);
  sincospif(angle, &out.x, &out.y);
  // u01 is guaranteed to avoid 0.
  const float radius = sqrtf(-2.f * logf(u01<float>(u1)));
  out.x *= radius;
  out.y *= radius;
  return out;
}
/*
 * Box-Muller transform, double precision.
 *
 * u0 selects the angle: sincospi is applied to uneg11<double>(u0).
 * u1 selects the radius: sqrt(-2 * log(u01<double>(u1))).
 * Returns a double2 holding radius*sin in x and radius*cos in y.
 */
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE double2
boxmuller(uint64_t u0, uint64_t u1) {
  double2 out;
  const double angle = uneg11<double>(u0);
  sincospi(angle, &out.x, &out.y);
  // u01 is guaranteed to avoid 0.
  const double radius = sqrt(-2. * log(u01<double>(u1)));
  out.x *= radius;
  out.y *= radius;
  return out;
}
} // namespace random_iterator_r123
#endif /* BOXMULLER_H__ */
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __Engine_dot_hpp_
#define __Engine_dot_hpp_
#include "../features/compilerfeatures.h"
#include "../array.h"
#include <limits>
#include <stdexcept>
#include <sstream>
#include <algorithm>
#include <vector>
#if RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS
#include <type_traits>
#endif
namespace random_iterator_r123 {
/**
If G satisfies the requirements of a CBRNG, and has a ctr_type whose
value_type is an unsigned integral type, then Engine<G> satisfies
the requirements of a C++11 "Uniform Random Number Engine" and can
be used in any context where such an object is expected.
Note that wrapping a counter based RNG with a traditional API in
this way obscures much of the power of counter based PRNGs.
Nevertheless, it may be of value in applications that are already
coded to work with the C++11 random number engines.
The MicroURNG template in MicroURNG.hpp
provides the more limited functionality of a C++11 "Uniform
Random Number Generator", but leaves the application in control
of counters and keys and hence may be preferable to the Engine template.
For example, a MicroURNG allows one to use C++11 "Random Number
Distributions" without giving up control over the counters
and keys.
*/
template <typename CBRNG>
struct Engine {
typedef CBRNG cbrng_type;
typedef typename CBRNG::ctr_type ctr_type;
typedef typename CBRNG::key_type key_type;
typedef typename CBRNG::ukey_type ukey_type;
// The engine hands out single elements of the counter array type.
typedef typename ctr_type::value_type result_type;
protected:
// The counter-based generator, invoked as b(c, key).
cbrng_type b;
// Key passed to every invocation of b.
key_type key;
// Current counter, advanced with c.incr().
ctr_type c;
// Block of values from the most recent b(c, key). Its last slot, v.back(),
// is reused as the number of elements of v still to be handed out.
ctr_type v;
void fix_invariant() {
if (v.back() != 0) {
result_type vv = v.back();
v = b(c, key);
v.back() = vv;
}
}
public:
// Default constructor: value-initialized counter and an all-zero user key.
// v is value-initialized as well, so that copying the engine (seed() does
// so through copy assignment) never reads indeterminate elements.
explicit Engine()
    : b()
    , c()
    , v() {
  ukey_type x = {{}};
  v.back() = 0;  // no cached values are pending
  key = x;
}
// Construct from a single seed value: r becomes the first element of the
// user key and the remaining key elements are zero.
// v is value-initialized so that copying the engine (seed() does so
// through copy assignment) never reads indeterminate elements.
explicit Engine(result_type r)
    : b()
    , c()
    , v() {
  ukey_type x = {{typename ukey_type::value_type(r)}};
  v.back() = 0;  // no cached values are pending
  key = x;
}
// 26.5.3 says that the SeedSeq templates shouldn't participate in
// overload resolution unless the type qualifies as a SeedSeq.
// How that is determined is unspecified, except that "as a
// minimum a type shall not qualify as a SeedSeq if it is
// implicitly convertible to a result_type."
//
// First, we make sure that even the non-const copy constructor
// works as expected. In addition, if we've got C++11
// type_traits, we use enable_if and is_convertible to implement
// the convertible-to-result_type restriction. Otherwise, the
// template is unconditional and will match in some surprising
// and undesirable situations.
// Copy constructor taking a non-const reference, so that copying a
// non-const Engine does not select the SeedSeq template.
// Only the pending count v.back() is copied; fix_invariant() regenerates
// the cached block when it is non-zero. v is value-initialized first so no
// element is left indeterminate when the count is zero.
Engine(Engine& e)
    : b(e.b)
    , key(e.key)
    , c(e.c)
    , v() {
  v.back() = e.v.back();
  fix_invariant();
}
// Copy constructor. Only the pending count v.back() is copied;
// fix_invariant() regenerates the cached block when it is non-zero.
// v is value-initialized first so no element is left indeterminate when
// the count is zero.
Engine(const Engine& e)
    : b(e.b)
    , key(e.key)
    , c(e.c)
    , v() {
  v.back() = e.v.back();
  fix_invariant();
}
// Construct from a seed sequence: the user key is produced by
// ukey_type::seed(s). With C++11 type traits the overload is disabled for
// types convertible to result_type.
// v is value-initialized so that copying the engine (seed() does so
// through copy assignment) never reads indeterminate elements.
template <typename SeedSeq>
explicit Engine(SeedSeq& s
#if RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS
                ,
                typename std::enable_if<
                    !std::is_convertible<SeedSeq, result_type>::value>::type* = 0
#endif
                )
    : b()
    , c()
    , v() {
  ukey_type ukey = ukey_type::seed(s);
  key = ukey;
  v.back() = 0;  // no cached values are pending
}
// Reseed from a single value: equivalent to assigning Engine(r) to *this.
void seed(result_type r) {
  Engine reseeded(r);
  *this = reseeded;
}
template <typename SeedSeq>
void seed(SeedSeq& s
#if RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS
,
typename std::enable_if<
!std::is_convertible<SeedSeq, result_type>::value>::type* = 0
#endif
) {
*this = Engine(s);
}
void seed() { *this = Engine(); }
// Two engines are equal when their counters, pending counts (v.back())
// and keys are all equal.
friend bool operator==(const Engine& lhs, const Engine& rhs) {
  if (!(lhs.c == rhs.c)) return false;
  if (!(lhs.v.back() == rhs.v.back())) return false;
  return lhs.key == rhs.key;
}
// Engines differ when any of counter, pending count (v.back()) or key
// differs.
friend bool operator!=(const Engine& lhs, const Engine& rhs) {
  if (lhs.c != rhs.c) return true;
  if (lhs.v.back() != rhs.v.back()) return true;
  return lhs.key != rhs.key;
}
// Write the engine state as: counter, key, pending count (v.back()),
// separated by single spaces.
friend std::ostream& operator<<(std::ostream& os, const Engine& be) {
  os << be.c;
  os << " " << be.key;
  os << " " << be.v.back();
  return os;
}
// Read counter, key and pending count in the order operator<< writes them,
// then let fix_invariant() rebuild the cached block.
friend std::istream& operator>>(std::istream& is, Engine& be) {
  is >> be.c;
  is >> be.key;
  is >> be.v.back();
  be.fix_invariant();
  return is;
}
// Some standard libraries require URNGs to expose static data members
// named _Min and _Max in addition to min()/max(): the <random> shipped
// with MacOS Xcode 4.5.2 always does, and later versions of libc++ do
// so only when constexpr isn't supported. The requirement is clearly
// non-standard, but it is unlikely to be fixed and it is very easy to
// work around, so the low effort is worth the benefit. Thanks to
// Yan Zhou for pointing this out to us. See similar code in
// ../MicroURNG.hpp
static const result_type _Min = 0;
static const result_type _Max = ~static_cast<result_type>(0);
// Smallest value operator() can return.
static RANDOM_ITERATOR_R123_CONSTEXPR result_type min RANDOM_ITERATOR_R123_NO_MACRO_SUBST() { return _Min; }
// Largest value operator() can return (all bits of result_type set).
static RANDOM_ITERATOR_R123_CONSTEXPR result_type max RANDOM_ITERATOR_R123_NO_MACRO_SUBST() { return _Max; }
// Produce the next value. The last slot of v doubles as a countdown of
// buffered values that have not been handed out yet.
result_type operator()() {
// Single-element counters need no buffering: every call runs the
// bijection on the incremented counter and returns its only element.
if (c.size() == 1) {
return b(c.incr(), key)[0];
}
result_type& remaining = v.back();
if (remaining != 0) {
// Values are consumed from the highest unread index downwards.
--remaining;
return v[remaining];
}
// Nothing buffered: generate a new block, return its last element now,
// and reuse that slot as the countdown for the remaining elements.
v = b(c.incr(), key);
const result_type fresh = v.back();
remaining = c.size() - 1;
return fresh;
}
// Skip ahead without generating the skipped values: whole blocks of
// c.size() values are added to the counter, and the remainder is
// subtracted from the countdown kept in the last slot of v.
void discard(RANDOM_ITERATOR_R123_ULONG_LONG skip) {
const size_t width = c.size();
const size_t partial = skip % width;
RANDOM_ITERATOR_R123_ULONG_LONG blocks = skip / width;
// The countdown runs downwards, so a remainder larger than what is left
// borrows one extra block from the counter.
result_type& remaining = v.back();
if (remaining < partial) {
remaining += width;
++blocks;
}
remaining -= partial;
c.incr(blocks);
fix_invariant();
}
//--------------------------
// Some bonus methods, not required for a Random Number
// Engine
// Constructors and seed() method for ukey_type seem useful
// We need const and non-const to supersede the SeedSeq template.
// Construct directly from a user key: key is initialized from uk, the
// counter is value-initialized, and the last slot of v is set to 0.
// b is not named in the initializer list, so it is default-initialized.
explicit Engine(const ukey_type& uk)
: key(uk)
, c() {
v.back() = 0;
}
// Non-const twin of the constructor above, with an identical body. It is
// needed so that a non-const ukey_type argument is not taken by the
// SeedSeq& template constructor.
explicit Engine(ukey_type& uk)
: key(uk)
, c() {
v.back() = 0;
}
// Re-seed from a user key by assigning a newly constructed Engine(uk) to
// *this; const and non-const overloads mirror the two constructors.
void seed(const ukey_type& uk) { *this = Engine(uk); }
void seed(ukey_type& uk) { *this = Engine(uk); }
#if RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS
// Constructor taking a key_type. It is only compiled when C++11 type
// traits are available, and is only enabled when key_type and ukey_type
// are different types. The key is initialized from k, the counter is
// value-initialized and the last slot of v is set to 0.
// NOTE(review): the is_same test presumably prevents a clash with the
// ukey_type constructor when both types coincide -- confirm.
template <typename DUMMY = void>
explicit Engine(const key_type& k,
typename std::enable_if<!std::is_same<ukey_type, key_type>::value,
DUMMY>::type* = 0)
: key(k)
, c() {
v.back() = 0;
}
// Matching seed() overload: assigns a newly constructed Engine(k) to
// *this, under the same enabling condition as the constructor above.
template <typename DUMMY = void>
void seed(const key_type& k,
typename std::enable_if<!std::is_same<ukey_type, key_type>::value,
DUMMY>::type* = 0) {
*this = Engine(k);
}
#endif
// Forward the e(counter) to the CBRNG we are templated
// on, using the current value of the key.
// The parameter c shadows the member counter c: the engine's own counter
// and buffer are neither read nor modified by this const overload.
ctr_type operator()(const ctr_type& c) const { return b(c, key); }
// Return a copy of the current key.
key_type getkey() const { return key; }
// N.B. setkey(k) is different from seed(k) because seed(k) zeros
// the counter (per the C++11 requirements for an Engine), whereas
// setkey does not.
// After the key is replaced, fix_invariant() is called on the engine.
void setkey(const key_type& k) {
key = k;
fix_invariant();
}
// Maybe the caller wants to know the details of
// the internal state, e.g., so it can call a different
// bijection with the same counter.
// Returns the pair (counter, last slot of v).
std::pair<ctr_type, result_type> getcounter() const {
return std::make_pair(c, v.back());
}
// Inverse of getcounter(): install the counter _c and the value _elem
// for the last slot of v, then call fix_invariant().
// Throws std::range_error when _elem is not below c.size().
void setcounter(const ctr_type& _c, result_type _elem) {
static const size_t limit = c.size();
if (!(_elem < limit)) {
throw std::range_error("Engine::setcounter called with elem out of range");
}
c = _c;
v.back() = _elem;
fix_invariant();
}
// Convenience overload accepting the pair returned by getcounter();
// forwards to setcounter(ce.first, ce.second).
void setcounter(const std::pair<ctr_type, result_type>& ce) {
setcounter(ce.first, ce.second);
}
};
} // namespace random_iterator_r123
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __r123_compat_gslrng_dot_h__
#define __r123_compat_gslrng_dot_h__
#include <gsl/gsl_rng.h>
#include <string.h>
/**
The macro: GSL_CBRNG(NAME, CBRNGNAME)
declares the necessary structs and constants that define a
gsl_rng_NAME type based on the counter-based RNG CBRNGNAME. For example:
Usage:
@code
#include <Random123/threefry.h>
#include <Random123/conventional/gsl_cbrng.h> // this file
GSL_CBRNG(cbrng, threefry4x32); // creates gsl_rng_cbrng
int main(int argc, char **argv){
gsl_rng *r = gsl_rng_alloc(gsl_rng_cbrng);
... use r as you would use any other gsl_rng ...
}
@endcode
It requires that NAME be the name of a CBRNG that follows the
naming and stylistic conventions of the Random123 library.
Note that wrapping a \ref CBRNG "counter-based PRNG" with a traditional API in
this way obscures much of the power of the CBRNG API.
Nevertheless, it may be of value to applications that are already
coded to work with GSL random number generators, and that wish
to use the RNGs in the Random123 library.
*/
#define GSL_CBRNG(NAME, CBRNGNAME) \
const gsl_rng_type *gsl_rng_##NAME; \
\
typedef struct{ \
CBRNGNAME##_ctr_t ctr; \
CBRNGNAME##_ctr_t r; \
CBRNGNAME##_key_t key; \
int elem; \
} NAME##_state; \
\
static unsigned long int NAME##_get(void *vstate){ \
NAME##_state *st = (NAME##_state *)vstate; \
const int N=sizeof(st->ctr.v)/sizeof(st->ctr.v[0]); \
if( st->elem == 0 ){ \
++st->ctr.v[0]; \
if( N>1 && st->ctr.v[0] == 0 ) ++st->ctr.v[1]; \
if( N>2 && st->ctr.v[1] == 0 ) ++st->ctr.v[2]; \
if( N>3 && st->ctr.v[2] == 0 ) ++st->ctr.v[3]; \
st->r = CBRNGNAME(st->ctr, st->key); \
st->elem = N; \
} \
return 0xffffffffUL & st->r.v[--st->elem]; \
} \
\
static double \
NAME##_get_double (void * vstate) \
{ \
return NAME##_get (vstate)/4294967296.0; \
} \
\
static void NAME##_set(void *vstate, unsigned long int s){ \
NAME##_state *st = (NAME##_state *)vstate; \
st->elem = 0; \
/* Assume that key and ctr have an array member, v, \
as if they are r123arrayNxW. If not, this will fail \
to compile. In particular, this macro fails to compile \
when the underlying CBRNG requires use of keyinit */ \
memset(&st->ctr.v[0], 0, sizeof(st->ctr.v)); \
memset(&st->key.v[0], 0, sizeof(st->key.v)); \
/* GSL 1.15 documentation says this about gsl_rng_set: \
Note that the most generators only accept 32-bit seeds, with higher \
values being reduced modulo 2^32. For generators with smaller \
ranges the maximum seed value will typically be lower. \
so we won't jump through any hoops here to deal with \
high bits if sizeof(unsigned long) > sizeof(uint32_t). */ \
st->key.v[0] = s; \
} \
\
static const gsl_rng_type NAME##_type = { \
#NAME, \
0xffffffffUL, \
0, \
sizeof(NAME##_state), \
&NAME##_set, \
&NAME##_get, \
&NAME##_get_double \
}; \
\
const gsl_rng_type *gsl_rng_##NAME = &NAME##_type
#endif
/*
Copyright 2010-2016, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __clangfeatures_dot_hpp
#define __clangfeatures_dot_hpp
// clang-specific feature settings. Every symbol is only defined when the
// user has not already defined it (e.g. with -D on the command line).
// <x86intrin.h> is enabled on x86 targets only.
#ifndef RANDOM_ITERATOR_R123_USE_X86INTRIN_H
#if (defined(__x86_64__)||defined(__i386__))
#define RANDOM_ITERATOR_R123_USE_X86INTRIN_H 1
#else
#define RANDOM_ITERATOR_R123_USE_X86INTRIN_H 0
#endif
#endif
// C++11 language features are probed with clang's __has_feature.
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_UNRESTRICTED_UNIONS
#define RANDOM_ITERATOR_R123_USE_CXX11_UNRESTRICTED_UNIONS __has_feature(cxx_unrestricted_unions)
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_STATIC_ASSERT
#define RANDOM_ITERATOR_R123_USE_CXX11_STATIC_ASSERT __has_feature(cxx_static_assert)
#endif
// With clang-3.6, -Wall warns about unused-local-typedefs.
// The "obvious" thing to do is to ignore -Wunused-local-typedefs,
// but that doesn't work because earlier versions of clang blow
// up on an 'unknown warning group'. So we briefly ignore -Wall...
// It's tempting to just give up on static assertions in pre-c++11 code.
// The typedef'ed array has size -1 (a compile error) when expr is false;
// msg is not used by this fallback.
#if !RANDOM_ITERATOR_R123_USE_CXX11_STATIC_ASSERT && !defined(RANDOM_ITERATOR_R123_STATIC_ASSERT)
#define RANDOM_ITERATOR_R123_STATIC_ASSERT(expr, msg) \
_Pragma("clang diagnostic push") \
_Pragma("clang diagnostic ignored \"-Wall\"") \
typedef char static_assertion[(!!(expr))*2-1] \
_Pragma("clang diagnostic pop")
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_CONSTEXPR
#define RANDOM_ITERATOR_R123_USE_CXX11_CONSTEXPR __has_feature(cxx_constexpr)
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_EXPLICIT_CONVERSIONS
#define RANDOM_ITERATOR_R123_USE_CXX11_EXPLICIT_CONVERSIONS __has_feature(cxx_explicit_conversions)
#endif
// With clang-3.0, the apparently simpler:
// #define RANDOM_ITERATOR_R123_USE_CXX11_RANDOM __has_include(<random>)
// dumps core.
// Library headers are therefore probed inside #if, and only when the
// compiler reports C++11 or later.
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_RANDOM
#if __cplusplus>=201103L && __has_include(<random>)
#define RANDOM_ITERATOR_R123_USE_CXX11_RANDOM 1
#else
#define RANDOM_ITERATOR_R123_USE_CXX11_RANDOM 0
#endif
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS
#if __cplusplus>=201103L && __has_include(<type_traits>)
#define RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS 1
#else
#define RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS 0
#endif
#endif
// Everything not set above is taken from the gcc settings. This include
// comes last so that the values chosen here take precedence.
#include "gccfeatures.h"
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
@page porting Preprocessor symbols for porting Random123 to different platforms.
The Random123 library is portable across C, C++, CUDA, OpenCL environments,
and multiple operating systems (Linux, Windows 7, Mac OS X, FreeBSD, Solaris).
This level of portability requires the abstraction of some features
and idioms that are either not standardized (e.g., asm statements), or for which
different vendors have their own standards (e.g., SSE intrinsics) or for
which vendors simply refuse to conform to well-established standards (e.g., <inttypes.h>).
Random123/features/compilerfeatures.h
conditionally includes a compiler-or-OS-specific Random123/features/XXXfeatures.h file which
defines appropriate values for the preprocessor symbols which can be used with
a specific compiler or OS. Those symbols will then
be used by other header files and source files in the Random123
library (and may be used by applications) to control what actually
gets presented to the compiler.
Most of the symbols are boolean valued. In general, they will
\b always be defined with value either 1 or 0, so do
\b NOT use \#ifdef. Use \#if RANDOM_ITERATOR_R123_USE_SOMETHING instead.
Library users can override any value by defining the pp-symbol with a compiler option,
e.g.,
cc -DRANDOM_ITERATOR_R123_USE_MULHILO64_C99
will use a strictly c99 version of the full-width 64x64->128-bit multiplication
function, even if it would be disabled by default.
All boolean-valued pre-processor symbols in Random123/features/compilerfeatures.h start with the prefix RANDOM_ITERATOR_R123_USE_
@verbatim
AES_NI
AES_OPENSSL
SSE4_2
SSE4_1
SSE
STD_RANDOM
GNU_UINT128
ASM_GNU
ASM_MSASM
CPUID_MSVC
CXX11_RANDOM
CXX11_TYPE_TRAITS
CXX11_STATIC_ASSERT
CXX11_CONSTEXPR
CXX11_UNRESTRICTED_UNIONS
CXX11_EXPLICIT_CONVERSIONS
CXX11_LONG_LONG
CXX11_STD_ARRAY
CXX11
X86INTRIN_H
IA32INTRIN_H
XMMINTRIN_H
EMMINTRIN_H
SMMINTRIN_H
WMMINTRIN_H
INTRIN_H
MULHILO32_ASM
MULHILO64_ASM
MULHILO64_MSVC_INTRIN
MULHILO64_CUDA_INTRIN
MULHILO64_OPENCL_INTRIN
MULHILO64_C99
U01_DOUBLE
@endverbatim
Most have obvious meanings. Some non-obvious ones:
AES_NI and AES_OPENSSL are not mutually exclusive. You can have one,
both or neither.
GNU_UINT128 says that it's safe to use __uint128_t, but it
does not require its use. In particular, it should be
used in mulhilo<uint64_t> only if MULHILO64_ASM is unset.
If the XXXINTRIN_H macros are true, then one should
@code
#include <xxxintrin.h>
@endcode
to gain access to compiler intrinsics.
The CXX11_SOME_FEATURE macros allow the code to use specific
features of the C++11 language and library.
In the absence of a specific CXX11_SOME_FEATURE, the feature
is controlled by the catch-all RANDOM_ITERATOR_R123_USE_CXX11 macro.
U01_DOUBLE defaults on, and can be turned off (set to 0)
if one does not want the utility functions that convert to double
(i.e. u01_*_53()), e.g. on OpenCL without the cl_khr_fp64 extension.
There are a number of invariants that are always true. Application code may
choose to rely on these:
<ul>
<li>ASM_GNU and ASM_MSASM are mutually exclusive
<li>The "higher" SSE values imply the lower ones.
</ul>
There are also non-boolean valued symbols:
<ul>
<li>RANDOM_ITERATOR_R123_STATIC_INLINE -
According to both C99 and GNU99, the 'static inline' declaration allows
the compiler to not emit code if the function is not used.
Note that the semantics of 'inline', 'static' and 'extern' in
gcc have changed over time and are subject to modification by
command line options, e.g., -std=gnu89, -fgnu-inline.
Nevertheless, it appears that the meaning of 'static inline'
has not changed over time and (with a little luck) the use of 'static inline'
here will be portable between versions of gcc and to other C99
compilers.
See: http://gcc.gnu.org/onlinedocs/gcc/Inline.html
http://www.greenend.org.uk/rjk/2003/03/inline.html
<li>RANDOM_ITERATOR_R123_FORCE_INLINE(decl) -
which expands to 'decl', adorned with the compiler-specific
embellishments to strongly encourage that the declared function be
inlined. If there is no such compiler-specific magic, it should
expand to decl, unadorned.
<li>RANDOM_ITERATOR_R123_CUDA_DEVICE - which expands to __device__ (or something else with
sufficiently similar semantics) when CUDA is in use, and expands
to nothing in other cases.
<li>RANDOM_ITERATOR_R123_METAL_THREAD_ADDRESS_SPACE - which expands to 'thread' (or
something else with sufficiently similar semantics) when compiling a
Metal kernel, and expands to nothing in other cases.
<li>RANDOM_ITERATOR_R123_ASSERT(x) - which expands to assert(x), or maybe to nothing at
all if we're in an environment so feature-poor that you can't even
call assert (I'm looking at you, CUDA and OpenCL), or even include
assert.h safely (OpenCL).
<li>RANDOM_ITERATOR_R123_STATIC_ASSERT(expr,msg) - which expands to
static_assert(expr,msg), or to a declaration that
fails to compile if expr is not true.
<li>RANDOM_ITERATOR_R123_ULONG_LONG - which expands to a declaration of the longest available
unsigned integer.
<li>RANDOM_ITERATOR_R123_64BIT(x) - expands to something equivalent to
UINT64_C(x) from <stdint.h>, even in environments where <stdint.h>
is not available, e.g., MSVC and OpenCL.
<li>RANDOM_ITERATOR_R123_BUILTIN_EXPECT(expr,likely_value) - expands to something with
the semantics of gcc's __builtin_expect(expr,likely_value). If
the environment has nothing like __builtin_expect, it should expand
to just expr.
</ul>
\cond HIDDEN_FROM_DOXYGEN
*/
/*
N.B. When something is added to the list of features, it should be
added to each of the *features.h files, AND to examples/ut_features.cpp.
*/
/* Include exactly one compiler-specific features header, chosen by the
first predefined macro that matches. */
/* N.B. most other compilers (icc, nvcc, open64, llvm) will also define __GNUC__, so order matters. */
#if defined(__METAL_MACOS__)
#include "metalfeatures.h"
#elif defined(__OPENCL_VERSION__) && __OPENCL_VERSION__ > 0
#include "openclfeatures.h"
#elif defined(__CUDACC__)
#include "nvccfeatures.h"
#elif defined(__ICC)
#include "iccfeatures.h"
#elif defined(__xlC__)
#include "xlcfeatures.h"
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#include "sunprofeatures.h"
#elif defined(__OPEN64__)
#include "open64features.h"
#elif defined(__clang__)
#include "clangfeatures.h"
#elif defined(__GNUC__)
#include "gccfeatures.h"
#elif defined(__PGI)
#include "pgccfeatures.h"
#elif defined(_MSC_FULL_VER)
#include "msvcfeatures.h"
#else
/* Unknown compiler: stop with #error, and add a stray brace in case the
compiler carries on after the #error. */
#error "Can't identify compiler. You'll need to add a new xxfeatures.hpp"
{ /* maybe an unbalanced brace will terminate the compilation */
#endif
/* Defaults for symbols that neither the user nor the compiler-specific
header has defined. The catch-all is true when __cplusplus reports
C++11 or later. */
#ifndef RANDOM_ITERATOR_R123_USE_CXX11
#define RANDOM_ITERATOR_R123_USE_CXX11 (__cplusplus >= 201103L)
#endif
/* Each individual C++11 feature falls back to the catch-all. */
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_UNRESTRICTED_UNIONS
#define RANDOM_ITERATOR_R123_USE_CXX11_UNRESTRICTED_UNIONS RANDOM_ITERATOR_R123_USE_CXX11
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_STATIC_ASSERT
#define RANDOM_ITERATOR_R123_USE_CXX11_STATIC_ASSERT RANDOM_ITERATOR_R123_USE_CXX11
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_CONSTEXPR
#define RANDOM_ITERATOR_R123_USE_CXX11_CONSTEXPR RANDOM_ITERATOR_R123_USE_CXX11
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_EXPLICIT_CONVERSIONS
#define RANDOM_ITERATOR_R123_USE_CXX11_EXPLICIT_CONVERSIONS RANDOM_ITERATOR_R123_USE_CXX11
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_RANDOM
#define RANDOM_ITERATOR_R123_USE_CXX11_RANDOM RANDOM_ITERATOR_R123_USE_CXX11
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS
#define RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS RANDOM_ITERATOR_R123_USE_CXX11
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_LONG_LONG
#define RANDOM_ITERATOR_R123_USE_CXX11_LONG_LONG RANDOM_ITERATOR_R123_USE_CXX11
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_STD_ARRAY
#define RANDOM_ITERATOR_R123_USE_CXX11_STD_ARRAY RANDOM_ITERATOR_R123_USE_CXX11
#endif
/* These multiplication variants are off unless explicitly enabled. */
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_C99
#define RANDOM_ITERATOR_R123_USE_MULHILO64_C99 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO32_MULHI_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO32_MULHI_INTRIN 0
#endif
/* Static assertion: the real static_assert when available, otherwise a
typedef of an array whose size is -1 (a compile error) when expr is
false. The fallback ignores msg. */
#ifndef RANDOM_ITERATOR_R123_STATIC_ASSERT
#if RANDOM_ITERATOR_R123_USE_CXX11_STATIC_ASSERT
#define RANDOM_ITERATOR_R123_STATIC_ASSERT(expr, msg) static_assert(expr, msg)
#else
/* if msg always_looked_like_this, we could paste it into the name. Worth it? */
#define RANDOM_ITERATOR_R123_STATIC_ASSERT(expr, msg) typedef char static_assertion[(!!(expr))*2-1]
#endif
#endif
/* Expands to constexpr when supported, and to nothing otherwise. */
#ifndef RANDOM_ITERATOR_R123_CONSTEXPR
#if RANDOM_ITERATOR_R123_USE_CXX11_CONSTEXPR
#define RANDOM_ITERATOR_R123_CONSTEXPR constexpr
#else
#define RANDOM_ITERATOR_R123_CONSTEXPR
#endif
#endif
/* 64-bit support is assumed unless the platform header said otherwise. */
#ifndef RANDOM_ITERATOR_R123_USE_64BIT
#define RANDOM_ITERATOR_R123_USE_64BIT 1
#endif
/* 64-bit Philox is enabled when 64-bit support is on and at least one of
the listed 64x64 multiplication methods is enabled. */
#ifndef RANDOM_ITERATOR_R123_USE_PHILOX_64BIT
#define RANDOM_ITERATOR_R123_USE_PHILOX_64BIT (RANDOM_ITERATOR_R123_USE_64BIT && (RANDOM_ITERATOR_R123_USE_MULHILO64_ASM || RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN || RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN || RANDOM_ITERATOR_R123_USE_GNU_UINT128 || RANDOM_ITERATOR_R123_USE_MULHILO64_C99 || RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN || RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN))
#endif
#ifndef RANDOM_ITERATOR_R123_ULONG_LONG
#if defined(__cplusplus) && !RANDOM_ITERATOR_R123_USE_CXX11_LONG_LONG
/* C++98 doesn't have long long. It doesn't have uint64_t either, but
we will have typedef'ed uint64_t to something in the xxxfeatures.h.
With luck, it won't elicit complaints from -pedantic. Cross your
fingers... */
#define RANDOM_ITERATOR_R123_ULONG_LONG uint64_t
#else
#define RANDOM_ITERATOR_R123_ULONG_LONG unsigned long long
#endif
#endif
/* UINT64_C should have been #defined by XXXfeatures.h, either by
#include <stdint.h> or through compiler-dependent hacks */
#ifndef RANDOM_ITERATOR_R123_64BIT
#define RANDOM_ITERATOR_R123_64BIT(x) UINT64_C(x)
#endif
/* Default way to raise an error: a plain throw expression. */
#ifndef RANDOM_ITERATOR_R123_THROW
#define RANDOM_ITERATOR_R123_THROW(x) throw (x)
#endif
/* The Metal address-space qualifiers expand to nothing unless a platform
header has already defined them. */
#ifndef RANDOM_ITERATOR_R123_METAL_THREAD_ADDRESS_SPACE
#define RANDOM_ITERATOR_R123_METAL_THREAD_ADDRESS_SPACE
#endif
#ifndef RANDOM_ITERATOR_R123_METAL_CONSTANT_ADDRESS_SPACE
#define RANDOM_ITERATOR_R123_METAL_CONSTANT_ADDRESS_SPACE
#endif
/*
* Windows.h (and perhaps other "well-meaning" code) defines min and
* max as macros, so there's a high chance that our definition of min,
* max methods or use of std::numeric_limits min and max will cause
* complaints in any program that happened to include Windows.h or
* suchlike first. We use the null macro below in our own header
* files' definition or use of min, max to defensively preclude
* this problem: placed between the name and the '(', it stops a
* function-like min/max macro from being expanded. It may not be
* enough; one might need to #define NOMINMAX before including
* Windows.h or compile with -DNOMINMAX.
*/
#define RANDOM_ITERATOR_R123_NO_MACRO_SUBST
/** \endcond */
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __gccfeatures_dot_hpp
#define __gccfeatures_dot_hpp
/* gcc version folded into one comparable integer, e.g. 4.7.2 -> 40702. */
#define RANDOM_ITERATOR_R123_GNUC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
/* Refuse to build on architectures other than x86, powerpc, arm and
aarch64. The #error is backed up by a bogus #include and a stray brace
for compilers that keep going after #error. */
#if !defined(__x86_64__) && !defined(__i386__) && !defined(__powerpc__) && !defined(__arm__) && !defined(__aarch64__)
# error "This code has only been tested on x86, powerpc and a few arm platforms."
#include <including_a_nonexistent_file_will_stop_some_compilers_from_continuing_with_a_hopeless_task>
{ /* maybe an unbalanced brace will terminate the compilation */
/* Feel free to try the Random123 library on other architectures by changing
the conditions that reach this error, but you should consider it a
porting exercise and expect to encounter bugs and deficiencies.
Please let the authors know of any successes (or failures). */
#endif
/* NOTE(review): presumably <ppu_intrinsics.h> supplies the __mulhdu and
__mulhwu intrinsics named further down -- confirm. */
#ifdef __powerpc__
#include <ppu_intrinsics.h>
#endif
#ifndef RANDOM_ITERATOR_R123_STATIC_INLINE
#define RANDOM_ITERATOR_R123_STATIC_INLINE static __inline__
#endif
/* always_inline is only applied for gcc 4.0.0 and later; older versions
get the declaration unadorned. */
#ifndef RANDOM_ITERATOR_R123_FORCE_INLINE
#if RANDOM_ITERATOR_R123_GNUC_VERSION >= 40000
#define RANDOM_ITERATOR_R123_FORCE_INLINE(decl) decl __attribute__((always_inline))
#else
#define RANDOM_ITERATOR_R123_FORCE_INLINE(decl) decl
#endif
#endif
/* Expands to nothing here. */
#ifndef RANDOM_ITERATOR_R123_CUDA_DEVICE
#define RANDOM_ITERATOR_R123_CUDA_DEVICE
#endif
#ifndef RANDOM_ITERATOR_R123_ASSERT
#include <assert.h>
#define RANDOM_ITERATOR_R123_ASSERT(x) assert(x)
#endif
#ifndef RANDOM_ITERATOR_R123_BUILTIN_EXPECT
#define RANDOM_ITERATOR_R123_BUILTIN_EXPECT(expr,likely) __builtin_expect(expr,likely)
#endif
/* According to the C++0x standard, we should be able to test the numeric
value of __cplusplus == 199711L for C++98, __cplusplus == 201103L for C++11
But gcc has had an open bug http://gcc.gnu.org/bugzilla/show_bug.cgi?id=1773
since early 2001, which was finally fixed in 4.7 (early 2012). For
earlier versions, the only way to detect whether --std=c++0x was requested
on the command line is to look at the __GCC_EXPERIMENTAL_CXX0X__ pp-symbol.
*/
/* GNU_CXX11 is true when __cplusplus reports C++11 or later, or when gcc
is older than 4.7 and __GCC_EXPERIMENTAL_CXX0X__ is defined. The test
is done with #if/#else so that defined() never appears in a macro
expansion. */
#if defined(__GCC_EXPERIMENTAL_CXX0X__)
#define GNU_CXX11 (__cplusplus>=201103L || (RANDOM_ITERATOR_R123_GNUC_VERSION<40700 && 1/* defined(__GCC_EXPERIMENTAL_CXX0X__) */))
#else
#define GNU_CXX11 (__cplusplus>=201103L || (RANDOM_ITERATOR_R123_GNUC_VERSION<40700 && 0/* defined(__GCC_EXPERIMENTAL_CXX0X__) */))
#endif
/* Each C++11 feature additionally requires a minimum gcc version. */
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_UNRESTRICTED_UNIONS
#define RANDOM_ITERATOR_R123_USE_CXX11_UNRESTRICTED_UNIONS ((RANDOM_ITERATOR_R123_GNUC_VERSION >= 40600) && GNU_CXX11)
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_STATIC_ASSERT
#define RANDOM_ITERATOR_R123_USE_CXX11_STATIC_ASSERT ((RANDOM_ITERATOR_R123_GNUC_VERSION >= 40300) && GNU_CXX11)
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_CONSTEXPR
#define RANDOM_ITERATOR_R123_USE_CXX11_CONSTEXPR ((RANDOM_ITERATOR_R123_GNUC_VERSION >= 40600) && GNU_CXX11)
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_EXPLICIT_CONVERSIONS
#define RANDOM_ITERATOR_R123_USE_CXX11_EXPLICIT_CONVERSIONS ((RANDOM_ITERATOR_R123_GNUC_VERSION >= 40500) && GNU_CXX11)
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_RANDOM
#define RANDOM_ITERATOR_R123_USE_CXX11_RANDOM ((RANDOM_ITERATOR_R123_GNUC_VERSION>=40500) && GNU_CXX11)
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS
#define RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS ((RANDOM_ITERATOR_R123_GNUC_VERSION>=40400) && GNU_CXX11)
#endif
/* The AES-NI and SSE settings mirror the compiler's own predefined
macros (__AES__, __SSE4_2__, __SSE4_1__, __SSE2__). */
#ifndef RANDOM_ITERATOR_R123_USE_AES_NI
#ifdef __AES__
#define RANDOM_ITERATOR_R123_USE_AES_NI 1
#else
#define RANDOM_ITERATOR_R123_USE_AES_NI 0
#endif
#endif
#ifndef RANDOM_ITERATOR_R123_USE_SSE4_2
#ifdef __SSE4_2__
#define RANDOM_ITERATOR_R123_USE_SSE4_2 1
#else
#define RANDOM_ITERATOR_R123_USE_SSE4_2 0
#endif
#endif
#ifndef RANDOM_ITERATOR_R123_USE_SSE4_1
#ifdef __SSE4_1__
#define RANDOM_ITERATOR_R123_USE_SSE4_1 1
#else
#define RANDOM_ITERATOR_R123_USE_SSE4_1 0
#endif
#endif
#ifndef RANDOM_ITERATOR_R123_USE_SSE
/* There's no point in trying to compile SSE code in Random123
unless SSE2 is available. */
#ifdef __SSE2__
#define RANDOM_ITERATOR_R123_USE_SSE 1
#else
#define RANDOM_ITERATOR_R123_USE_SSE 0
#endif
#endif
#ifndef RANDOM_ITERATOR_R123_USE_AES_OPENSSL
/* There isn't really a good way to tell at compile time whether
openssl is available. Without a pre-compilation configure-like
tool, it's less error-prone to guess that it isn't available. Add
-DRANDOM_ITERATOR_R123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to
play with openssl */
#define RANDOM_ITERATOR_R123_USE_AES_OPENSSL 0
#endif
/* __uint128_t is only enabled on the 64-bit x86 and arm targets. */
#ifndef RANDOM_ITERATOR_R123_USE_GNU_UINT128
#if defined(__x86_64__) || defined(__aarch64__)
#define RANDOM_ITERATOR_R123_USE_GNU_UINT128 1
#else
#define RANDOM_ITERATOR_R123_USE_GNU_UINT128 0
#endif
#endif
/* NOTE(review): both branches below define the value 1, so the
architecture test currently has no effect. Confirm whether the non-x86
branch was meant to be 0, or whether the symbol only means that
GNU-style asm syntax is accepted (in which case the #if is redundant). */
#ifndef RANDOM_ITERATOR_R123_USE_ASM_GNU
#if (defined(__x86_64__)||defined(__i386__))
#define RANDOM_ITERATOR_R123_USE_ASM_GNU 1
#else
#define RANDOM_ITERATOR_R123_USE_ASM_GNU 1
#endif
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CPUID_MSVC
#define RANDOM_ITERATOR_R123_USE_CPUID_MSVC 0
#endif
/* <x86intrin.h> is used on x86 with gcc 4.4.2 or later. The older
per-extension headers below (emmintrin.h, smmintrin.h) are only used
with gcc versions before 4.4.2. */
#ifndef RANDOM_ITERATOR_R123_USE_X86INTRIN_H
#if (defined(__x86_64__)||defined(__i386__))
#define RANDOM_ITERATOR_R123_USE_X86INTRIN_H (1/* (defined(__x86_64__)||defined(__i386__)) */ && RANDOM_ITERATOR_R123_GNUC_VERSION >= 40402)
#else
#define RANDOM_ITERATOR_R123_USE_X86INTRIN_H (0/* (defined(__x86_64__)||defined(__i386__)) */ && RANDOM_ITERATOR_R123_GNUC_VERSION >= 40402)
#endif
#endif
#ifndef RANDOM_ITERATOR_R123_USE_IA32INTRIN_H
#define RANDOM_ITERATOR_R123_USE_IA32INTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_XMMINTRIN_H
#define RANDOM_ITERATOR_R123_USE_XMMINTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_EMMINTRIN_H
/* gcc -m64 on Solaris 10 defines __SSE2__ but doesn't have
emmintrin.h in the include search path. This is
so broken that I refuse to try to work around it. If this
affects you, figure out where your emmintrin.h lives and
add an appropriate -I to your CPPFLAGS. Or add -DRANDOM_ITERATOR_R123_USE_SSE=0. */
#define RANDOM_ITERATOR_R123_USE_EMMINTRIN_H (RANDOM_ITERATOR_R123_USE_SSE && (RANDOM_ITERATOR_R123_GNUC_VERSION < 40402))
#endif
#ifndef RANDOM_ITERATOR_R123_USE_SMMINTRIN_H
#define RANDOM_ITERATOR_R123_USE_SMMINTRIN_H ((RANDOM_ITERATOR_R123_USE_SSE4_1 || RANDOM_ITERATOR_R123_USE_SSE4_2) && (RANDOM_ITERATOR_R123_GNUC_VERSION < 40402))
#endif
#ifndef RANDOM_ITERATOR_R123_USE_WMMINTRIN_H
#define RANDOM_ITERATOR_R123_USE_WMMINTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_INTRIN_H
#define RANDOM_ITERATOR_R123_USE_INTRIN_H 0
#endif
/* Selection of the full-width multiplication method. With gcc every
variant defaults to off, except the 64-bit mulhi intrinsic, which is
enabled on __powerpc64__. */
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO32_ASM
#define RANDOM_ITERATOR_R123_USE_MULHILO32_ASM 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_ASM
#define RANDOM_ITERATOR_R123_USE_MULHILO64_ASM 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN
#if (defined(__powerpc64__))
#define RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN 1
#else
#define RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN 0
#endif
#endif
/* Names of the mulhi intrinsics. They are defined unconditionally, but
only matter when the corresponding USE_..._MULHI_INTRIN is nonzero. */
#ifndef RANDOM_ITERATOR_R123_MULHILO64_MULHI_INTRIN
#define RANDOM_ITERATOR_R123_MULHILO64_MULHI_INTRIN __mulhdu
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO32_MULHI_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO32_MULHI_INTRIN 0
#endif
#ifndef RANDOM_ITERATOR_R123_MULHILO32_MULHI_INTRIN
#define RANDOM_ITERATOR_R123_MULHILO32_MULHI_INTRIN __mulhwu
#endif
#ifndef __STDC_CONSTANT_MACROS
#define __STDC_CONSTANT_MACROS
#endif
#include <stdint.h>
#ifndef UINT64_C
#error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
#endif
/* If you add something, it must go in all the other XXfeatures.hpp
and in ../ut_features.cpp */
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __icpcfeatures_dot_hpp
#define __icpcfeatures_dot_hpp
// icc relies on gcc libraries and other toolchain components.
// Pack __GNUC__, __GNUC_MINOR__ and __GNUC_PATCHLEVEL__ into one integer
// (major*10000 + minor*100 + patchlevel) so versions can be compared
// with a single relational test.
#define RANDOM_ITERATOR_R123_GNUC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
// Platform gate: refuse to build unless __x86_64__ or __i386__ is defined.
#if !defined(__x86_64__) && !defined(__i386__)
# error "This code has only been tested on x86 platforms."
{ // deliberately unmatched: maybe an unbalanced brace will terminate the compilation
// You are invited to try Random123 on other architectures, by changing
// the conditions that reach this error, but you should consider it a
// porting exercise and expect to encounter bugs and deficiencies.
// Please let the authors know of any successes (or failures).
#endif
// Decoration and diagnostic helpers.  Each is defined only when the
// includer has not already provided its own definition.
#if !defined(RANDOM_ITERATOR_R123_CUDA_DEVICE)
// Expands to nothing in this header.
#  define RANDOM_ITERATOR_R123_CUDA_DEVICE
#endif
#if !defined(RANDOM_ITERATOR_R123_STATIC_INLINE)
#  define RANDOM_ITERATOR_R123_STATIC_INLINE static inline
#endif
#if !defined(RANDOM_ITERATOR_R123_FORCE_INLINE)
// Appends the always_inline attribute after the wrapped declaration.
#  define RANDOM_ITERATOR_R123_FORCE_INLINE(decl) decl __attribute__((always_inline))
#endif
#if !defined(RANDOM_ITERATOR_R123_BUILTIN_EXPECT)
// Forwards both arguments to __builtin_expect.
#  define RANDOM_ITERATOR_R123_BUILTIN_EXPECT(expr,likely) __builtin_expect(expr,likely)
#endif
#if !defined(RANDOM_ITERATOR_R123_ASSERT)
// <assert.h> is only pulled in when the default assert()-based macro is used.
#  include <assert.h>
#  define RANDOM_ITERATOR_R123_ASSERT(x) assert(x)
#endif
// The basic idiom is:
// #ifndef RANDOM_ITERATOR_R123_SOMETHING
// #if some condition
// #define RANDOM_ITERATOR_R123_SOMETHING 1
// #else
// #define RANDOM_ITERATOR_R123_SOMETHING 0
// #endif
// #endif
// This idiom allows an external user to override any decision
// in this file with a command-line -DRANDOM_ITERATOR_R123_SOMETHING=1 or -DRANDOM_ITERATOR_R123_SOMETHING=0
// An alternative idiom is:
// #ifndef RANDOM_ITERATOR_R123_SOMETHING
// #define RANDOM_ITERATOR_R123_SOMETHING (some boolean expression)
// #endif
// where the boolean expression might contain previously-defined RANDOM_ITERATOR_R123_SOMETHING_ELSE
// pp-symbols.
// Instruction-set switches.  Each one mirrors the compiler's predefined
// macro (1 when it is defined, 0 otherwise) unless already set externally.
#if !defined(RANDOM_ITERATOR_R123_USE_SSE4_2)
#  if defined(__SSE4_2__)
#    define RANDOM_ITERATOR_R123_USE_SSE4_2 1
#  else
#    define RANDOM_ITERATOR_R123_USE_SSE4_2 0
#  endif
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_SSE4_1)
#  if defined(__SSE4_1__)
#    define RANDOM_ITERATOR_R123_USE_SSE4_1 1
#  else
#    define RANDOM_ITERATOR_R123_USE_SSE4_1 0
#  endif
#endif
// The SSE switch is keyed on __SSE2__.
#if !defined(RANDOM_ITERATOR_R123_USE_SSE)
#  if defined(__SSE2__)
#    define RANDOM_ITERATOR_R123_USE_SSE 1
#  else
#    define RANDOM_ITERATOR_R123_USE_SSE 0
#  endif
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_AES_NI)
// icc (version 12), unlike gcc, does not predefine __AES__ when -maes or
// -xHost is given.  That looks like an icc defect (it does define
// __SSE4_2__ in the analogous situation), but until Intel fixes it the
// cautious choice is to not emit instructions that would raise SIGILL
// at run time.  To get AES-NI with icc, the caller must put something
// like -DRANDOM_ITERATOR_R123_USE_AES_NI=1 or -D__AES__ on the command
// line.  FWIW, the AES-NI Whitepaper by Gueron says that icc has
// supported AES-NI from 11.1 onwards.
//
// Both variants keep the __ICC >= 1101 version test; only the second
// operand depends on whether __AES__ is defined.
#  ifdef __AES__
#    define RANDOM_ITERATOR_R123_USE_AES_NI ((__ICC>=1101) && 1/*defined(__AES__)*/)
#  else
#    define RANDOM_ITERATOR_R123_USE_AES_NI ((__ICC>=1101) && 0/*defined(__AES__)*/)
#  endif
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_AES_OPENSSL)
// Whether openssl is installed cannot reliably be detected at compile
// time.  Lacking a configure-like step, assuming it is absent is the less
// error-prone guess.  To experiment with openssl, add
// -DRANDOM_ITERATOR_R123_USE_AES_OPENSSL=1 plus any LDFLAGS or LDLIBS needed.
#  define RANDOM_ITERATOR_R123_USE_AES_OPENSSL 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_GNU_UINT128)
#  define RANDOM_ITERATOR_R123_USE_GNU_UINT128 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_ASM_GNU)
#  define RANDOM_ITERATOR_R123_USE_ASM_GNU 1
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_CPUID_MSVC)
#  define RANDOM_ITERATOR_R123_USE_CPUID_MSVC 0
#endif

// Intrinsic-header switches: ia32intrin, emmintrin, smmintrin and
// wmmintrin are on by default; x86intrin, xmmintrin and intrin are off.
#if !defined(RANDOM_ITERATOR_R123_USE_X86INTRIN_H)
#  define RANDOM_ITERATOR_R123_USE_X86INTRIN_H 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_IA32INTRIN_H)
#  define RANDOM_ITERATOR_R123_USE_IA32INTRIN_H 1
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_XMMINTRIN_H)
#  define RANDOM_ITERATOR_R123_USE_XMMINTRIN_H 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_EMMINTRIN_H)
#  define RANDOM_ITERATOR_R123_USE_EMMINTRIN_H 1
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_SMMINTRIN_H)
#  define RANDOM_ITERATOR_R123_USE_SMMINTRIN_H 1
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_WMMINTRIN_H)
#  define RANDOM_ITERATOR_R123_USE_WMMINTRIN_H 1
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_INTRIN_H)
#  define RANDOM_ITERATOR_R123_USE_INTRIN_H 0
#endif

// mulhilo strategy switches: only the 64-bit asm variant is on by default.
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO16_ASM)
#  define RANDOM_ITERATOR_R123_USE_MULHILO16_ASM 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO32_ASM)
#  define RANDOM_ITERATOR_R123_USE_MULHILO32_ASM 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_ASM)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_ASM 1
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN 0
#endif

// Request the constant macros before pulling in <stdint.h>, then make
// sure UINT64_C really is available.
#if !defined(__STDC_CONSTANT_MACROS)
#  define __STDC_CONSTANT_MACROS
#endif
#include <stdint.h>
#if !defined(UINT64_C)
#  error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
#endif
// If you add something, it must go in all the other XXfeatures.hpp
// and in ../ut_features.cpp
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
* Written by Tom Schoonjans <Tom.Schoonjans@me.com>
*/
#ifndef __metalfeatures_dot_hpp
#define __metalfeatures_dot_hpp
/* Decoration helpers.  Each is defined only if not already provided. */
#if !defined(RANDOM_ITERATOR_R123_CUDA_DEVICE)
#  define RANDOM_ITERATOR_R123_CUDA_DEVICE
#endif
#if !defined(RANDOM_ITERATOR_R123_STATIC_INLINE)
#  define RANDOM_ITERATOR_R123_STATIC_INLINE inline
#endif
#if !defined(RANDOM_ITERATOR_R123_FORCE_INLINE)
/* Appends the always_inline attribute after the wrapped declaration. */
#  define RANDOM_ITERATOR_R123_FORCE_INLINE(decl) decl __attribute__((always_inline))
#endif

/* Address-space qualifiers: expand to the keywords `thread` and `constant`. */
#if !defined(RANDOM_ITERATOR_R123_METAL_THREAD_ADDRESS_SPACE)
#  define RANDOM_ITERATOR_R123_METAL_THREAD_ADDRESS_SPACE thread
#endif
#if !defined(RANDOM_ITERATOR_R123_METAL_CONSTANT_ADDRESS_SPACE)
#  define RANDOM_ITERATOR_R123_METAL_CONSTANT_ADDRESS_SPACE constant
#endif

/* Assertions compile to nothing here; BUILTIN_EXPECT passes the
   expression through and drops the hint. */
#if !defined(RANDOM_ITERATOR_R123_ASSERT)
#  define RANDOM_ITERATOR_R123_ASSERT(x)
#endif
#if !defined(RANDOM_ITERATOR_R123_BUILTIN_EXPECT)
#  define RANDOM_ITERATOR_R123_BUILTIN_EXPECT(expr,likely) expr
#endif

/* Strategy switches: everything is off except the 32-bit mulhi intrinsic. */
#if !defined(RANDOM_ITERATOR_R123_USE_GNU_UINT128)
#  define RANDOM_ITERATOR_R123_USE_GNU_UINT128 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_ASM)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_ASM 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO32_MULHI_INTRIN)
#  define RANDOM_ITERATOR_R123_USE_MULHILO32_MULHI_INTRIN 1
#endif
/* When the 32-bit mulhi intrinsic path is enabled, pull in <metal_integer>
   and name metal::mulhi as the intrinsic to use. */
#if RANDOM_ITERATOR_R123_USE_MULHILO32_MULHI_INTRIN
#  include <metal_integer>
#  define RANDOM_ITERATOR_R123_MULHILO32_MULHI_INTRIN metal::mulhi
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_AES_NI)
#  define RANDOM_ITERATOR_R123_USE_AES_NI 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_64BIT)
#  define RANDOM_ITERATOR_R123_USE_64BIT 0 /* Metal currently (Feb 2019, Specification-2) does not support 64-bit variable types */
#endif
#if !defined(RANDOM_ITERATOR_R123_ULONG_LONG)
/* As of Metal Specification-2 (Feb 2019) the widest integer type is a
 * 32-bit unsigned int, so that is what stands in here. Let's hope for
 * the best... */
#  define RANDOM_ITERATOR_R123_ULONG_LONG unsigned int
#endif
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __msvcfeatures_dot_hpp
#define __msvcfeatures_dot_hpp
//#if _MSVC_FULL_VER <= 15
//#error "We've only tested MSVC_FULL_VER==15."
//#endif
// Platform gate: refuse to build unless _M_IX86 or _M_X64 is defined.
#if !defined(_M_IX86) && !defined(_M_X64)
# error "This code has only been tested on x86 platforms."
{ // deliberately unmatched: maybe an unbalanced brace will terminate the compilation
// You are invited to try Random123 on other architectures, by changing
// the conditions that reach this error, but you should consider it a
// porting exercise and expect to encounter bugs and deficiencies.
// Please let the authors know of any successes (or failures).
#endif
// Decoration and diagnostic helpers.  Each is defined only when the
// includer has not already provided its own definition.
#ifndef RANDOM_ITERATOR_R123_STATIC_INLINE
#define RANDOM_ITERATOR_R123_STATIC_INLINE static __inline
#endif
#ifndef RANDOM_ITERATOR_R123_FORCE_INLINE
// Use the canonical double-underscore keyword.  The single-underscore
// spelling `_forceinline` is only a backward-compatibility synonym that
// MSVC stops recognising when language extensions are disabled (/Za).
#define RANDOM_ITERATOR_R123_FORCE_INLINE(decl) __forceinline decl
#endif
#ifndef RANDOM_ITERATOR_R123_CUDA_DEVICE
// Expands to nothing in this header.
#define RANDOM_ITERATOR_R123_CUDA_DEVICE
#endif
#ifndef RANDOM_ITERATOR_R123_ASSERT
// <assert.h> is only pulled in when the default assert()-based macro is used.
#include <assert.h>
#define RANDOM_ITERATOR_R123_ASSERT(x) assert(x)
#endif
#ifndef RANDOM_ITERATOR_R123_BUILTIN_EXPECT
// Passes the expression through and drops the likelihood hint.
#define RANDOM_ITERATOR_R123_BUILTIN_EXPECT(expr,likely) expr
#endif
// The basic idiom is:
// #ifndef RANDOM_ITERATOR_R123_SOMETHING
// #if some condition
// #define RANDOM_ITERATOR_R123_SOMETHING 1
// #else
// #define RANDOM_ITERATOR_R123_SOMETHING 0
// #endif
// #endif
// This idiom allows an external user to override any decision
// in this file with a command-line -DRANDOM_ITERATOR_R123_SOMETHING=1 or -DRANDOM_ITERATOR_R123_SOMETHING=0
// An alternative idiom is:
// #ifndef RANDOM_ITERATOR_R123_SOMETHING
// #define RANDOM_ITERATOR_R123_SOMETHING (some boolean expression)
// #endif
// where the boolean expression might contain previously-defined RANDOM_ITERATOR_R123_SOMETHING_ELSE
// pp-symbols.
// Instruction-set defaults.  SSE is always on; SSE4.1, SSE4.2 and AES-NI
// are on exactly when _M_X64 is defined.  All can be overridden with -D.
// NOTE(review): _M_X64 by itself presumably does not prove the target CPU
// has SSE4.x or AES-NI -- confirm against the deployment hardware.
#if !defined(RANDOM_ITERATOR_R123_USE_SSE)
#  define RANDOM_ITERATOR_R123_USE_SSE 1
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_SSE4_1)
#  ifdef _M_X64
#    define RANDOM_ITERATOR_R123_USE_SSE4_1 1
#  else
#    define RANDOM_ITERATOR_R123_USE_SSE4_1 0
#  endif
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_SSE4_2)
#  ifdef _M_X64
#    define RANDOM_ITERATOR_R123_USE_SSE4_2 1
#  else
#    define RANDOM_ITERATOR_R123_USE_SSE4_2 0
#  endif
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_AES_NI)
#  ifdef _M_X64
#    define RANDOM_ITERATOR_R123_USE_AES_NI 1
#  else
#    define RANDOM_ITERATOR_R123_USE_AES_NI 0
#  endif
#endif
// General switches: no openssl, no GNU 128-bit integers, no GNU-style
// asm; the MSVC cpuid path is on.
#if !defined(RANDOM_ITERATOR_R123_USE_AES_OPENSSL)
#  define RANDOM_ITERATOR_R123_USE_AES_OPENSSL 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_GNU_UINT128)
#  define RANDOM_ITERATOR_R123_USE_GNU_UINT128 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_ASM_GNU)
#  define RANDOM_ITERATOR_R123_USE_ASM_GNU 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_CPUID_MSVC)
#  define RANDOM_ITERATOR_R123_USE_CPUID_MSVC 1
#endif

// Intrinsic-header switches: emmintrin, smmintrin, wmmintrin and intrin
// are on by default; x86intrin, ia32intrin and xmmintrin are off.
#if !defined(RANDOM_ITERATOR_R123_USE_X86INTRIN_H)
#  define RANDOM_ITERATOR_R123_USE_X86INTRIN_H 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_IA32INTRIN_H)
#  define RANDOM_ITERATOR_R123_USE_IA32INTRIN_H 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_XMMINTRIN_H)
#  define RANDOM_ITERATOR_R123_USE_XMMINTRIN_H 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_EMMINTRIN_H)
#  define RANDOM_ITERATOR_R123_USE_EMMINTRIN_H 1
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_SMMINTRIN_H)
#  define RANDOM_ITERATOR_R123_USE_SMMINTRIN_H 1
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_WMMINTRIN_H)
#  define RANDOM_ITERATOR_R123_USE_WMMINTRIN_H 1
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_INTRIN_H)
#  define RANDOM_ITERATOR_R123_USE_INTRIN_H 1
#endif

// mulhilo strategy switches: every asm variant is off; the MSVC 64-bit
// intrinsic is on exactly when _M_X64 is defined.
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO16_ASM)
#  define RANDOM_ITERATOR_R123_USE_MULHILO16_ASM 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO32_ASM)
#  define RANDOM_ITERATOR_R123_USE_MULHILO32_ASM 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_ASM)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_ASM 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN)
#  ifdef _M_X64
#    define RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN 1
#  else
#    define RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN 0
#  endif
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN 0
#endif

// Request the constant macros before pulling in <stdint.h>, then make
// sure UINT64_C really is available.
#if !defined(__STDC_CONSTANT_MACROS)
#  define __STDC_CONSTANT_MACROS
#endif
#include <stdint.h>
#if !defined(UINT64_C)
#  error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
#endif

// Silence MSVC warnings 4244 and 4996 from this point onward.
// NOTE(review): presumably the narrowing-conversion and deprecated-function
// warnings -- confirm against the MSVC warning reference.
#pragma warning(disable:4244)
#pragma warning(disable:4996)
// If you add something, it must go in all the other XXfeatures.hpp
// and in ../ut_features.cpp
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __r123_nvcc_features_dot_h__
#define __r123_nvcc_features_dot_h__
// Sanity checks on the CUDA runtime version.  The two tests are chained
// with #elif so that a missing CUDART_VERSION produces only the "not
// defined" diagnostic, instead of also evaluating as 0 < 4010 and
// emitting a misleading "CUDA earlier than 4.1" error on top of it.
#if !defined(CUDART_VERSION)
#error "why are we in nvccfeatures.h if CUDART_VERSION is not defined"
#elif CUDART_VERSION < 4010
#error "CUDA versions earlier than 4.1 produce incorrect results for some templated functions in namespaces. Random123 is unsupported. See comments in nvccfeatures.h"
// This test was added in Random123-1.08 (August, 2013) because we
// discovered that Ftype(maxTvalue<T>()) with Ftype=double and
// T=uint64_t in examples/uniform.hpp produces -1 for CUDA4.0 and
// earlier. We can't be sure this bug doesn't also affect invocations
// of other templated functions, e.g., essentially all of Random123.
// Thus, we no longer trust CUDA versions earlier than 4.1 even though
// we had previously tested and timed Random123 with CUDA 3.x and 4.0.
// If you feel lucky or desperate, you can change #error to #warning, but
// please take extra care to be sure that you are getting correct
// results.
#endif
// nvcc falls through to gcc or msvc. So first define
// a couple of things and then include either gccfeatures.h
// or msvcfeatures.h
//#ifdef __CUDA_ARCH__ allows Philox32 and Philox64 to be compiled
//for both device and host functions in CUDA by setting compiler flags
//for the device function
#if defined(__CUDA_ARCH__)
// Device pass: mark functions __device__ and use the CUDA mulhilo64
// intrinsic.
#  if !defined(RANDOM_ITERATOR_R123_CUDA_DEVICE)
#    define RANDOM_ITERATOR_R123_CUDA_DEVICE __device__
#  endif
#  if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN)
#    define RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN 1
#  endif
#  if !defined(RANDOM_ITERATOR_R123_ASSERT)
// A failed assertion executes the "trap;" instruction.
#    define RANDOM_ITERATOR_R123_ASSERT(x) if((x)) ; else asm("trap;")
#  endif
#  if !defined(RANDOM_ITERATOR_R123_THROW)
// No exceptions in CUDA, at least up to 4.0, so a throw becomes an
// assertion that always fails.
#    define RANDOM_ITERATOR_R123_THROW(x) RANDOM_ITERATOR_R123_ASSERT(0)
#  endif
#else // ! __CUDA_ARCH__
// nvcc is running but not targeting the CUDA architecture, so this must
// be the host pass.  Tell the philox code to use the mulhilo64 asm,
// because nvcc doesn't grok uint128_t.
#  if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_ASM)
#    define RANDOM_ITERATOR_R123_USE_MULHILO64_ASM 1
#  endif
#endif // __CUDA_ARCH__
// Defaults applied on both passes.  BUILTIN_EXPECT passes the expression
// through and drops the hint.
#if !defined(RANDOM_ITERATOR_R123_BUILTIN_EXPECT)
#  define RANDOM_ITERATOR_R123_BUILTIN_EXPECT(expr,likely) expr
#endif
// AES-NI, every SSE level and GNU 128-bit integers are all switched off.
#if !defined(RANDOM_ITERATOR_R123_USE_SSE)
#  define RANDOM_ITERATOR_R123_USE_SSE 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_SSE4_1)
#  define RANDOM_ITERATOR_R123_USE_SSE4_1 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_SSE4_2)
#  define RANDOM_ITERATOR_R123_USE_SSE4_2 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_AES_NI)
#  define RANDOM_ITERATOR_R123_USE_AES_NI 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_GNU_UINT128)
#  define RANDOM_ITERATOR_R123_USE_GNU_UINT128 0
#endif
#if !defined(RANDOM_ITERATOR_R123_ULONG_LONG)
// Without this override the type would be uint64_t, which is
// not the same as unsigned long long.
#  define RANDOM_ITERATOR_R123_ULONG_LONG unsigned long long
#endif
// Pull in the remaining defaults from the host compiler's feature header:
// gcc's when __GNUC__ is defined, otherwise MSVC's when _MSC_FULL_VER is
// defined.  Nothing is included when neither macro is defined.
#ifdef __GNUC__
#  include "gccfeatures.h"
#elif defined(_MSC_FULL_VER)
#  include "msvcfeatures.h"
#endif
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __open64features_dot_hpp
#define __open64features_dot_hpp
/* The gcc features are mostly right. We just override a few and then include gccfeatures.h */
/* Open64 4.2.3 and 4.2.4 accept the __uint128_t code without complaint
but produce incorrect code for 64-bit philox. The MULHILO64_ASM
seems to work fine */
/* Open64 overrides: switch the __uint128_t path off and the 64-bit
   mulhilo asm path on, unless already chosen externally. */
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_ASM)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_ASM 1
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_GNU_UINT128)
#  define RANDOM_ITERATOR_R123_USE_GNU_UINT128 0
#endif
#include "gccfeatures.h"
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __openclfeatures_dot_hpp
#define __openclfeatures_dot_hpp
// Decoration helpers.  Each is defined only if not already provided.
#if !defined(RANDOM_ITERATOR_R123_CUDA_DEVICE)
#  define RANDOM_ITERATOR_R123_CUDA_DEVICE
#endif
#if !defined(RANDOM_ITERATOR_R123_STATIC_INLINE)
#  define RANDOM_ITERATOR_R123_STATIC_INLINE inline
#endif
#if !defined(RANDOM_ITERATOR_R123_FORCE_INLINE)
// Appends the always_inline attribute after the wrapped declaration.
#  define RANDOM_ITERATOR_R123_FORCE_INLINE(decl) decl __attribute__((always_inline))
#endif
// Assertions compile to nothing here; BUILTIN_EXPECT passes the
// expression through and drops the hint.
#if !defined(RANDOM_ITERATOR_R123_ASSERT)
#  define RANDOM_ITERATOR_R123_ASSERT(x)
#endif
#if !defined(RANDOM_ITERATOR_R123_BUILTIN_EXPECT)
#  define RANDOM_ITERATOR_R123_BUILTIN_EXPECT(expr,likely) expr
#endif

// Strategy switches: only the OpenCL mulhilo64 intrinsic is on.
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN 1
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_ASM)
#  define RANDOM_ITERATOR_R123_USE_MULHILO64_ASM 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_GNU_UINT128)
#  define RANDOM_ITERATOR_R123_USE_GNU_UINT128 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_AES_NI)
#  define RANDOM_ITERATOR_R123_USE_AES_NI 0
#endif
// Fixed-width integer names for OpenCL, declared here instead of coming
// from <stdint.h>.
// XXX ATI APP SDK 2.4 clBuildProgram SEGVs if one uses uint64_t instead of
// ulong to mul_hi. And gets lots of complaints from stdint.h
// on some machines.
// But these typedefs mean we cannot include stdint.h with
// these headers? Do we need RANDOM_ITERATOR_R123_64T, RANDOM_ITERATOR_R123_32T, RANDOM_ITERATOR_R123_8T?
typedef ulong uint64_t;
typedef uint uint32_t;
typedef uchar uint8_t;
// Local stand-in for UINT64_C: appends the UL suffix to the literal and
// casts the result to ulong.
#define UINT64_C(x) ((ulong)(x##UL))
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Copyright (c) 2013, Los Alamos National Security, LLC
All rights reserved.
Copyright 2013. Los Alamos National Security, LLC. This software was produced
under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
Laboratory (LANL), which is operated by Los Alamos National Security, LLC for
the U.S. Department of Energy. The U.S. Government has rights to use,
reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
to produce derivative works, such modified software should be clearly marked,
so as not to confuse it with the version available from LANL.
*/
#ifndef __pgccfeatures_dot_hpp
#define __pgccfeatures_dot_hpp
/* Platform gate: refuse to build unless __x86_64__ or __i386__ is
   defined.  Three obstacles are stacked in case one alone does not stop
   the compiler: #error, an #include of a file that does not exist, and
   an unmatched brace. */
#if !defined(__x86_64__) && !defined(__i386__)
# error "This code has only been tested on x86 platforms."
#include <including_a_nonexistent_file_will_stop_some_compilers_from_continuing_with_a_hopeless_task>
{ /* maybe an unbalanced brace will terminate the compilation */
/* Feel free to try the Random123 library on other architectures by changing
the conditions that reach this error, but you should consider it a
porting exercise and expect to encounter bugs and deficiencies.
Please let the authors know of any successes (or failures). */
#endif
/* Decoration and diagnostic helpers.  Each is defined only when the
   includer has not already provided its own definition. */
#if !defined(RANDOM_ITERATOR_R123_CUDA_DEVICE)
/* Expands to nothing in this header. */
#  define RANDOM_ITERATOR_R123_CUDA_DEVICE
#endif
#if !defined(RANDOM_ITERATOR_R123_STATIC_INLINE)
#  define RANDOM_ITERATOR_R123_STATIC_INLINE static inline
#endif
#if !defined(RANDOM_ITERATOR_R123_FORCE_INLINE)
/* Attribute spelling taken from an example found in PGI's emmintrin.h. */
#  define RANDOM_ITERATOR_R123_FORCE_INLINE(decl) decl __attribute__((__always_inline__))
#endif
#if !defined(RANDOM_ITERATOR_R123_BUILTIN_EXPECT)
/* Parenthesises the expression and drops the likelihood hint. */
#  define RANDOM_ITERATOR_R123_BUILTIN_EXPECT(expr,likely) (expr)
#endif
#if !defined(RANDOM_ITERATOR_R123_ASSERT)
/* <assert.h> is only pulled in when the default assert()-based macro is used. */
#  include <assert.h>
#  define RANDOM_ITERATOR_R123_ASSERT(x) assert(x)
#endif
/* PGI through 13.2 appears to support MMX, SSE, SSE3, SSSE3, SSE4a and
   ABM, but not SSE4.1 or SSE4.2, so both SSE4 switches default to off. */
#if !defined(RANDOM_ITERATOR_R123_USE_SSE4_1)
#  define RANDOM_ITERATOR_R123_USE_SSE4_1 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_SSE4_2)
#  define RANDOM_ITERATOR_R123_USE_SSE4_2 0
#endif
/* PGI through 13.2 doesn't appear to support AES-NI either. */
#if !defined(RANDOM_ITERATOR_R123_USE_AES_NI)
#  define RANDOM_ITERATOR_R123_USE_AES_NI 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_SSE)
/* Compiling the SSE code in Random123 is pointless unless SSE2 is
   available, so the switch is keyed on __SSE2__. */
#  if defined(__SSE2__)
#    define RANDOM_ITERATOR_R123_USE_SSE 1
#  else
#    define RANDOM_ITERATOR_R123_USE_SSE 0
#  endif
#endif
#ifndef RANDOM_ITERATOR_R123_USE_AES_OPENSSL
/* There isn't really a good way to tell at compile time whether
openssl is available. Without a pre-compilation configure-like
tool, it's less error-prone to guess that it isn't available. Add
-DRANDOM_ITERATOR_R123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to
play with openssl */
#define RANDOM_ITERATOR_R123_USE_AES_OPENSSL 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_GNU_UINT128
#define RANDOM_ITERATOR_R123_USE_GNU_UINT128 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_ASM_GNU
#define RANDOM_ITERATOR_R123_USE_ASM_GNU 1
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CPUID_MSVC
#define RANDOM_ITERATOR_R123_USE_CPUID_MSVC 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_X86INTRIN_H
#define RANDOM_ITERATOR_R123_USE_X86INTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_IA32INTRIN_H
#define RANDOM_ITERATOR_R123_USE_IA32INTRIN_H 0
#endif
/* emmintrin.h from PGI #includes xmmintrin.h but then complains at link time
about undefined references to _mm_castsi128_ps(__m128i). Why? */
#ifndef RANDOM_ITERATOR_R123_USE_XMMINTRIN_H
#define RANDOM_ITERATOR_R123_USE_XMMINTRIN_H 1
#endif
#ifndef RANDOM_ITERATOR_R123_USE_EMMINTRIN_H
#define RANDOM_ITERATOR_R123_USE_EMMINTRIN_H 1
#endif
#ifndef RANDOM_ITERATOR_R123_USE_SMMINTRIN_H
#define RANDOM_ITERATOR_R123_USE_SMMINTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_WMMINTRIN_H
#define RANDOM_ITERATOR_R123_USE_WMMINTRIN_H 0
#endif
#if !defined(RANDOM_ITERATOR_R123_USE_INTRIN_H)
/* Default: request <intrin.h> only when the compiler defines __ABM__.
   A value already supplied by the user is left untouched. */
#  if defined(__ABM__)
#    define RANDOM_ITERATOR_R123_USE_INTRIN_H 1
#  else
#    define RANDOM_ITERATOR_R123_USE_INTRIN_H 0
#  endif
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO32_ASM
#define RANDOM_ITERATOR_R123_USE_MULHILO32_ASM 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_ASM
#define RANDOM_ITERATOR_R123_USE_MULHILO64_ASM 1
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN 0
#endif
#ifndef __STDC_CONSTANT_MACROS
#define __STDC_CONSTANT_MACROS
#endif
#include <stdint.h>
#ifndef UINT64_C
#error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
#endif
/* If you add something, it must go in all the other XXfeatures.hpp
and in ../ut_features.cpp */
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _Random123_sse_dot_h__
#define _Random123_sse_dot_h__
#if RANDOM_ITERATOR_R123_USE_SSE
#if RANDOM_ITERATOR_R123_USE_X86INTRIN_H
#include <x86intrin.h>
#endif
#if RANDOM_ITERATOR_R123_USE_IA32INTRIN_H
#include <ia32intrin.h>
#endif
#if RANDOM_ITERATOR_R123_USE_XMMINTRIN_H
#include <xmmintrin.h>
#endif
#if RANDOM_ITERATOR_R123_USE_EMMINTRIN_H
#include <emmintrin.h>
#endif
#if RANDOM_ITERATOR_R123_USE_SMMINTRIN_H
#include <smmintrin.h>
#endif
#if RANDOM_ITERATOR_R123_USE_WMMINTRIN_H
#include <wmmintrin.h>
#endif
#if RANDOM_ITERATOR_R123_USE_INTRIN_H
#include <intrin.h>
#endif
#ifdef __cplusplus
#include <iostream>
#include <limits>
#include <stdexcept>
#endif
#if RANDOM_ITERATOR_R123_USE_ASM_GNU
/* haveAESNI(): run-time probe for AES support, GNU inline-asm flavour.
   Executes the cpuid instruction with 1 in the 'a' register and returns
   bit 25 of the value left in the 'c' register (1 if set, 0 otherwise).
   bit25 of CX tells us whether AES is enabled. */
RANDOM_ITERATOR_R123_STATIC_INLINE int haveAESNI(){
unsigned int eax, ebx, ecx, edx;
/* All four registers are listed as outputs so the compiler knows cpuid
   overwrites them; only ecx is examined afterwards. */
__asm__ __volatile__ ("cpuid": "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) :
"a" (1));
return (ecx>>25) & 1;
}
#elif RANDOM_ITERATOR_R123_USE_CPUID_MSVC
/* haveAESNI(): same probe through the __cpuid intrinsic.  Calls
   __cpuid(CPUInfo, 1) and returns bit 25 of CPUInfo[2]
   (presumably the ECX slot -- confirm against the __cpuid docs). */
RANDOM_ITERATOR_R123_STATIC_INLINE int haveAESNI(){
int CPUInfo[4];
__cpuid(CPUInfo, 1);
return (CPUInfo[2]>>25)&1;
}
#else /* RANDOM_ITERATOR_R123_USE_CPUID_??? */
/* Neither probing method was selected: emit a compile-time warning and
   make haveAESNI() unconditionally return 0. */
#warning "No RANDOM_ITERATOR_R123_USE_CPUID_XXX method chosen. haveAESNI will always return false"
RANDOM_ITERATOR_R123_STATIC_INLINE int haveAESNI(){
return 0;
}
#endif /* RANDOM_ITERATOR_R123_USE_ASM_GNU || RANDOM_ITERATOR_R123_USE_CPUID_MSVC */
// There is a lot of annoying and inexplicable variation in the
// SSE intrinsics available in different compilation environments.
// The details seem to depend on the compiler, the version and
// the target architecture. Rather than insisting on
// RANDOM_ITERATOR_R123_USE_feature tests for each of these in each of the
// compilerfeatures.h files we just keep the complexity localized
// to here...
#if (defined(__ICC) && __ICC<1210) || (defined(_MSC_VER) && !defined(_WIN64))
/* Fallback for environments that lack an intrinsic to assemble an
   __m128i from two 64-bit words: use the 4x32-bit intrinsic instead.
   N.B. It looks like Intel added _mm_set_epi64x to icc version 12.1
   in Jan 2012.
   Each 64-bit argument is split into two 32-bit halves through a union
   and the halves are handed to _mm_set_epi32 in the order
   (v1 half[1], v1 half[0], v0 half[1], v0 half[0]). */
RANDOM_ITERATOR_R123_STATIC_INLINE __m128i _mm_set_epi64x(uint64_t v1, uint64_t v0){
    union{
        uint64_t whole;
        uint32_t half[2];
    } first, second;
    first.whole = v1;
    second.whole = v0;
    return _mm_set_epi32(first.half[1], first.half[0],
                         second.half[1], second.half[0]);
}
#endif
/* _mm_extract_lo64 abstracts the task of extracting the low 64-bit
word from an __m128i. The _mm_cvtsi128_si64 intrinsic does the job
on 64-bit platforms. Unfortunately, both MSVC and Open64 fail
assertions in ut_M128.cpp and ut_carray.cpp when we use the
_mm_cvtsi128_si64 intrinsic. (See
https://bugs.open64.net/show_bug.cgi?id=873 for the Open64 bug).
On 32-bit platforms, there's no MOVQ, so there's no intrinsic.
Finally, even if the intrinsic exists, it may be spelled with or
without the 'x'.
*/
#if !defined(__x86_64__) || defined(_MSC_VER) || defined(__OPEN64__)
/* No usable intrinsic in this environment: store the whole register
   into a union and read back element 0 of its 64-bit view. */
RANDOM_ITERATOR_R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){
    union{
        uint64_t words[2];
        __m128i vec;
    } spill;
    _mm_store_si128(&spill.vec, si);
    return spill.words[0];
}
#elif defined(__llvm__) || defined(__ICC)
/* Here the intrinsic is spelled without the 'x'. */
RANDOM_ITERATOR_R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){
    const uint64_t low = (uint64_t)_mm_cvtsi128_si64(si);
    return low;
}
#else /* GNUC, others */
/* FWIW, gcc's emmintrin.h has had the 'x' spelling
   since at least gcc-3.4.4.  The no-'x' spelling showed up
   around 4.2. */
RANDOM_ITERATOR_R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){
    const uint64_t low = (uint64_t)_mm_cvtsi128_si64x(si);
    return low;
}
#endif
#if defined(__GNUC__) && __GNUC__ < 4
/* the cast builtins showed up in gcc4. */
/* Shim for gcc older than 4: supplies _mm_castsi128_ps by casting the
   __m128i argument directly to __m128. */
RANDOM_ITERATOR_R123_STATIC_INLINE __m128 _mm_castsi128_ps(__m128i si){
return (__m128)si;
}
#endif
#ifdef __cplusplus
struct r123m128i{
__m128i m;
#if RANDOM_ITERATOR_R123_USE_CXX11_UNRESTRICTED_UNIONS
// C++98 forbids a union member from having *any* constructors.
// C++11 relaxes this, and allows union members to have constructors
// as long as there is a "trivial" default construtor. So in C++11
// we can provide a r123m128i constructor with an __m128i argument, and still
// have the default (and hence trivial) default constructor.
r123m128i() = default;
r123m128i(__m128i _m): m(_m){}
#endif
r123m128i& operator=(const __m128i& rhs){ m=rhs; return *this;}
r123m128i& operator=(RANDOM_ITERATOR_R123_ULONG_LONG n){ m = _mm_set_epi64x(0, n); return *this;}
#if RANDOM_ITERATOR_R123_USE_CXX11_EXPLICIT_CONVERSIONS
// With C++11 we can attach explicit to the bool conversion operator
// to disambiguate undesired promotions. For g++, this works
// only in 4.5 and above.
explicit operator bool() const {return _bool();}
#else
// Pre-C++11, we have to do something else. Google for the "safe bool"
// idiom for other ideas...
operator const void*() const{return _bool()?this:0;}
#endif
operator __m128i() const {return m;}
private:
#if RANDOM_ITERATOR_R123_USE_SSE4_1
bool _bool() const{ return !_mm_testz_si128(m,m); }
#else
bool _bool() const{ return 0xf != _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(m, _mm_setzero_si128()))); }
#endif
};
// Pre-increment.  Adds one to the low 64 bits of v.m; if they come out
// as zero afterwards, one is also added to the high 64 bits.  Returns v.
RANDOM_ITERATOR_R123_STATIC_INLINE r123m128i& operator++(r123m128i& v){
    __m128i& ctr = v.m;
    const __m128i low_one = _mm_set_epi64x(RANDOM_ITERATOR_R123_64BIT(0), RANDOM_ITERATOR_R123_64BIT(1));
    ctr = _mm_add_epi64(ctr, low_one);
#if RANDOM_ITERATOR_R123_USE_SSE4_1
    // Test the counter against a mask covering only the low 64 bits.
    const __m128i low_mask = _mm_set_epi64x(0, ~(RANDOM_ITERATOR_R123_64BIT(0)));
    if( RANDOM_ITERATOR_R123_BUILTIN_EXPECT(_mm_testz_si128(ctr,low_mask), 0) ){
        const __m128i high_one = _mm_set_epi64x(RANDOM_ITERATOR_R123_64BIT(1), RANDOM_ITERATOR_R123_64BIT(0));
        ctr = _mm_add_epi64(ctr, high_one);
    }
#else
    const __m128i is_zero = _mm_cmpeq_epi32(ctr, _mm_setzero_si128());
    unsigned zero_bits = _mm_movemask_ps( _mm_castsi128_ps(is_zero));
    // The low two bits of zero_bits are 11 iff the low 64 bits of
    // the counter are zero.
    if( RANDOM_ITERATOR_R123_BUILTIN_EXPECT((zero_bits&0x3) == 0x3, 0) ){
        const __m128i high_one = _mm_set_epi64x(1,0);
        ctr = _mm_add_epi64(ctr, high_one);
    }
#endif
    return v;
}
// Adds the integer n to lhs and returns lhs.  The 64-bit lane add does
// not carry between the two halves of the register, so the carry out of
// the low half is detected and applied by hand.
RANDOM_ITERATOR_R123_STATIC_INLINE r123m128i& operator+=(r123m128i& lhs, RANDOM_ITERATOR_R123_ULONG_LONG n){
    __m128i sum = _mm_add_epi64(lhs.m, _mm_set_epi64x(0, n));
    // After an unsigned wrap-around the low half ends up smaller than
    // the amount that was added.
    const uint64_t low_half = _mm_extract_lo64(sum);
    if(low_half < n){
        sum = _mm_add_epi64(sum, _mm_set_epi64x(1,0));
    }
    lhs.m = sum;
    return lhs;
}
// This overload has to exist because it is present, though never used,
// in r123array1xm128i::incr.  Calling it always throws.
RANDOM_ITERATOR_R123_STATIC_INLINE bool operator<=(RANDOM_ITERATOR_R123_ULONG_LONG, const r123m128i &){
    throw std::runtime_error("operator<=(unsigned long long, r123m128i) is unimplemented.");
}

// The ordering comparisons below are deliberately unimplemented.  They
// are still declared because, without them, an expression such as
// M1 < M2 would compile via an implicit conversion through void*.
// Sigh...  Every one of them throws std::runtime_error.
RANDOM_ITERATOR_R123_STATIC_INLINE bool operator<(const r123m128i&, const r123m128i&){
    throw std::runtime_error("operator<(r123m128i, r123m128i) is unimplemented.");
}
RANDOM_ITERATOR_R123_STATIC_INLINE bool operator<=(const r123m128i&, const r123m128i&){
    throw std::runtime_error("operator<=(r123m128i, r123m128i) is unimplemented.");
}
RANDOM_ITERATOR_R123_STATIC_INLINE bool operator>(const r123m128i&, const r123m128i&){
    throw std::runtime_error("operator>(r123m128i, r123m128i) is unimplemented.");
}
RANDOM_ITERATOR_R123_STATIC_INLINE bool operator>=(const r123m128i&, const r123m128i&){
    throw std::runtime_error("operator>=(r123m128i, r123m128i) is unimplemented.");
}
// Equality of two wrapped registers: all four 32-bit elements must
// compare equal (movemask of the comparison result is 0xf).
RANDOM_ITERATOR_R123_STATIC_INLINE bool operator==(const r123m128i &lhs, const r123m128i &rhs){
    const __m128i elem_eq = _mm_cmpeq_epi32(lhs, rhs);
    return 0xf==_mm_movemask_ps(_mm_castsi128_ps(elem_eq));
}
// Negation of the register/register operator==.
RANDOM_ITERATOR_R123_STATIC_INLINE bool operator!=(const r123m128i &lhs, const r123m128i &rhs){
    const bool same = (lhs==rhs);
    return !same;
}
// Integer on the left: wrap _mm_set_epi64x(0, lhs) and compare registers.
RANDOM_ITERATOR_R123_STATIC_INLINE bool operator==(RANDOM_ITERATOR_R123_ULONG_LONG lhs, const r123m128i &rhs){
    r123m128i widened;
    widened.m = _mm_set_epi64x(0, lhs);
    return widened == rhs;
}
// Negation of the integer/register operator==.
RANDOM_ITERATOR_R123_STATIC_INLINE bool operator!=(RANDOM_ITERATOR_R123_ULONG_LONG lhs, const r123m128i &rhs){
    const bool same = (lhs==rhs);
    return !same;
}
// Stream insertion: stores the register into a union and writes its two
// 64-bit elements, element 0 first, separated by a single space.
RANDOM_ITERATOR_R123_STATIC_INLINE std::ostream& operator<<(std::ostream& os, const r123m128i& m){
    union{
        uint64_t words[2];
        __m128i vec;
    } spill;
    _mm_storeu_si128(&spill.vec, m.m);
    os << spill.words[0] << " " << spill.words[1];
    return os;
}
// Stream extraction: reads two unsigned 64-bit integers, element 0 first
// (the order in which operator<< writes them), and stores them in m via
// _mm_set_epi64x(second, first).  Returns the stream; callers should
// test it to find out whether both values were actually read.
RANDOM_ITERATOR_R123_STATIC_INLINE std::istream& operator>>(std::istream& is, r123m128i& m){
    // Zero-initialise the buffer: once the stream is in a failed state a
    // further >> leaves its target untouched, so without this a failed
    // read would feed an uninitialised word into _mm_set_epi64x below.
    uint64_t u64[2] = {0, 0};
    is >> u64[0] >> u64[1];
    m.m = _mm_set_epi64x(u64[1], u64[0]);
    return is;
}
// Builds a value of type T from an array of 32-bit words.
template<typename T> inline T assemble_from_u32(uint32_t *p32); // forward declaration

// Specialisation for r123m128i: consumes p32[0..3] and passes them to
// _mm_set_epi32 with p32[3] as the first argument and p32[0] as the last.
template <>
inline r123m128i assemble_from_u32<r123m128i>(uint32_t *p32){
    r123m128i packed;
    packed.m = _mm_set_epi32(p32[3], p32[2], p32[1], p32[0]);
    return packed;
}
#else
/* Plain-C form of r123m128i: a struct holding one __m128i, without any
   of the operators that the C++ definition provides. */
typedef struct {
__m128i m;
} r123m128i;
#endif /* __cplusplus */
#else /* !RANDOM_ITERATOR_R123_USE_SSE */
/* Build without SSE (RANDOM_ITERATOR_R123_USE_SSE is 0): no probing is
   attempted and haveAESNI() always returns 0. */
RANDOM_ITERATOR_R123_STATIC_INLINE int haveAESNI(){
return 0;
}
#endif /* RANDOM_ITERATOR_R123_USE_SSE */
#endif /* _Random123_sse_dot_h__ */
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __sunprofeatures_dot_hpp
#define __sunprofeatures_dot_hpp
#ifndef RANDOM_ITERATOR_R123_STATIC_INLINE
#define RANDOM_ITERATOR_R123_STATIC_INLINE static inline
#endif
#ifndef RANDOM_ITERATOR_R123_FORCE_INLINE
#define RANDOM_ITERATOR_R123_FORCE_INLINE(decl) decl
#endif
#ifndef RANDOM_ITERATOR_R123_CUDA_DEVICE
#define RANDOM_ITERATOR_R123_CUDA_DEVICE
#endif
#ifndef RANDOM_ITERATOR_R123_ASSERT
#include <assert.h>
#define RANDOM_ITERATOR_R123_ASSERT(x) assert(x)
#endif
#ifndef RANDOM_ITERATOR_R123_BUILTIN_EXPECT
/* No branch-prediction hint is used for this compiler: the macro yields
   expr and ignores `likely`.  The expansion is parenthesized so that an
   argument containing operators keeps its grouping wherever the macro
   is used, e.g. !RANDOM_ITERATOR_R123_BUILTIN_EXPECT(a || b, 1). */
#define RANDOM_ITERATOR_R123_BUILTIN_EXPECT(expr,likely) (expr)
#endif
// The basic idiom is:
// #ifndef RANDOM_ITERATOR_R123_SOMETHING
// #if some condition
// #define RANDOM_ITERATOR_R123_SOMETHING 1
// #else
// #define RANDOM_ITERATOR_R123_SOMETHING 0
// #endif
// #endif
// This idiom allows an external user to override any decision
// in this file with a command-line -DRANDOM_ITERATOR_R123_SOMETHING=1 or -DRANDOM_ITERATOR_R123_SOMETHING=0
// An alternative idiom is:
// #ifndef RANDOM_ITERATOR_R123_SOMETHING
// #define RANDOM_ITERATOR_R123_SOMETHING (some boolean expression)
// #endif
// where the boolean expression might contain previously-defined RANDOM_ITERATOR_R123_SOMETHING_ELSE
// pp-symbols.
#ifndef RANDOM_ITERATOR_R123_USE_AES_NI
#define RANDOM_ITERATOR_R123_USE_AES_NI 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_SSE4_2
#define RANDOM_ITERATOR_R123_USE_SSE4_2 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_SSE4_1
#define RANDOM_ITERATOR_R123_USE_SSE4_1 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_SSE
#define RANDOM_ITERATOR_R123_USE_SSE 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_AES_OPENSSL
#define RANDOM_ITERATOR_R123_USE_AES_OPENSSL 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_GNU_UINT128
#define RANDOM_ITERATOR_R123_USE_GNU_UINT128 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_ASM_GNU
#define RANDOM_ITERATOR_R123_USE_ASM_GNU 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CPUID_MSVC
#define RANDOM_ITERATOR_R123_USE_CPUID_MSVC 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_X86INTRIN_H
#define RANDOM_ITERATOR_R123_USE_X86INTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_IA32INTRIN_H
#define RANDOM_ITERATOR_R123_USE_IA32INTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_XMMINTRIN_H
#define RANDOM_ITERATOR_R123_USE_XMMINTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_EMMINTRIN_H
#define RANDOM_ITERATOR_R123_USE_EMMINTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_SMMINTRIN_H
#define RANDOM_ITERATOR_R123_USE_SMMINTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_WMMINTRIN_H
#define RANDOM_ITERATOR_R123_USE_WMMINTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_INTRIN_H
#define RANDOM_ITERATOR_R123_USE_INTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO16_ASM
#define RANDOM_ITERATOR_R123_USE_MULHILO16_ASM 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO32_ASM
#define RANDOM_ITERATOR_R123_USE_MULHILO32_ASM 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_ASM
#define RANDOM_ITERATOR_R123_USE_MULHILO64_ASM 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_PHILOX_64BIT
#define RANDOM_ITERATOR_R123_USE_PHILOX_64BIT 0
#endif
#ifndef __STDC_CONSTANT_MACROS
#define __STDC_CONSTANT_MACROS
#endif
#include <stdint.h>
#ifndef UINT64_C
#error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
#endif
// If you add something, it must go in all the other XXfeatures.hpp
// and in ../ut_features.cpp
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Copyright (c) 2013, Los Alamos National Security, LLC
All rights reserved.
Copyright 2013. Los Alamos National Security, LLC. This software was produced
under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
Laboratory (LANL), which is operated by Los Alamos National Security, LLC for
the U.S. Department of Energy. The U.S. Government has rights to use,
reproduce, and distribute this software. NEITHER THE GOVERNMENT NOR LOS
ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE. If software is modified
to produce derivative works, such modified software should be clearly marked,
so as not to confuse it with the version available from LANL.
*/
#ifndef __xlcfeatures_dot_hpp
#define __xlcfeatures_dot_hpp
#if !defined(__x86_64__) && !defined(__i386__) && !defined(__powerpc__)
# error "This code has only been tested on x86 and PowerPC platforms."
#include <including_a_nonexistent_file_will_stop_some_compilers_from_continuing_with_a_hopeless_task>
{ /* maybe an unbalanced brace will terminate the compilation */
/* Feel free to try the Random123 library on other architectures by changing
the conditions that reach this error, but you should consider it a
porting exercise and expect to encounter bugs and deficiencies.
Please let the authors know of any successes (or failures). */
#endif
#ifdef __cplusplus
/* builtins are automatically available to xlc. To use them with xlc++,
one must include builtins.h. c.f
http://publib.boulder.ibm.com/infocenter/cellcomp/v101v121/index.jsp?topic=/com.ibm.xlcpp101.cell.doc/compiler_ref/compiler_builtins.html
*/
#include <builtins.h>
#endif
#ifndef RANDOM_ITERATOR_R123_STATIC_INLINE
#define RANDOM_ITERATOR_R123_STATIC_INLINE static inline
#endif
#ifndef RANDOM_ITERATOR_R123_FORCE_INLINE
#define RANDOM_ITERATOR_R123_FORCE_INLINE(decl) decl __attribute__((__always_inline__))
#endif
#ifndef RANDOM_ITERATOR_R123_CUDA_DEVICE
#define RANDOM_ITERATOR_R123_CUDA_DEVICE
#endif
#ifndef RANDOM_ITERATOR_R123_ASSERT
#include <assert.h>
#define RANDOM_ITERATOR_R123_ASSERT(x) assert(x)
#endif
#ifndef RANDOM_ITERATOR_R123_BUILTIN_EXPECT
#define RANDOM_ITERATOR_R123_BUILTIN_EXPECT(expr,likely) __builtin_expect(expr,likely)
#endif
#ifndef RANDOM_ITERATOR_R123_USE_AES_NI
#define RANDOM_ITERATOR_R123_USE_AES_NI 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_SSE4_2
#define RANDOM_ITERATOR_R123_USE_SSE4_2 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_SSE4_1
#define RANDOM_ITERATOR_R123_USE_SSE4_1 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_SSE
#define RANDOM_ITERATOR_R123_USE_SSE 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_AES_OPENSSL
/* There isn't really a good way to tell at compile time whether
openssl is available. Without a pre-compilation configure-like
tool, it's less error-prone to guess that it isn't available. Add
-DRANDOM_ITERATOR_R123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to
play with openssl */
#define RANDOM_ITERATOR_R123_USE_AES_OPENSSL 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_GNU_UINT128
#define RANDOM_ITERATOR_R123_USE_GNU_UINT128 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_ASM_GNU
#define RANDOM_ITERATOR_R123_USE_ASM_GNU 1
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CPUID_MSVC
#define RANDOM_ITERATOR_R123_USE_CPUID_MSVC 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_X86INTRIN_H
#define RANDOM_ITERATOR_R123_USE_X86INTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_IA32INTRIN_H
#define RANDOM_ITERATOR_R123_USE_IA32INTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_XMMINTRIN_H
#define RANDOM_ITERATOR_R123_USE_XMMINTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_EMMINTRIN_H
#define RANDOM_ITERATOR_R123_USE_EMMINTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_SMMINTRIN_H
#define RANDOM_ITERATOR_R123_USE_SMMINTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_WMMINTRIN_H
#define RANDOM_ITERATOR_R123_USE_WMMINTRIN_H 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_INTRIN_H
#ifdef __ABM__
#define RANDOM_ITERATOR_R123_USE_INTRIN_H 1
#else
#define RANDOM_ITERATOR_R123_USE_INTRIN_H 0
#endif
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO32_ASM
#define RANDOM_ITERATOR_R123_USE_MULHILO32_ASM 0
#endif
/* 64-bit mulhilo through a "multiply high" intrinsic: enabled by default
   only when __powerpc64__ is defined. */
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN)
#  if defined(__powerpc64__)
#    define RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN 1
#  else
#    define RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN 0
#  endif
#endif
/* Name of the 64-bit multiply-high intrinsic. */
#if !defined(RANDOM_ITERATOR_R123_MULHILO64_MULHI_INTRIN)
#  define RANDOM_ITERATOR_R123_MULHILO64_MULHI_INTRIN __mulhdu
#endif
/* The 32-bit multiply-high intrinsic is off by default. */
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO32_MULHI_INTRIN)
#  define RANDOM_ITERATOR_R123_USE_MULHILO32_MULHI_INTRIN 0
#endif
/* Name of the 32-bit multiply-high intrinsic. */
#if !defined(RANDOM_ITERATOR_R123_MULHILO32_MULHI_INTRIN)
#  define RANDOM_ITERATOR_R123_MULHILO32_MULHI_INTRIN __mulhwu
#endif
/* The asm implementation of 64-bit mulhilo is selected only when
   __powerpc64__ is defined AND the intrinsic variant above is not in use.
   The leading 1/0 stands for defined(__powerpc64__). */
#if !defined(RANDOM_ITERATOR_R123_USE_MULHILO64_ASM)
#  if defined(__powerpc64__)
#    define RANDOM_ITERATOR_R123_USE_MULHILO64_ASM (1 /*defined(__powerpc64__)*/ && !(RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN))
#  else
#    define RANDOM_ITERATOR_R123_USE_MULHILO64_ASM (0 /*defined(__powerpc64__)*/ && !(RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN))
#  endif
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN 0
#endif
#ifndef __STDC_CONSTANT_MACROS
#define __STDC_CONSTANT_MACROS
#endif
#include <stdint.h>
#ifndef UINT64_C
#error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
#endif
/* If you add something, it must go in all the other XXfeatures.hpp
and in ../ut_features.cpp */
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __r123_gslmicrorng_dot_h__
#define __r123_gslmicrorng_dot_h__
#include <gsl/gsl_rng.h>
#include <string.h>
/** The macro: GSL_MICRORNG(NAME, CBRNGNAME) is the GSL
analog analog of the C++ r123::MicroURNG template. It declares a gsl_rng
type named gsl_rng_NAME which uses the underlying CBRNGNAME
and can be invoked a limited number of times between calls to NAME_reset.
When the underlying CBRNG's \c ctr_t is an \ref arrayNxW "r123arrayNxW",
and the gsl_rng_NAME may called up to \c N*2^32 times
between calls to \c NAME_reset.
\c NAME_reset takes a gsl_rng_NAME type, a counter and a key as arguments.
It restarts the micro-rng with a new base counter and key.
Note that you must call NAME_reset before the first use
of a gsl_rng. NAME_reset is not called automatically by
gsl_rng_alloc().
@code
#include <Random123/threefry.h>
#include <Random123/gsl_microrng.h> // this file
GSL_MICRORNG(microcbrng, threefry4x64, 20) // creates gsl_rng_microcbrng
int main(int argc, char** argv) {
gsl_rng *r = gsl_rng_alloc(gsl_rng_microcbrng);
threefry4x64_ctr_t c = {{}};
threefry4x64_key_t k = {{}};
for (...) {
c.v[0] = ??; // some application variable
microcbrng_reset(r, c, k);
for (...) {
// gaussian calls r several times. It is safe for
// r to be used upto 2^20 times in this loop
something[i] = gsl_ran_gaussian(r, 1.5);
}
}
}
@endcode
*/
#define GSL_MICRORNG(NAME, CBRNGNAME) \
const gsl_rng_type *gsl_rng_##NAME; \
\
typedef struct{ \
CBRNGNAME##_ctr_t ctr; \
CBRNGNAME##_ctr_t r; \
CBRNGNAME##_key_t key; \
RANDOM_ITERATOR_R123_ULONG_LONG n; \
int elem; \
} NAME##_state; \
\
static unsigned long int NAME##_get(void *vstate){ \
NAME##_state *st = (NAME##_state *)vstate; \
const int N=sizeof(st->ctr.v)/sizeof(st->ctr.v[0]); \
if( st->elem == 0 ){ \
CBRNGNAME##_ctr_t c = st->ctr; \
c.v[N-1] |= st->n<<(RANDOM_ITERATOR_R123_W(CBRNGNAME##_ctr_t)-32); \
st->n++; \
st->r = CBRNGNAME(c, st->key); \
st->elem = N; \
} \
return 0xffffffff & st->r.v[--st->elem]; \
} \
\
static double \
NAME##_get_double (void * vstate) \
{ \
return NAME##_get (vstate)/4294967296.; \
} \
\
static void NAME##_set(void *vstate, unsigned long int s){ \
NAME##_state *st = (NAME##_state *)vstate; \
(void)s; /* ignored */ \
st->elem = 0; \
st->n = ~0; /* will abort if _reset is not called */ \
} \
\
static const gsl_rng_type NAME##_type = { \
#NAME, \
0xffffffffUL, \
0, \
sizeof(NAME##_state), \
&NAME##_set, \
&NAME##_get, \
&NAME##_get_double \
}; \
\
RANDOM_ITERATOR_R123_STATIC_INLINE void NAME##_reset(const gsl_rng* gr, CBRNGNAME##_ctr_t c, CBRNGNAME##_key_t k) { \
NAME##_state* state = (NAME##_state *)gr->state; \
state->ctr = c; \
state->key = k; \
state->n = 0; \
state->elem = 0; \
} \
\
const gsl_rng_type *gsl_rng_##NAME = &NAME##_type
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _philox_dot_h_
#define _philox_dot_h_
/** \cond HIDDEN_FROM_DOXYGEN */
#include "features/compilerfeatures.h"
#include "array.h"
/*
// Macros _Foo_tpl are code generation 'templates' They define
// inline functions with names obtained by mangling Foo and the
// macro arguments. E.g.,
// _mulhilo_tpl(32, uint32_t, uint64_t)
// expands to a definition of:
// mulhilo32(uint32_t, uint32_t, uint32_t *, uint32_t *)
// We then 'instantiate the template' to define
// several different functions, e.g.,
// mulhilo32
// mulhilo64
// These functions will be visible to user code, and may
// also be used later in subsequent templates and definitions.
// A template for mulhilo using a temporary of twice the word-width.
// Gcc figures out that this can be reduced to a single 'mul' instruction,
// despite the apparent use of double-wide variables, shifts, etc. It's
// obviously not guaranteed that all compilers will be that smart, so
// other implementations might be preferable, e.g., using an intrinsic
// or an asm block. On the other hand, for 32-bit multiplies,
// this *is* perfectly standard C99 - any C99 compiler should
// understand it and produce correct code. For 64-bit multiplies,
// it's only usable if the compiler recognizes that it can do
// arithmetic on a 128-bit type. That happens to be true for gcc on
// x86-64, and powerpc64 but not much else.
*/
/* mulhilo##W built on a double-width temporary: the operands are widened to
   Dword, multiplied once, and the 2W-bit result is split.  The upper W bits
   are stored through hip, the lower W bits are returned. */
#define _mulhilo_dword_tpl(W, Word, Dword) \
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word* hip){ \
    const Dword wide = (Dword)a * (Dword)b; /* exact 2W-bit product */ \
    *hip = (Word)(wide >> W);               /* upper word */ \
    return (Word)wide;                      /* lower word */ \
}
/*
// A template for mulhilo using gnu-style asm syntax.
// INSN can be "mulw", "mull" or "mulq".
// FIXME - porting to other architectures, we'll need still-more conditional
// branching here. Note that intrinsics are usually preferable.
*/
#ifdef __powerpc__
/* PowerPC flavour of mulhilo##W.  INSN is emitted as a three-operand
   instruction "INSN %0,%1,%2" with dx as the only output and b, ax as register
   inputs; this file instantiates it with "mulhwu" (32-bit) and "mulhdu"
   (64-bit).  Whatever the instruction leaves in dx is stored through hip as
   the high word, and the low word is computed separately by the ordinary C
   multiplication ax*b in the return statement. */
#define _mulhilo_asm_tpl(W, Word, INSN) \
RANDOM_ITERATOR_R123_STATIC_INLINE Word mulhilo##W(Word ax, Word b, Word *hip){ \
Word dx = 0; \
__asm__("\n\t" \
INSN " %0,%1,%2\n\t" \
: "=r"(dx) \
: "r"(b), "r"(ax) \
); \
*hip = dx; \
return ax*b; \
}
#else
/* Default (non-PowerPC) flavour of mulhilo##W.  INSN is emitted with a single
   explicit operand, %2, which is b in a general register.  The input ax is
   tied to output operand 0 by the "0" matching constraint, so it enters and
   leaves in the register selected by "=a"; dx is an output only, taken from
   the register selected by "=d" ("a" and "d" are GCC's x86 machine constraints
   for the a and d registers).  After the instruction, ax is returned as the
   low word and dx is stored through hip as the high word.  This file
   instantiates it with "mull" (32-bit) and "mulq" (64-bit). */
#define _mulhilo_asm_tpl(W, Word, INSN) \
RANDOM_ITERATOR_R123_STATIC_INLINE Word mulhilo##W(Word ax, Word b, Word *hip){ \
Word dx; \
__asm__("\n\t" \
INSN " %2\n\t" \
: "=a"(ax), "=d"(dx) \
: "r"(b), "0"(ax) \
); \
*hip = dx; \
return ax; \
}
#endif /* __powerpc__ */
/*
// A template for mulhilo using MSVC-style intrinsics
// For example,_umul128 is an msvc intrinsic, c.f.
// http://msdn.microsoft.com/en-us/library/3dayytw9.aspx
*/
/* mulhilo##W as a thin wrapper around a three-argument intrinsic: INTRIN is
   called as INTRIN(a, b, hip) and its return value is passed straight back to
   the caller, so the intrinsic itself is responsible for writing *hip. */
#define _mulhilo_msvc_intrin_tpl(W, Word, INTRIN) \
RANDOM_ITERATOR_R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word* hip){ \
    const Word lower = INTRIN(a, b, hip); \
    return lower; \
}
/* N.B. This really should be called _mulhilo_mulhi_intrin. It just
happens that CUDA was the first time we used the idiom. */
/* mulhilo##W for platforms that offer a two-argument "multiply high"
   intrinsic: INTRIN(a, b) supplies the word stored through hip, and the
   returned low word is the ordinary (wrapping) product a*b. */
#define _mulhilo_cuda_intrin_tpl(W, Word, INTRIN) \
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, RANDOM_ITERATOR_R123_METAL_THREAD_ADDRESS_SPACE Word* hip){ \
    const Word upper = INTRIN(a, b); \
    *hip = upper; \
    return a*b; \
}
/*
// A template for mulhilo using only word-size operations and
// C99 operators (no adc, no mulhi). It
// requires four multiplies and a dozen or so shifts, adds
// and tests. It's *SLOW*. It can be used to
// implement philoxNx32 on platforms that completely lack
// 64-bit types, e.g., Metal.
// On 32-bit platforms, it could be used to
// implement philoxNx64, but on such platforms both the philoxNx32
// and the threefryNx64 cbrngs are going to have much better
// performance. It is enabled below by RANDOM_ITERATOR_R123_USE_MULHILO64_C99,
// but that is currently (Feb 2019) only set by
// features/metalfeatures.h headers. It can, of course, be
// set with a compile-time -D option.
*/
/* Portable mulhilo##W that never needs a type wider than Word.  Each operand
   is split into two W/2-bit pieces; the four partial products are recombined
   by hand, tracking the two carries that can spill into the high word. */
#define _mulhilo_c99_tpl(W, Word) \
RANDOM_ITERATOR_R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, RANDOM_ITERATOR_R123_METAL_THREAD_ADDRESS_SPACE Word *hip){ \
    const unsigned hbits = W/2; \
    const Word hmask = ((((Word)1)<<hbits)-1); \
    /* the low word is simply the wrapping product */ \
    const Word prod_lo = a*b; \
    /* upper and lower pieces of both operands */ \
    const Word a_up = a>>hbits; \
    const Word a_dn = a&hmask; \
    const Word b_up = b>>hbits; \
    const Word b_dn = b&hmask; \
    /* the two cross products straddle the word boundary */ \
    const Word cross_ud = a_up*b_dn; \
    const Word cross_du = a_dn*b_up; \
    /* sum of the cross products' lower pieces; may need hbits+1 bits */ \
    const Word cross_sum = ((cross_ud&hmask) + (cross_du&hmask)); \
    Word prod_hi = a_up*b_up + (cross_ud>>hbits) + (cross_du>>hbits); \
    /* first carry: overflow of cross_sum beyond hbits bits */ \
    prod_hi += cross_sum >> hbits; \
    /* second carry: adding cross_sum's low piece to the upper piece of */ \
    /* a_dn*b_dn wrapped iff the resulting upper piece of prod_lo is smaller */ \
    prod_hi += ((prod_lo >> hbits) < (cross_sum&hmask)); \
    *hip = prod_hi; \
    return prod_lo; \
}
/*
// A template for mulhilo on a platform that can't do it
// We could put a C version here, but is it better to run *VERY*
// slowly or to just stop and force the user to find another CBRNG?
*/
/* Placeholder used when no mulhilo##W implementation is available.  The
   generated function body consists solely of
   RANDOM_ITERATOR_R123_STATIC_ASSERT with a constant 0 condition and has no
   return statement, so instantiating this template is meant to stop the build
   with the "not implemented" message rather than produce a callable function.
   NOTE(review): this relies on the assert macro (defined outside this file)
   mapping to a real compile-time assertion -- confirm for every supported
   compiler. */
#define _mulhilo_fail_tpl(W, Word) \
RANDOM_ITERATOR_R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word *hip){ \
RANDOM_ITERATOR_R123_STATIC_ASSERT(0, "mulhilo" #W " is not implemented on this machine\n"); \
}
/*
// N.B. There's an MSVC intrinsic called _emul,
// which *might* compile into better code than
// _mulhilo_dword_tpl
*/
/* Define mulhilo32.  The first enabled option wins:
     1. inline asm           (RANDOM_ITERATOR_R123_USE_MULHILO32_ASM)
     2. 64-bit temporary     (RANDOM_ITERATOR_R123_USE_64BIT)
     3. mulhi intrinsic      (RANDOM_ITERATOR_R123_USE_MULHILO32_MULHI_INTRIN)
     4. word-size-only C99 fallback */
#if RANDOM_ITERATOR_R123_USE_MULHILO32_ASM
#  if defined(__powerpc__)
_mulhilo_asm_tpl(32, uint32_t, "mulhwu")
#  else
_mulhilo_asm_tpl(32, uint32_t, "mull")
#  endif /* __powerpc__ */
#elif RANDOM_ITERATOR_R123_USE_64BIT
_mulhilo_dword_tpl(32, uint32_t, uint64_t)
#elif RANDOM_ITERATOR_R123_USE_MULHILO32_MULHI_INTRIN
_mulhilo_cuda_intrin_tpl(32, uint32_t, RANDOM_ITERATOR_R123_MULHILO32_MULHI_INTRIN)
#else
_mulhilo_c99_tpl(32, uint32_t)
#endif /* mulhilo32 selection */
/* Define mulhilo64, but only when the 64-bit Philox variants are enabled.
   The options are tried top to bottom and the first enabled one wins; if none
   is enabled, the "fail" template is instantiated. */
#if RANDOM_ITERATOR_R123_USE_PHILOX_64BIT
#  if RANDOM_ITERATOR_R123_USE_MULHILO64_ASM
#    if defined(__powerpc64__)
_mulhilo_asm_tpl(64, uint64_t, "mulhdu")
#    else
_mulhilo_asm_tpl(64, uint64_t, "mulq")
#    endif /* __powerpc64__ */
#  elif RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN
_mulhilo_msvc_intrin_tpl(64, uint64_t, _umul128)
#  elif RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN
_mulhilo_cuda_intrin_tpl(64, uint64_t, __umul64hi)
#  elif RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN
_mulhilo_cuda_intrin_tpl(64, uint64_t, mul_hi)
#  elif RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN
_mulhilo_cuda_intrin_tpl(64, uint64_t, RANDOM_ITERATOR_R123_MULHILO64_MULHI_INTRIN)
#  elif RANDOM_ITERATOR_R123_USE_GNU_UINT128
_mulhilo_dword_tpl(64, uint64_t, __uint128_t)
#  elif RANDOM_ITERATOR_R123_USE_MULHILO64_C99
_mulhilo_c99_tpl(64, uint64_t)
#  else
_mulhilo_fail_tpl(64, uint64_t)
#  endif /* mulhilo64 selection */
#endif /* RANDOM_ITERATOR_R123_USE_PHILOX_64BIT */
/*
// The multipliers and Weyl constants are "hard coded".
// To change them, you can #define them with different
// values before #include-ing this file.
// This isn't terribly elegant, but it works for C as
// well as C++. A nice C++-only solution would be to
// use template parameters in the style of <random>
*/
/* Round multipliers, PHILOX_M<N>x<W>_<i>: the first argument handed to
   mulhilo<W> by the philox<N>x<W> round functions. */
#if !defined(PHILOX_M2x64_0)
#  define PHILOX_M2x64_0 RANDOM_ITERATOR_R123_64BIT(0xD2B74407B1CE6E93)
#endif
#if !defined(PHILOX_M4x64_0)
#  define PHILOX_M4x64_0 RANDOM_ITERATOR_R123_64BIT(0xD2E7470EE14C6C93)
#endif
#if !defined(PHILOX_M4x64_1)
#  define PHILOX_M4x64_1 RANDOM_ITERATOR_R123_64BIT(0xCA5A826395121157)
#endif
#if !defined(PHILOX_M2x32_0)
#  define PHILOX_M2x32_0 ((uint32_t)0xD256D193)
#endif
#if !defined(PHILOX_M4x32_0)
#  define PHILOX_M4x32_0 ((uint32_t)0xD2511F53)
#endif
#if !defined(PHILOX_M4x32_1)
#  define PHILOX_M4x32_1 ((uint32_t)0xCD9E8D57)
#endif
/* Weyl increments, PHILOX_W<W>_<i>: added to key word i by the bumpkey
   functions.  The 32-bit values equal the upper 32 bits of the 64-bit ones. */
#if !defined(PHILOX_W64_0)
#  define PHILOX_W64_0 RANDOM_ITERATOR_R123_64BIT(0x9E3779B97F4A7C15) /* golden ratio */
#endif
#if !defined(PHILOX_W64_1)
#  define PHILOX_W64_1 RANDOM_ITERATOR_R123_64BIT(0xBB67AE8584CAA73B) /* sqrt(3)-1 */
#endif
#if !defined(PHILOX_W32_0)
#  define PHILOX_W32_0 ((uint32_t)0x9E3779B9)
#endif
#if !defined(PHILOX_W32_1)
#  define PHILOX_W32_1 ((uint32_t)0xBB67AE85)
#endif
/** \endcond */
/* Default number of rounds for each Philox variant.  Each value can be
   overridden by defining the macro before this header is included; it becomes
   the philoxNxW_rounds enumerator used by the philoxNxW(c,k) shorthands. */
#if !defined(PHILOX2x32_DEFAULT_ROUNDS)
#  define PHILOX2x32_DEFAULT_ROUNDS 10
#endif
#if !defined(PHILOX2x64_DEFAULT_ROUNDS)
#  define PHILOX2x64_DEFAULT_ROUNDS 10
#endif
#if !defined(PHILOX4x32_DEFAULT_ROUNDS)
#  define PHILOX4x32_DEFAULT_ROUNDS 10
#endif
#if !defined(PHILOX4x64_DEFAULT_ROUNDS)
#  define PHILOX4x64_DEFAULT_ROUNDS 10
#endif
/** \cond HIDDEN_FROM_DOXYGEN */
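/* N.B. The fourth argument (T) of _philoxNxW_tpl, defined further below, is
ignored by that macro; it allows us to instantiate the same macro with the
same argument list regardless of N. */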
/* One Philox-2xW round.  ctr.v[0] is multiplied by PHILOX_M2x<W>_0 with
   mulhilo<W>; the result's high word is xor-ed with the key word and ctr.v[1]
   to form the new v[0], and the low word becomes the new v[1]. */
#define _philox2xWround_tpl(W, T) \
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE RANDOM_ITERATOR_R123_FORCE_INLINE(struct r123array2x##W _philox2x##W##round(struct r123array2x##W ctr, struct r123array1x##W key)); \
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE struct r123array2x##W _philox2x##W##round(struct r123array2x##W ctr, struct r123array1x##W key){ \
    T upper; \
    const T lower = mulhilo##W(PHILOX_M2x##W##_0, ctr.v[0], &upper); \
    const T mixed = upper^key.v[0]^ctr.v[1]; \
    struct r123array2x##W next = {{mixed, lower}}; \
    return next; \
}
/* Key schedule step for Philox-2xW: returns the key with PHILOX_W<W>_0 added
   to its single word (unsigned arithmetic, so the sum wraps). */
#define _philox2xWbumpkey_tpl(W) \
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE struct r123array1x##W _philox2x##W##bumpkey( struct r123array1x##W key) { \
    struct r123array1x##W bumped = key; \
    bumped.v[0] += PHILOX_W##W##_0; \
    return bumped; \
}
/* One Philox-4xW round.  ctr.v[0] and ctr.v[2] are multiplied by
   PHILOX_M4x<W>_0 and PHILOX_M4x<W>_1 respectively with mulhilo<W>.  The output is
     v[0] = hi(M1*v[2]) ^ v[1] ^ key[0]     v[1] = lo(M1*v[2])
     v[2] = hi(M0*v[0]) ^ v[3] ^ key[1]     v[3] = lo(M0*v[0]) */
#define _philox4xWround_tpl(W, T) \
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE RANDOM_ITERATOR_R123_FORCE_INLINE(struct r123array4x##W _philox4x##W##round(struct r123array4x##W ctr, struct r123array2x##W key)); \
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE struct r123array4x##W _philox4x##W##round(struct r123array4x##W ctr, struct r123array2x##W key){ \
    T upperA; \
    T upperB; \
    const T lowerA = mulhilo##W(PHILOX_M4x##W##_0, ctr.v[0], &upperA); \
    const T lowerB = mulhilo##W(PHILOX_M4x##W##_1, ctr.v[2], &upperB); \
    const T mixed0 = upperB^ctr.v[1]^key.v[0]; \
    const T mixed2 = upperA^ctr.v[3]^key.v[1]; \
    struct r123array4x##W next = {{mixed0, lowerB, mixed2, lowerA}}; \
    return next; \
}
/* Key schedule step for Philox-4xW: returns the key with PHILOX_W<W>_0 added
   to word 0 and PHILOX_W<W>_1 added to word 1 (unsigned, wrapping). */
#define _philox4xWbumpkey_tpl(W) \
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE struct r123array2x##W _philox4x##W##bumpkey( struct r123array2x##W key) { \
    struct r123array2x##W bumped = key; \
    bumped.v[0] += PHILOX_W##W##_0; \
    bumped.v[1] += PHILOX_W##W##_1; \
    return bumped; \
}
/** \endcond */
/** \cond HIDDEN_FROM_DOXYGEN */
/* One unrolled step of philoxNxW_R, used only inside _philoxNxW_tpl: when more
   than Rmin rounds were requested, advance the key and apply one more round.
   Refers to the enclosing function's R, key and ctr. */
#define _philoxNxW_bumpround(N, W, Rmin) \
    if(R>Rmin){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); }
/** \endcond */
/* Generates the C-level API of one Philox variant:
     - enum r123_enum_philoxNxW with philoxNxW_rounds (the default round count),
     - typedefs philoxNxW_ctr_t (N words), philoxNxW_key_t and philoxNxW_ukey_t
       (Nhalf words each),
     - philoxNxWkeyinit(uk), which returns uk unchanged,
     - philoxNxW_R(R, ctr, key), which applies R rounds to ctr.  The first
       round uses key as given; every later round bumps the key first.  The
       rounds are unrolled, so at most 16 are performed and R<=16 is asserted.
   The fourth macro argument, T, is accepted but not used. */
#define _philoxNxW_tpl(N, Nhalf, W, T) \
/** @ingroup PhiloxNxW */ \
enum r123_enum_philox##N##x##W { philox##N##x##W##_rounds = PHILOX##N##x##W##_DEFAULT_ROUNDS }; \
typedef struct r123array##N##x##W philox##N##x##W##_ctr_t; \
typedef struct r123array##Nhalf##x##W philox##N##x##W##_key_t; \
typedef struct r123array##Nhalf##x##W philox##N##x##W##_ukey_t; \
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE philox##N##x##W##_key_t philox##N##x##W##keyinit(philox##N##x##W##_ukey_t uk) { return uk; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE RANDOM_ITERATOR_R123_FORCE_INLINE(philox##N##x##W##_ctr_t philox##N##x##W##_R(unsigned int R, philox##N##x##W##_ctr_t ctr, philox##N##x##W##_key_t key)); \
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE philox##N##x##W##_ctr_t philox##N##x##W##_R(unsigned int R, philox##N##x##W##_ctr_t ctr, philox##N##x##W##_key_t key) { \
    RANDOM_ITERATOR_R123_ASSERT(R<=16); \
    /* round 1: key used as supplied */ \
    if(R>0){ ctr = _philox##N##x##W##round(ctr, key); } \
    /* rounds 2..16: bump the key, then round */ \
    _philoxNxW_bumpround(N, W, 1) \
    _philoxNxW_bumpround(N, W, 2) \
    _philoxNxW_bumpround(N, W, 3) \
    _philoxNxW_bumpround(N, W, 4) \
    _philoxNxW_bumpround(N, W, 5) \
    _philoxNxW_bumpround(N, W, 6) \
    _philoxNxW_bumpround(N, W, 7) \
    _philoxNxW_bumpround(N, W, 8) \
    _philoxNxW_bumpround(N, W, 9) \
    _philoxNxW_bumpround(N, W, 10) \
    _philoxNxW_bumpround(N, W, 11) \
    _philoxNxW_bumpround(N, W, 12) \
    _philoxNxW_bumpround(N, W, 13) \
    _philoxNxW_bumpround(N, W, 14) \
    _philoxNxW_bumpround(N, W, 15) \
    return ctr; \
}
/* Instantiate the 32-bit family.  Order matters: the bumpkey and round
   helpers must be defined before the philoxNx32_R functions that call them. */
_philox2xWbumpkey_tpl(32)
_philox4xWbumpkey_tpl(32)
_philox2xWround_tpl(32, uint32_t) /* philox2x32round */
_philox4xWround_tpl(32, uint32_t) /* philox4x32round */
_philoxNxW_tpl(2, 1, 32, uint32_t) /* philox2x32_R (the 2x32 bijection) */
_philoxNxW_tpl(4, 2, 32, uint32_t) /* philox4x32_R (the 4x32 bijection) */
/* Same again for the 64-bit family, when enabled. */
#if RANDOM_ITERATOR_R123_USE_PHILOX_64BIT
/** \cond HIDDEN_FROM_DOXYGEN */
_philox2xWbumpkey_tpl(64)
_philox4xWbumpkey_tpl(64)
_philox2xWround_tpl(64, uint64_t) /* philox2x64round */
_philox4xWround_tpl(64, uint64_t) /* philox4x64round */
/** \endcond */
_philoxNxW_tpl(2, 1, 64, uint64_t) /* philox2x64_R (the 2x64 bijection) */
_philoxNxW_tpl(4, 2, 64, uint64_t) /* philox4x64_R (the 4x64 bijection) */
#endif /* RANDOM_ITERATOR_R123_USE_PHILOX_64BIT */
/* Shorthands: philoxNxW(c,k) runs philoxNxW_R with the default round count
   (the philoxNxW_rounds enumerator). */
#define philox2x32(c,k) philox2x32_R(philox2x32_rounds, c, k)
#define philox4x32(c,k) philox4x32_R(philox4x32_rounds, c, k)
#if RANDOM_ITERATOR_R123_USE_PHILOX_64BIT
#define philox2x64(c,k) philox2x64_R(philox2x64_rounds, c, k)
#define philox4x64(c,k) philox4x64_R(philox4x64_rounds, c, k)
#endif /* RANDOM_ITERATOR_R123_USE_PHILOX_64BIT */
#if defined(__cplusplus)
/* C++ wrapper generator.  For a given N and W it defines, inside namespace
   random_iterator_r123:
     - the class template PhiloxNxW_R<ROUNDS>, exposing ctr_type (CType),
       key_type and ukey_type (both KType), the constant `rounds`, and an
       operator()(ctr, key) that forwards to the C function
       philoxNxW_R(ROUNDS, ctr, key);
     - the typedef PhiloxNxW, which is PhiloxNxW_R instantiated with the
       default round count philoxNxW_rounds.
   operator() statically asserts ROUNDS<=16, matching the 16-round unrolling
   of philoxNxW_R. */
#define _PhiloxNxW_base_tpl(CType, KType, N, W) \
namespace random_iterator_r123{ \
template<unsigned int ROUNDS> \
struct Philox##N##x##W##_R{ \
typedef CType ctr_type; \
typedef KType key_type; \
typedef KType ukey_type; \
static const RANDOM_ITERATOR_R123_METAL_CONSTANT_ADDRESS_SPACE unsigned int rounds=ROUNDS; \
inline RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){ \
RANDOM_ITERATOR_R123_STATIC_ASSERT(ROUNDS<=16, "philox is only unrolled up to 16 rounds\n"); \
return philox##N##x##W##_R(ROUNDS, ctr, key); \
} \
}; \
typedef Philox##N##x##W##_R<philox##N##x##W##_rounds> Philox##N##x##W; \
} // namespace random_iterator_r123
/* Instantiate the C++ classes.  The counter array type has N words and the
   key array type has N/2 words; the 64-bit classes exist only when the
   64-bit C functions were generated above. */
_PhiloxNxW_base_tpl(r123array2x32, r123array1x32, 2, 32) // Philox2x32_R<R>
_PhiloxNxW_base_tpl(r123array4x32, r123array2x32, 4, 32) // Philox4x32_R<R>
#if RANDOM_ITERATOR_R123_USE_PHILOX_64BIT
_PhiloxNxW_base_tpl(r123array2x64, r123array1x64, 2, 64) // Philox2x64_R<R>
_PhiloxNxW_base_tpl(r123array4x64, r123array2x64, 4, 64) // Philox4x64_R<R>
#endif
/* The _tpl macros don't quite work to do string-pasting inside comments.
so we just write out the boilerplate documentation four times... */
/**
@defgroup PhiloxNxW Philox Classes and Typedefs
The PhiloxNxW classes export the member functions, typedefs and
operator overloads required by a @ref CBRNG "CBRNG" class.
As described in
<a href="http://dl.acm.org/citation.cfm?doid=2063405"><i>Parallel Random Numbers: As Easy as 1, 2, 3</i> </a>,
the Philox family of counter-based RNGs uses integer multiplication, xor and permutation of W-bit words
to scramble its N-word input counter under an N/2-word key. Philox is a mnemonic for Product HI LO Xor.
@class random_iterator_r123::Philox2x32_R
@ingroup PhiloxNxW
Exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
The template argument, ROUNDS, is the number of times the Philox round
function will be applied.
As of November 2011, the authors know of no statistical flaws with
ROUNDS=6 or more for Philox2x32.
@typedef random_iterator_r123::Philox2x32
@ingroup PhiloxNxW
Philox2x32 is equivalent to Philox2x32_R<10>. With 10 rounds,
Philox2x32 has a considerable safety margin over the minimum number
of rounds with no known statistical flaws, but still has excellent
performance.
@class random_iterator_r123::Philox2x64_R
@ingroup PhiloxNxW
Exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
The template argument, ROUNDS, is the number of times the Philox round
function will be applied.
As of September 2011, the authors know of no statistical flaws with
ROUNDS=6 or more for Philox2x64.
@typedef random_iterator_r123::Philox2x64
@ingroup PhiloxNxW
Philox2x64 is equivalent to Philox2x64_R<10>. With 10 rounds,
Philox2x64 has a considerable safety margin over the minimum number
of rounds with no known statistical flaws, but still has excellent
performance.
@class random_iterator_r123::Philox4x32_R
@ingroup PhiloxNxW
Exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
The template argument, ROUNDS, is the number of times the Philox round
function will be applied.
In November 2011, the authors recorded some suspicious p-values (approximately 1.e-7) from
some very long (longer than the default BigCrush length) SimpPoker tests. Despite
the fact that even longer tests reverted to "passing" p-values, a cloud remains over
Philox4x32 with 7 rounds. The authors know of no statistical flaws with
ROUNDS=8 or more for Philox4x32.
@typedef random_iterator_r123::Philox4x32
@ingroup PhiloxNxW
Philox4x32 is equivalent to Philox4x32_R<10>. With 10 rounds,
Philox4x32 has a considerable safety margin over the minimum number
of rounds with no known statistical flaws, but still has excellent
performance.
@class random_iterator_r123::Philox4x64_R
@ingroup PhiloxNxW
Exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
The template argument, ROUNDS, is the number of times the Philox round
function will be applied.
As of September 2011, the authors know of no statistical flaws with
ROUNDS=7 or more for Philox4x64.
@typedef random_iterator_r123::Philox4x64
@ingroup PhiloxNxW
Philox4x64 is equivalent to Philox4x64_R<10>. With 10 rounds,
Philox4x64 has a considerable safety margin over the minimum number
of rounds with no known statistical flaws, but still has excellent
performance.
*/
#endif /* __cplusplus */
#endif /* _philox_dot_h_ */