IAP GITLAB

Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • AirShowerPhysics/corsika
  • rulrich/corsika
  • AAAlvesJr/corsika
  • Andre/corsika
  • arrabito/corsika
  • Nikos/corsika
  • olheiser73/corsika
  • AirShowerPhysics/papers/corsika
  • pranav/corsika
9 results
Show changes
Showing
with 3624 additions and 219 deletions
/*
* (c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu
*
* This software is distributed under the terms of the GNU General Public
* Licence version 3 (GPL Version 3). See file LICENSE for a full version of
* the license.
* This software is distributed under the terms of the 3-clause BSD license.
* See file LICENSE for a full version of the license.
*/
#pragma once
#include <corsika/framework/core/PhysicalUnits.hpp>
#include <corsika/framework/process/BaseProcess.hpp>
#include <corsika/framework/process/BoundaryCrossingProcess.hpp>
#include <corsika/framework/process/ContinuousProcess.hpp>
......@@ -19,6 +19,8 @@
#include <corsika/framework/process/ProcessReturn.hpp>
#include <corsika/framework/process/SecondariesProcess.hpp>
#include <corsika/framework/process/StackProcess.hpp>
#include <corsika/framework/process/CascadeEquationsProcess.hpp>
#include <corsika/framework/core/Step.hpp>
#include <cmath>
#include <limits>
......@@ -93,10 +95,10 @@ namespace corsika {
template <typename TProcess1, typename TProcess2, int IndexStart, int IndexProcess1,
int IndexProcess2>
template <typename TParticle, typename TTrack>
template <typename TParticle>
inline ProcessReturn
ProcessSequence<TProcess1, TProcess2, IndexStart, IndexProcess1, IndexProcess2>::
doContinuous(TParticle& particle, TTrack& vT,
doContinuous(Step<TParticle>& step,
[[maybe_unused]] ContinuousProcessIndex const limitId) {
ProcessReturn ret = ProcessReturn::Ok;
......@@ -104,44 +106,46 @@ namespace corsika {
// errors if process1_type is invalid
if constexpr (process1_type::is_process_sequence) {
ret |= A_.doContinuous(particle, vT, limitId);
ret |= A_.doContinuous(step, limitId);
} else if constexpr (is_continuous_process_v<process1_type>) {
// interface checking on TProcess1
static_assert(
has_method_doContinuous_v<TProcess1, ProcessReturn, TParticle&, TTrack&> ||
has_method_doContinuous_v<TProcess1, ProcessReturn, TParticle&,
TTrack const&> ||
has_method_doContinuous_v<TProcess1, ProcessReturn, TParticle const&,
TTrack const&>,
"TDerived has no method with correct signature \"ProcessReturn "
"doContinuous(TParticle [const]&,TTrack [const]&,bool)\" required for "
"ContinuousProcess<TDerived>. ");
ret |= A_.doContinuous(particle, vT,
limitId == ContinuousProcessIndex(IndexProcess1));
//~ static_assert(
//~ has_method_doContinuous_v<TProcess1, ProcessReturn, TParticle&, TTrack&> ||
//~ has_method_doContinuous_v<TProcess1, ProcessReturn, TParticle&,
//~ TTrack const&> ||
//~ has_method_doContinuous_v<TProcess1, ProcessReturn, TParticle const&,
//~ TTrack const&>,
//~ "TDerived has no method with correct signature \"ProcessReturn "
//~ "doContinuous(TParticle [const]&,TTrack [const]&,bool)\" required for "
//~ "ContinuousProcess<TDerived>. ");
ret |= A_.doContinuous(
step, limitId == ContinuousProcessIndex(
static_cast<void const*>(std::addressof(A_))));
}
}
if constexpr (is_process_v<process2_type>) { // to protect from further compiler
// errors if process2_type is invalid
if constexpr (process2_type::is_process_sequence) {
ret |= B_.doContinuous(particle, vT, limitId);
ret |= B_.doContinuous(step, limitId);
} else if constexpr (is_continuous_process_v<process2_type>) {
// interface checking on TProcess2
static_assert(
has_method_doContinuous_v<TProcess2, ProcessReturn, TParticle&, TTrack&> ||
has_method_doContinuous_v<TProcess2, ProcessReturn, TParticle&,
TTrack const&> ||
has_method_doContinuous_v<TProcess2, ProcessReturn, TParticle const&,
TTrack const&>,
"TDerived has no method with correct signature \"ProcessReturn "
"doContinuous(TParticle [const]&,TTrack [const]&,bool)\" required for "
"ContinuousProcess<TDerived>. ");
ret |= B_.doContinuous(particle, vT,
limitId == ContinuousProcessIndex(IndexProcess2));
//~ static_assert(
//~ has_method_doContinuous_v<TProcess2, ProcessReturn, TParticle&, TTrack&> ||
//~ has_method_doContinuous_v<TProcess2, ProcessReturn, TParticle&,
//~ TTrack const&> ||
//~ has_method_doContinuous_v<TProcess2, ProcessReturn, TParticle const&,
//~ TTrack const&>,
//~ "TDerived has no method with correct signature \"ProcessReturn "
//~ "doContinuous(TParticle [const]&,TTrack [const]&,bool)\" required for "
//~ "ContinuousProcess<TDerived>. ");
ret |= B_.doContinuous(
step, limitId == ContinuousProcessIndex(
static_cast<void const*>(std::addressof(B_))));
}
}
......@@ -258,7 +262,8 @@ namespace corsika {
template <typename TParticle, typename TTrack>
inline ContinuousProcessStepLength
ProcessSequence<TProcess1, TProcess2, IndexStart, IndexProcess1,
IndexProcess2>::getMaxStepLength(TParticle& particle, TTrack& vTrack) {
IndexProcess2>::getMaxStepLength(TParticle&& particle,
TTrack&& vTrack) {
// if no other process in the sequence implements it
ContinuousProcessStepLength max_length(std::numeric_limits<double>::infinity() *
meter);
......@@ -277,8 +282,9 @@ namespace corsika {
"getMaxStepLength(TParticle const&, TTrack const&)\" required for "
"ContinuousProcess<TDerived>. ");
ContinuousProcessStepLength const step(A_.getMaxStepLength(particle, vTrack),
ContinuousProcessIndex(IndexProcess1));
ContinuousProcessStepLength const step(
A_.getMaxStepLength(particle, vTrack),
ContinuousProcessIndex(static_cast<void const*>(std::addressof(A_))));
max_length = std::min(max_length, step);
}
}
......@@ -297,8 +303,9 @@ namespace corsika {
"getMaxStepLength(TParticle const&, TTrack const&)\" required for "
"ContinuousProcess<TDerived>. ");
ContinuousProcessStepLength const step(B_.getMaxStepLength(particle, vTrack),
ContinuousProcessIndex(IndexProcess2));
ContinuousProcessStepLength const step(
B_.getMaxStepLength(particle, vTrack),
ContinuousProcessIndex(static_cast<void const*>(std::addressof(B_))));
max_length = std::min(max_length, step);
}
}
......@@ -308,24 +315,58 @@ namespace corsika {
template <typename TProcess1, typename TProcess2, int IndexStart, int IndexProcess1,
int IndexProcess2>
template <typename TParticle>
inline InverseGrammageType
ProcessSequence<TProcess1, TProcess2, IndexStart, IndexProcess1,
IndexProcess2>::getInverseInteractionLength(TParticle&& particle) {
inline CrossSectionType
ProcessSequence<TProcess1, TProcess2, IndexStart, IndexProcess1, IndexProcess2>::
getCrossSection([[maybe_unused]] TParticle const& projectile,
[[maybe_unused]] Code const targetId,
[[maybe_unused]] FourMomentum const& targetP4) const {
InverseGrammageType tot = 0 * meter * meter / gram; // default value
CrossSectionType tot = CrossSectionType::zero();
if constexpr (is_process_v<process1_type>) { // to protect from further compiler
// errors if process1_type is invalid
if constexpr (is_interaction_process_v<process1_type> ||
process1_type::is_process_sequence) {
tot += A_.getInverseInteractionLength(particle);
if constexpr (is_interaction_process_v<process1_type>) {
bool constexpr has_signature_cx1 =
has_method_getCrossSection_v<TProcess1, // process object
CrossSectionType, // return type
Code, Code, // parameters
FourMomentum const&, FourMomentum const&>;
if constexpr (has_signature_cx1) {
tot += A_.getCrossSection(projectile.getPID(), targetId,
{projectile.getEnergy(), projectile.getMomentum()},
targetP4);
} else { // for PROPOSAL
tot += A_.getCrossSection(projectile, projectile.getPID(),
{projectile.getEnergy(), projectile.getMomentum()});
}
} else if constexpr (process1_type::is_process_sequence) {
tot += A_.getCrossSection(projectile, targetId, targetP4);
}
}
if constexpr (is_process_v<process2_type>) { // to protect from further compiler
// errors if process2_type is invalid
if constexpr (is_interaction_process_v<process2_type> ||
process2_type::is_process_sequence) {
tot += B_.getInverseInteractionLength(particle);
if constexpr (is_interaction_process_v<process2_type>) {
bool constexpr has_signature_cx1 =
has_method_getCrossSection_v<TProcess2, // process object
CrossSectionType, // return type
Code, Code, // parameters
FourMomentum const&, FourMomentum const&>;
if constexpr (has_signature_cx1) {
tot += B_.getCrossSection(projectile.getPID(), targetId,
{projectile.getEnergy(), projectile.getMomentum()},
targetP4);
} else { // for PROPOSAL
tot += B_.getCrossSection(projectile, projectile.getPID(),
{projectile.getEnergy(), projectile.getMomentum()});
}
} else if constexpr (process2_type::is_process_sequence) {
tot += B_.getCrossSection(projectile, targetId, targetP4);
}
}
return tot;
......@@ -333,65 +374,263 @@ namespace corsika {
template <typename TProcess1, typename TProcess2, int IndexStart, int IndexProcess1,
int IndexProcess2>
template <typename TSecondaryView>
template <typename TStack>
void ProcessSequence<TProcess1, TProcess2, IndexStart, IndexProcess1,
IndexProcess2>::doCascadeEquations(TStack& stack) {
if constexpr (is_process_v<process1_type>) { // to protect from further compiler
// errors if process1_type is invalid
if constexpr (process1_type::is_process_sequence &&
!process1_type::is_switch_process_sequence) {
A_.doCascadeEquations(stack);
} else if constexpr (is_cascade_equations_process_v<process1_type>) {
// interface checking on TProcess1
static_assert(has_method_doCascadeEquations_v<TProcess1,
void, // return type
TStack&>, // parameter
"TDerived has no method with correct signature \"void "
"doCascadeEquations(TStack&)\" required for "
"CascadeEquationsProcess<TDerived>. ");
A_.doCascadeEquations(stack);
}
}
if constexpr (is_process_v<process2_type>) { // to protect from further compiler
// errors if process2_type is invalid
if constexpr (process2_type::is_process_sequence &&
!process2_type::is_switch_process_sequence) {
B_.doCascadeEquations(stack);
} else if constexpr (is_cascade_equations_process_v<process2_type>) {
// interface checking on TProcess2
static_assert(has_method_doCascadeEquations_v<TProcess2,
void, // return type
TStack&>, // parameter
"TDerived has no method with correct signature \"void "
"doCascadeEquations(TStack&)\" required for "
"CascadeEquationsProcess<TDerived>. ");
B_.doCascadeEquations(stack);
}
}
}
template <typename TProcess1, typename TProcess2, int IndexStart, int IndexProcess1,
int IndexProcess2>
void ProcessSequence<TProcess1, TProcess2, IndexStart, IndexProcess1,
IndexProcess2>::initCascadeEquations() {
if constexpr (is_process_v<process1_type>) { // to protect from further compiler
// errors if process1_type is invalid
if constexpr ((process1_type::is_process_sequence &&
!process1_type::is_switch_process_sequence) ||
is_cascade_equations_process_v<process1_type>) {
A_.initCascadeEquations();
}
}
if constexpr (is_process_v<process2_type>) { // to protect from further compiler
// errors if process2_type is invalid
if constexpr ((process2_type::is_process_sequence &&
!process2_type::is_switch_process_sequence) ||
is_cascade_equations_process_v<process2_type>) {
B_.initCascadeEquations();
}
}
} // namespace corsika
template <typename TProcess1, typename TProcess2, int IndexStart, int IndexProcess1,
int IndexProcess2>
template <typename TSecondaryView, typename TRNG>
inline ProcessReturn
ProcessSequence<TProcess1, TProcess2, IndexStart, IndexProcess1, IndexProcess2>::
selectInteraction(TSecondaryView& view,
[[maybe_unused]] InverseGrammageType lambda_inv_select,
[[maybe_unused]] InverseGrammageType lambda_inv_sum) {
selectInteraction(TSecondaryView&& view, FourMomentum const& projectileP4,
[[maybe_unused]] NuclearComposition const& composition,
[[maybe_unused]] TRNG&& rng,
[[maybe_unused]] CrossSectionType const cx_select,
[[maybe_unused]] CrossSectionType cx_sum) {
// TODO: add check for lambda_inv_select > lambda_inv_tot
// TODO: add check for cx_select > cx_tot
if constexpr (is_process_v<process1_type>) { // to protect from further compiler
// errors if process1_type is invalid
if constexpr (process1_type::is_process_sequence) {
// if A is a process sequence --> check inside
ProcessReturn const ret =
A_.selectInteraction(view, lambda_inv_select, lambda_inv_sum);
A_.selectInteraction(view, projectileP4, composition, rng, cx_select, cx_sum);
// if A_ did succeed, stop routine. Not checking other static branch B_.
if (ret != ProcessReturn::Ok) { return ret; }
} else if constexpr (is_interaction_process_v<process1_type>) {
// if this is not a ContinuousProcess --> evaluate probability
lambda_inv_sum += A_.getInverseInteractionLength(view.parent());
// check if we should execute THIS process and then EXIT
if (lambda_inv_select <= lambda_inv_sum) {
// interface checking on TProcess1
static_assert(has_method_doInteract_v<TProcess1, void, TSecondaryView&>,
"TDerived has no method with correct signature \"void "
"doInteraction(TSecondaryView&)\" required for "
"InteractionProcess<TDerived>. ");
auto const& projectile = view.parent();
Code const projectileId = projectile.getPID();
// get cross section vector for all material components
// for selected process A
bool constexpr has_signature_cx1 =
has_method_getCrossSection_v<TProcess1, // process object
CrossSectionType, // return type
Code, Code, // parameters
FourMomentum const&, FourMomentum const&>;
bool constexpr has_signature_cx2 = // needed for PROPOSAL interface
has_method_getCrossSectionTemplate_v<
TProcess1, // process object
CrossSectionType, // return type
decltype(projectile) const&, // template argument
decltype(projectile) const&, // parameters
Code, FourMomentum const&>;
static_assert((has_signature_cx1 || has_signature_cx2),
"TProcess1 has no method with correct signature \"CrossSectionType "
"getCrossSection(Code, Code, FourMomentum const&, FourMomentum "
"const&)\" required by "
"InteractionProcess<TProcess1>. ");
std::vector<CrossSectionType> weightedCrossSections;
if constexpr (has_signature_cx1) {
/*std::vector<CrossSectionType> const*/ weightedCrossSections =
composition.getWeighted([=](Code const targetId) -> CrossSectionType {
FourMomentum const targetP4(
get_mass(targetId),
MomentumVector(projectile.getMomentum().getCoordinateSystem(),
{0_GeV, 0_GeV, 0_GeV}));
return A_.getCrossSection(projectileId, targetId, projectileP4, targetP4);
});
cx_sum +=
std::accumulate(weightedCrossSections.cbegin(),
weightedCrossSections.cend(), CrossSectionType::zero());
} else { // this is for PROPOSAL
cx_sum += A_.template getCrossSection(projectile, projectileId, projectileP4);
}
// check if we should execute THIS process and then EXIT
if (cx_select < cx_sum) {
if constexpr (has_signature_cx1) {
// now also sample targetId from weighted cross sections
Code const targetId = composition.sampleTarget(weightedCrossSections, rng);
FourMomentum const targetP4(
get_mass(targetId),
MomentumVector(projectile.getMomentum().getCoordinateSystem(),
{0_GeV, 0_GeV, 0_GeV}));
// interface checking on TProcess1
static_assert(
has_method_doInteract_v<TProcess1, // process object
void, // return type
TSecondaryView, // template argument
TSecondaryView&, // method parameters
Code, Code, FourMomentum const&,
FourMomentum const&>,
"TProcess1 has no method with correct signature \"void "
"doInteraction<TSecondaryView>(TSecondaryView&, "
"Code, Code, FourMomentum const&, FourMomentum const&)\" required for "
"InteractionProcess<TProcess1>. ");
A_.template doInteraction(view, projectileId, targetId, projectileP4,
targetP4);
} else { // this is for PROPOSAL
A_.template doInteraction(view, projectileId, projectileP4);
}
A_.template doInteraction(view);
return ProcessReturn::Interacted;
}
} // end branch A
}
}
} // end branch A
if constexpr (is_process_v<process2_type>) { // to protect from further compiler
// errors if process2_type is invalid
if constexpr (process2_type::is_process_sequence) {
// if B_ is a process sequence --> check inside
return B_.selectInteraction(view, lambda_inv_select, lambda_inv_sum);
return B_.selectInteraction(view, projectileP4, composition, rng, cx_select,
cx_sum);
} else if constexpr (is_interaction_process_v<process2_type>) {
// if this is not a ContinuousProcess --> evaluate probability
lambda_inv_sum += B_.getInverseInteractionLength(view.parent());
// soon as SecondaryView::parent() is migrated!
// check if we should execute THIS process and then EXIT
if (lambda_inv_select <= lambda_inv_sum) {
// interface checking on TProcess1
static_assert(has_method_doInteract_v<TProcess2, void, TSecondaryView&>,
"TDerived has no method with correct signature \"void "
"doInteraction(TSecondaryView&)\" required for "
"InteractionProcess<TDerived>. ");
auto const& projectile = view.parent();
Code const projectileId = projectile.getPID();
// get cross section vector for all material components, for selected process B
bool constexpr has_signature_cx1 =
has_method_getCrossSection_v<TProcess2, // process object
CrossSectionType, // return type
Code, Code, // parameters
FourMomentum const&, FourMomentum const&>;
bool constexpr has_signature_cx2 = // needed for PROPOSAL interface
has_method_getCrossSectionTemplate_v<
TProcess2, // process object
CrossSectionType, // return type
decltype(*projectile) const&, // template argument
decltype(*projectile) const&, // parameters
Code, // parameters
FourMomentum const&>;
static_assert((has_signature_cx1 || has_signature_cx2),
"TProcess2 has no method with correct signature \"CrossSectionType "
"getCrossSection(Code, Code, FourMomentum const&, FourMomentum "
"const&)\" required by "
"InteractionProcess<TProcess1>. ");
std::vector<CrossSectionType> weightedCrossSections;
if constexpr (has_signature_cx1) {
/* std::vector<CrossSectionType> const*/ weightedCrossSections =
composition.getWeighted([=](Code const targetId) -> CrossSectionType {
FourMomentum const targetP4(
get_mass(targetId),
MomentumVector(projectile.getMomentum().getCoordinateSystem(),
{0_GeV, 0_GeV, 0_GeV}));
return B_.getCrossSection(projectileId, targetId, projectileP4, targetP4);
});
cx_sum +=
std::accumulate(weightedCrossSections.begin(), weightedCrossSections.end(),
CrossSectionType::zero());
} else { // this is for PROPOSAL
cx_sum += B_.template getCrossSection(projectile, projectileId, projectileP4);
}
B_.doInteraction(view);
// check if we should execute THIS process and then EXIT
if (cx_select < cx_sum) {
if constexpr (has_signature_cx1) {
// now also sample targetId from weighted cross sections
Code const targetId = composition.sampleTarget(weightedCrossSections, rng);
FourMomentum const targetP4(
get_mass(targetId),
MomentumVector(projectile.getMomentum().getCoordinateSystem(),
{0_GeV, 0_GeV, 0_GeV}));
// interface checking on TProcess2
static_assert(
has_method_doInteract_v<TProcess2, // process object
void, // return type
TSecondaryView, // template argument
TSecondaryView&, // method parameters
Code, Code, FourMomentum const&,
FourMomentum const&>,
"TProcess1 has no method with correct signature \"void "
"doInteraction<TSecondaryView>(TSecondaryView&, "
"Code, Code, FourMomentum const&, FourMomentum const&)\" required for "
"InteractionProcess<TProcess2>. ");
B_.doInteraction(view, projectileId, targetId, projectileP4, targetP4);
} else { // this is for PROPOSAL
B_.doInteraction(view, projectileId, projectileP4);
}
return ProcessReturn::Interacted;
}
} // end branch B_
}
}
} // end branch B_
return ProcessReturn::Ok;
}
......@@ -427,7 +666,7 @@ namespace corsika {
template <typename TSecondaryView>
inline ProcessReturn ProcessSequence<
TProcess1, TProcess2, IndexStart, IndexProcess1,
IndexProcess2>::selectDecay(TSecondaryView& view,
IndexProcess2>::selectDecay(TSecondaryView&& view,
[[maybe_unused]] InverseTimeType decay_inv_select,
[[maybe_unused]] InverseTimeType decay_inv_sum) {
......@@ -444,8 +683,8 @@ namespace corsika {
// if this is not a ContinuousProcess --> evaluate probability
decay_inv_sum += A_.getInverseLifetime(view.parent());
// check if we should execute THIS process and then EXIT
if (decay_inv_select <= decay_inv_sum) { // more pedagogical: rndm_select <
// decay_inv_sum / decay_inv_tot
if (decay_inv_select < decay_inv_sum) { // more pedagogical: rndm_select <
// decay_inv_sum / decay_inv_tot
// interface checking on TProcess1
static_assert(has_method_doDecay_v<TProcess1, void, TSecondaryView&>,
"TDerived has no method with correct signature \"void "
......@@ -467,7 +706,7 @@ namespace corsika {
// if this is not a ContinuousProcess --> evaluate probability
decay_inv_sum += B_.getInverseLifetime(view.parent());
// check if we should execute THIS process and then EXIT
if (decay_inv_select <= decay_inv_sum) {
if (decay_inv_select < decay_inv_sum) {
// interface checking on TProcess1
static_assert(has_method_doDecay_v<TProcess2, void, TSecondaryView&>,
......@@ -485,8 +724,8 @@ namespace corsika {
}
/**
* traits marker to identify objects containing any StackProcesses
**/
* traits marker to identify objects containing any StackProcesses.
*/
namespace detail {
// need helper alias to achieve this:
template <
......
/*
* (c) Copyright 2021 CORSIKA Project, corsika-project@lists.kit.edu
*
* This software is distributed under the terms of the GNU General Public
* Licence version 3 (GPL Version 3). See file LICENSE for a full version of
* the license.
* This software is distributed under the terms of the 3-clause BSD license.
* See file LICENSE for a full version of the license.
*/
#pragma once
#include <corsika/framework/process/ProcessTraits.hpp>
#include <corsika/framework/utility/HasMethodSignature.hpp>
namespace corsika {
/**
traits test for SecondariesProcess::doSecondaries method
*/
* traits test for SecondariesProcess::doSecondaries method.
*/
template <class TProcess, typename TReturn, typename... TArg>
struct has_method_doSecondaries
: public detail::has_method_signature<TReturn, TArg...> {
......@@ -37,16 +37,19 @@ namespace corsika {
public:
/**
@name traits results
@{
*/
* @name traits results
* @{
*/
using type = decltype(test<std::decay_t<TProcess>>(nullptr));
static const bool value = type::value;
//! @}
};
//! @file DecayProcess.hpp
//! value traits type
/**
* @file SecondariesProcess.hpp
*
* @brief value traits type.
*/
template <class TProcess, typename TReturn, typename... TArg>
bool constexpr has_method_doSecondaries_v =
has_method_doSecondaries<TProcess, TReturn, TArg...>::value;
......
/*
* (c) Copyright 2021 CORSIKA Project, corsika-project@lists.kit.edu
*
* This software is distributed under the terms of the GNU General Public
* Licence version 3 (GPL Version 3). See file LICENSE for a full version of
* the license.
* This software is distributed under the terms of the 3-clause BSD license.
* See file LICENSE for a full version of the license.
*/
#pragma once
#include <corsika/framework/process/ProcessTraits.hpp>
#include <corsika/framework/utility/HasMethodSignature.hpp>
namespace corsika {
......
/*
* (c) Copyright 2018 CORSIKA Project, corsika-project@lists.kit.edu
*
* This software is distributed under the terms of the GNU General Public
* Licence version 3 (GPL Version 3). See file LICENSE for a full version of
* the license.
* This software is distributed under the terms of the 3-clause BSD license.
* See file LICENSE for a full version of the license.
*/
#pragma once
......@@ -76,49 +75,55 @@ namespace corsika {
template <typename TCondition, typename TSequence, typename USequence, int IndexStart,
int IndexProcess1, int IndexProcess2>
template <typename TParticle, typename TTrack>
inline ProcessReturn SwitchProcessSequence<
TCondition, TSequence, USequence, IndexStart, IndexProcess1,
IndexProcess2>::doContinuous(TParticle& particle, TTrack& vT,
ContinuousProcessIndex const idLimit) {
if (select_(particle)) {
template <typename TParticle>
inline ProcessReturn SwitchProcessSequence<TCondition, TSequence, USequence, IndexStart,
IndexProcess1, IndexProcess2>::
doContinuous(Step<TParticle>& step,
[[maybe_unused]] ContinuousProcessIndex const idLimit) {
if (select_(step.getParticlePre())) {
if constexpr (process1_type::is_process_sequence) {
return A_.doContinuous(particle, vT, idLimit);
return A_.doContinuous(step, idLimit);
}
if constexpr (is_continuous_process_v<process1_type>) {
static_assert(
has_method_doContinuous_v<TSequence, ProcessReturn, TParticle&, TTrack&> ||
has_method_doContinuous_v<TSequence, ProcessReturn, TParticle&,
TTrack const&> ||
has_method_doContinuous_v<TSequence, ProcessReturn, TParticle const&,
TTrack const&>,
"TDerived has no method with correct signature \"ProcessReturn "
"doContinuous(TParticle[const]&,TTrack[const]&,bool)\" required for "
"ContinuousProcess<TDerived>. ");
return A_.doContinuous(particle, vT,
idLimit == ContinuousProcessIndex(IndexProcess1));
// static_assert(
// has_method_doContinuous_v<TSequence, ProcessReturn, TParticle&,
// TTrack&> ||
// has_method_doContinuous_v<TSequence, ProcessReturn, TParticle&,
// TTrack const&> ||
// has_method_doContinuous_v<TSequence, ProcessReturn, TParticle
// const&,
// TTrack const&>,
// "TDerived has no method with correct signature \"ProcessReturn "
// "doContinuous(TParticle[const]&,TTrack[const]&,bool)\" required for
// " "ContinuousProcess<TDerived>. ");
return A_.doContinuous(
step, idLimit == ContinuousProcessIndex(
static_cast<void const*>(std::addressof(A_))));
}
} else {
if constexpr (process2_type::is_process_sequence) {
return B_.doContinuous(particle, vT, idLimit);
return B_.doContinuous(step, idLimit);
}
if constexpr (is_continuous_process_v<process2_type>) {
// interface checking on USequence
static_assert(
has_method_doContinuous_v<USequence, ProcessReturn, TParticle&, TTrack&> ||
has_method_doContinuous_v<USequence, ProcessReturn, TParticle&,
TTrack const&> ||
has_method_doContinuous_v<USequence, ProcessReturn, TParticle const&,
TTrack const&>,
"TDerived has no method with correct signature \"ProcessReturn "
"doContinuous(TParticle [const]&,TTrack[const]&,bool)\" required for "
"ContinuousProcess<TDerived>. ");
return B_.doContinuous(particle, vT,
idLimit == ContinuousProcessIndex(IndexProcess2));
// static_assert(
// has_method_doContinuous_v<USequence, ProcessReturn, TParticle&,
// TTrack&> ||
// has_method_doContinuous_v<USequence, ProcessReturn, TParticle&,
// TTrack const&> ||
// has_method_doContinuous_v<USequence, ProcessReturn, TParticle
// const&,
// TTrack const&>,
// "TDerived has no method with correct signature \"ProcessReturn "
// "doContinuous(TParticle [const]&,TTrack[const]&,bool)\" required for
// " "ContinuousProcess<TDerived>. ");
return B_.doContinuous(
step, idLimit == ContinuousProcessIndex(
static_cast<void const*>(std::addressof(B_))));
}
}
return ProcessReturn::Ok;
......@@ -182,8 +187,9 @@ namespace corsika {
"getMaxStepLength(TParticle const&, TTrack const&)\" required for "
"ContinuousProcess<TDerived>. ");
return ContinuousProcessStepLength(A_.getMaxStepLength(particle, vTrack),
ContinuousProcessIndex(IndexProcess1));
return ContinuousProcessStepLength(
A_.getMaxStepLength(particle, vTrack),
ContinuousProcessIndex(static_cast<void const*>(std::addressof(A_))));
}
} else {
if constexpr (process2_type::is_process_sequence) {
......@@ -198,8 +204,9 @@ namespace corsika {
"getMaxStepLength(TParticle const&, TTrack const&)\" required for "
"ContinuousProcess<TDerived>. ");
return ContinuousProcessStepLength(B_.getMaxStepLength(particle, vTrack),
ContinuousProcessIndex(IndexProcess2));
return ContinuousProcessStepLength(
B_.getMaxStepLength(particle, vTrack),
ContinuousProcessIndex(static_cast<void const*>(std::addressof(B_))));
}
}
......@@ -210,82 +217,233 @@ namespace corsika {
template <typename TCondition, typename TSequence, typename USequence, int IndexStart,
int IndexProcess1, int IndexProcess2>
template <typename TParticle>
inline InverseGrammageType SwitchProcessSequence<
TCondition, TSequence, USequence, IndexStart, IndexProcess1,
IndexProcess2>::getInverseInteractionLength(TParticle&& particle) {
if (select_(particle)) {
if constexpr (is_interaction_process_v<process1_type> ||
process1_type::is_process_sequence) {
return A_.getInverseInteractionLength(particle);
CrossSectionType SwitchProcessSequence<TCondition, TSequence, USequence, IndexStart,
IndexProcess1, IndexProcess2>::
getCrossSection(TParticle const& projectile, [[maybe_unused]] Code const targetId,
[[maybe_unused]] FourMomentum const& targetP4) const {
if (select_(projectile)) {
if constexpr (is_interaction_process_v<process1_type>) {
bool constexpr has_signature_cx1 =
has_method_getCrossSection_v<TSequence, // process object
CrossSectionType, // return type
Code, Code, // parameters
FourMomentum const&, FourMomentum const&>;
if constexpr (has_signature_cx1) {
return A_.getCrossSection(projectile.getPID(), targetId,
{projectile.getEnergy(), projectile.getMomentum()},
targetP4);
} else {
return A_.getCrossSection(projectile, projectile.getPID(),
{projectile.getEnergy(), projectile.getMomentum()});
}
} else if constexpr (process1_type::is_process_sequence) {
return A_.getCrossSection(projectile, targetId, targetP4);
}
} else {
if constexpr (is_interaction_process_v<process2_type> ||
process2_type::is_process_sequence) {
return B_.getInverseInteractionLength(particle);
if constexpr (is_interaction_process_v<process2_type>) {
bool constexpr has_signature_cx1 =
has_method_getCrossSection_v<USequence, // process object
CrossSectionType, // return type
Code, Code, // parameters
FourMomentum const&, FourMomentum const&>;
if constexpr (has_signature_cx1) {
return B_.getCrossSection(projectile.getPID(), targetId,
{projectile.getEnergy(), projectile.getMomentum()},
targetP4);
} else {
return B_.getCrossSection(projectile, targetId, targetP4);
}
} else if constexpr (process2_type::is_process_sequence) {
return B_.getCrossSection(projectile, targetId, targetP4);
}
}
return 0 * meter * meter / gram; // default value
return CrossSectionType::zero(); // default value
}
template <typename TCondition, typename TSequence, typename USequence, int IndexStart,
int IndexProcess1, int IndexProcess2>
template <typename TSecondaryView>
inline ProcessReturn SwitchProcessSequence<TCondition, TSequence, USequence, IndexStart,
IndexProcess1, IndexProcess2>::
selectInteraction(TSecondaryView& view,
[[maybe_unused]] InverseGrammageType lambda_inv_select,
[[maybe_unused]] InverseGrammageType lambda_inv_sum) {
template <typename TSecondaryView, typename TRNG>
inline ProcessReturn SwitchProcessSequence<
TCondition, TSequence, USequence, IndexStart, IndexProcess1,
IndexProcess2>::selectInteraction(TSecondaryView& view,
FourMomentum const& projectileP4,
NuclearComposition const& composition, TRNG& rng,
[[maybe_unused]] CrossSectionType const cx_select,
[[maybe_unused]] CrossSectionType cx_sum) {
if (select_(view.parent())) {
if constexpr (process1_type::is_process_sequence) {
// if A_ is a process sequence --> check inside
ProcessReturn const ret =
A_.selectInteraction(view, lambda_inv_select, lambda_inv_sum);
// if A_ did succeed, stop routine. Not checking other static branch B_.
if (ret != ProcessReturn::Ok) { return ret; }
return A_.selectInteraction(view, projectileP4, composition, rng, cx_select,
cx_sum);
} else if constexpr (is_interaction_process_v<process1_type>) {
// if this is not a ContinuousProcess --> evaluate probability
lambda_inv_sum += A_.getInverseInteractionLength(view.parent());
// check if we should execute THIS process and then EXIT
if (lambda_inv_select < lambda_inv_sum) {
// interface checking on TSequence
static_assert(has_method_doInteract_v<TSequence, void, TSecondaryView&>,
"TDerived has no method with correct signature \"void "
"doInteraction(TSecondaryView&)\" required for "
"InteractionProcess<TDerived>. ");
auto const& projectile = view.parent();
Code const projectileId = projectile.getPID();
// get cross section vector for all material components
// for selected process A
bool constexpr has_signature_cx1 =
has_method_getCrossSection_v<TSequence, // process object
CrossSectionType, // return type
Code, Code, // parameters
FourMomentum const&, FourMomentum const&>;
bool constexpr has_signature_cx2 = // needed for PROPOSAL interface
has_method_getCrossSectionTemplate_v<
TSequence, // process object
CrossSectionType, // return type
decltype(projectile) const&, // template argument
decltype(projectile) const&, // parameters
Code, FourMomentum const&>;
static_assert((has_signature_cx1 || has_signature_cx2),
"TSequence has no method with correct signature \"CrossSectionType "
"getCrossSection(Code, Code, FourMomentum const&, FourMomentum "
"const&)\" required by "
"InteractionProcess<TSequence>. ");
std::vector<CrossSectionType> weightedCrossSections;
if constexpr (has_signature_cx1) {
/*std::vector<CrossSectionType> const*/ weightedCrossSections =
composition.getWeighted([=](Code const targetId) -> CrossSectionType {
FourMomentum const targetP4(
get_mass(targetId),
MomentumVector(projectile.getMomentum().getCoordinateSystem(),
{0_GeV, 0_GeV, 0_GeV}));
return A_.getCrossSection(projectileId, targetId, projectileP4, targetP4);
});
cx_sum +=
std::accumulate(weightedCrossSections.cbegin(),
weightedCrossSections.cend(), CrossSectionType::zero());
} else { // this is for PROPOSAL
cx_sum += A_.template getCrossSection(projectile, projectileId, projectileP4);
}
if (cx_select < cx_sum) {
if constexpr (has_signature_cx1) {
// now also sample targetId from weighted cross sections
Code const targetId = composition.sampleTarget(weightedCrossSections, rng);
FourMomentum const targetP4(
get_mass(targetId),
MomentumVector(projectile.getMomentum().getCoordinateSystem(),
{0_GeV, 0_GeV, 0_GeV}));
// interface checking on TProcess1
static_assert(
has_method_doInteract_v<TSequence, // process object
void, // return type
TSecondaryView, // template argument
TSecondaryView&, // method parameters
Code, Code, FourMomentum const&,
FourMomentum const&>,
"TSequence has no method with correct signature \"void "
"doInteraction<TSecondaryView>(TSecondaryView&, "
"Code, Code, FourMomentum const&, FourMomentum const&)\" required for "
"InteractionProcess<TSequence>. ");
A_.template doInteraction(view, projectileId, targetId, projectileP4,
targetP4);
} else { // this is for PROPOSAL
A_.template doInteraction(view, projectileId, projectileP4);
}
A_.doInteraction(view);
return ProcessReturn::Interacted;
}
} // end branch A_
} // end collision branch A
}
} else {
} else { // selection: end branch A, start branch B
if constexpr (process2_type::is_process_sequence) {
// if B_ is a process sequence --> check inside
return B_.selectInteraction(view, lambda_inv_select, lambda_inv_sum);
return B_.selectInteraction(view, projectileP4, composition, rng, cx_select,
cx_sum);
} else if constexpr (is_interaction_process_v<process2_type>) {
// if this is not a ContinuousProcess --> evaluate probability
lambda_inv_sum += B_.getInverseInteractionLength(view.parent());
// check if we should execute THIS process and then EXIT
if (lambda_inv_select < lambda_inv_sum) {
// interface checking on TSequence
static_assert(has_method_doInteract_v<USequence, void, TSecondaryView&>,
"TDerived has no method with correct signature \"void "
"doInteraction(TSecondaryView&)\" required for "
"InteractionProcess<TDerived>. ");
auto const& projectile = view.parent();
Code const projectileId = projectile.getPID();
// get cross section vector for all material components, for selected process B
bool constexpr has_signature_cx1 =
has_method_getCrossSection_v<USequence, // process object
CrossSectionType, // return type
Code, Code, // parameters
FourMomentum const&, FourMomentum const&>;
bool constexpr has_signature_cx2 = // needed for PROPOSAL interface
has_method_getCrossSectionTemplate_v<
USequence, // process object
CrossSectionType, // return type
decltype(projectile) const&, // template argument
decltype(projectile) const&, // parameters
Code, FourMomentum const&>;
static_assert((has_signature_cx1 || has_signature_cx2),
"USequence has no method with correct signature \"CrossSectionType "
"getCrossSection(Code, Code, FourMomentum const&, FourMomentum "
"const&)\" required by "
"InteractionProcess<USequence>. ");
std::vector<CrossSectionType> weightedCrossSections;
if constexpr (has_signature_cx1) {
/* std::vector<CrossSectionType> const*/ weightedCrossSections =
composition.getWeighted([=](Code const targetId) -> CrossSectionType {
FourMomentum const targetP4(
get_mass(targetId),
MomentumVector(projectile.getMomentum().getCoordinateSystem(),
{0_GeV, 0_GeV, 0_GeV}));
return B_.getCrossSection(projectileId, targetId, projectileP4, targetP4);
});
cx_sum +=
std::accumulate(weightedCrossSections.begin(), weightedCrossSections.end(),
CrossSectionType::zero());
} else { // this is for PROPOSAL
cx_sum += B_.template getCrossSection(projectile, projectileId, projectileP4);
}
// check if we should execute THIS process and then EXIT
if (cx_select < cx_sum) {
if constexpr (has_signature_cx1) {
// now also sample targetId from weighted cross sections
Code const targetId = composition.sampleTarget(weightedCrossSections, rng);
FourMomentum const targetP4(
get_mass(targetId),
MomentumVector(projectile.getMomentum().getCoordinateSystem(),
{0_GeV, 0_GeV, 0_GeV}));
// interface checking on TProcess2
static_assert(
has_method_doInteract_v<USequence, // process object
void, // return type
TSecondaryView, // template argument
TSecondaryView&, // method parameters
Code, Code, FourMomentum const&,
FourMomentum const&>,
"USequence has no method with correct signature \"void "
"doInteraction<TSecondaryView>(TSecondaryView&, "
"Code, Code, FourMomentum const&, FourMomentum const&)\" required for "
"InteractionProcess<USequence>. ");
B_.doInteraction(view, projectileId, targetId, projectileP4, targetP4);
} else { // this is for PROPOSAL
B_.doInteraction(view, projectileId, projectileP4);
}
B_.doInteraction(view);
return ProcessReturn::Interacted;
}
} // end branch B_
}
} // end collision in branch B
}
} // end branch B_
return ProcessReturn::Ok;
}
} // namespace corsika
template <typename TCondition, typename TSequence, typename USequence, int IndexStart,
int IndexProcess1, int IndexProcess2>
......
/*
* (c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu
* (c) Copyright 2018 CORSIKA Project, corsika-project@lists.kit.edu
*
* This software is distributed under the terms of the GNU General Public
* Licence version 3 (GPL Version 3). See file LICENSE for a full version of
* the license.
* This software is distributed under the terms of the 3-clause BSD license.
* See file LICENSE for a full version of the license.
*/
#pragma once
......@@ -11,21 +10,25 @@
#include <iterator>
#include <random>
#include <sstream>
#include <tuple>
#include <utility>
namespace corsika {
inline void RNGManager::registerRandomStream(string_type const& pStreamName) {
prng_type rng;
template <typename CBPRNG>
inline void RNGManager<CBPRNG>::registerRandomStream(string_type const& pStreamName) {
if (auto const& it = seeds_.find(pStreamName); it != seeds_.end()) {
rng.seed(it->second);
}
auto const& it = rngs_.find(pStreamName);
rngs_[pStreamName] = std::move(rng);
if (it == rngs_.end()) // key not in container, so create one and initialize the value
rngs_.emplace(std::piecewise_construct, std::forward_as_tuple(pStreamName.c_str()),
std::forward_as_tuple(seed_, uint32_t(rngs_.size())));
}
inline RNGManager::prng_type& RNGManager::getRandomStream(
template <typename CBPRNG>
inline typename RNGManager<CBPRNG>::prng_type& RNGManager<CBPRNG>::getRandomStream(
string_type const& pStreamName) {
if (isRegistered(pStreamName)) {
return rngs_.at(pStreamName);
} else { // this stream name is not in the map
......@@ -33,11 +36,13 @@ namespace corsika {
}
}
inline bool RNGManager::isRegistered(string_type const& pStreamName) const {
template <typename CBPRNG>
inline bool RNGManager<CBPRNG>::isRegistered(string_type const& pStreamName) const {
return rngs_.count(pStreamName) > 0;
}
inline std::stringstream RNGManager::dumpState() const {
template <typename CBPRNG>
inline std::stringstream RNGManager<CBPRNG>::dumpState() const {
std::stringstream buffer;
for (auto const& [streamName, rng] : rngs_) {
buffer << '"' << streamName << "\" = \"" << rng << '"' << std::endl;
......@@ -46,23 +51,4 @@ namespace corsika {
return buffer;
}
inline void RNGManager::seedAll(seed_type vSeed) {
for (auto& entry : rngs_) { entry.second.seed(vSeed++); }
}
inline void RNGManager::seedAll(void) {
std::random_device rd;
for (auto& [streamName, rng] : rngs_) {
std::seed_seq sseq{rd(), rd(), rd(), rd(), rd(), rd()}; // 6 really random values
// for logging collect sseq input values in string
std::stringstream ss;
sseq.param(std::ostream_iterator<int>{ss, " "});
CORSIKA_LOG_DEBUG("Random seed stream {} seed {}", streamName, ss.str());
rng.seed(sseq);
}
}
} // namespace corsika
/*----------------------------------------------------------------------------
*
* Copyright (C) 2021 - 2024 Antonio Augusto Alves Junior
*
* This file is part of RandomIterator.
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ----------------------------------------------------------------------------*/
/*
* RandomIterator.h
*
* Created on: 21/02/2021
* Author: Antonio Augusto Alves Junior
*/
#pragma once
// This is the only RandomIterator header that is guaranteed to
// change with every RandomIterator release.
//
// RandomIterator_VERSION % 100 is the sub-minor version
// RandomIterator_VERSION / 100 % 1000 is the minor version
// RandomIterator_VERSION / 100000 is the major version
//
// Because this header does not #include <RandomIterator/detail/Config.h>,
// it is the only RandomIterator header that does not cause
// RandomIterator_HOST_SYSTEM and RandomIterator_DEVICE_SYSTEM to be defined.
/*! \def RandomIterator_VERSION
* \brief The preprocessor macro \p RandomIterator_VERSION encodes the version
* number of the RandomIterator.
*
* <tt>RandomIterator_VERSION % 100</tt> is the patch version.
* <tt>RandomIterator_VERSION / 100 % 1000</tt> is the feature version.
* <tt>RandomIterator_VERSION / 100000</tt> is the major version.
*/
#define RandomIterator_VERSION 100001
/*! \def RandomIterator_MAJOR_VERSION
* \brief The preprocessor macro \p RandomIterator_MAJOR_VERSION encodes the
* major version number of RandomIterator.
*/
#define RandomIterator_MAJOR_VERSION (RandomIterator_VERSION / 100000)
/*! \def RandomIterator_MINOR_VERSION
* \brief The preprocessor macro \p RandomIterator_MINOR_VERSION encodes the
* minor version number of RandomIterator.
*/
#define RandomIterator_MINOR_VERSION (RandomIterator_VERSION / 100 % 1000)
/*! \def RandomIterator_PATCH_NUMBER
* \brief The preprocessor macro \p RandomIterator_PATCH_NUMBER encodes the
* patch number of the RandomIterator library.
*/
#define RandomIterator_PATCH_NUMBER 0
// Declare these namespaces here for the purpose of Doxygenating them
/*!
* \brief \p random_iterator is the top-level namespace which contains all RandomIterator
* functions and types.
*/
namespace random_iterator{ }
/*
* (c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu
*
* This software is distributed under the terms of the 3-clause BSD license.
* See file LICENSE for a full version of the license.
*/
/*
* Stream.hpp
*
* Created on: 23/02/2021
* Author: Antonio Augusto Alves Junior
*/
#pragma once
#include <stdint.h>
#include "detail/tbb/iterators.h"
#include "detail/Engine.hpp"
#include "detail/functors/EngineCaller.hpp"
namespace random_iterator {
/// philox engine
typedef detail::philox philox;
/// threefry engine
typedef detail::threefry threefry;
/// squares3_128 engine
typedef detail::squares3_128 squares3_128;
/// squares4_128 engine
typedef detail::squares4_128 squares4_128;
/// squares3_64 engine
typedef detail::squares3_64 squares3_64;
/// squares4_64 engine
typedef detail::squares4_64 squares4_64;
#if RANDOM_ITERATOR_R123_USE_AES_NI
/// ars engine
typedef detail::ars ars;
#endif
/**
* \class Stream
*
* \brief representation for streams of pseudorandom numbers
*
* Objects of this class represent collections of up to \f$ {2}^{32} \f$ streams of
* random numbers for each seed. Each of such streams has a length of \f$ {2}^{64} \f$.
* If the chosen distribution generates data with 64bits, like ``double``, then each
* stream can handle up to 128 ExaByte of data. Stream objects are thread-safe and
* lazy-evaluated. Output is produced only when requested. It is user responsibility to
* retrieve and store, if needed, the generated output.
*
* \tparam DistibutionType STL compliant distribution type. For example:
* ```std::uniform_real_distribution```, ```std::exponential_distribution```.
*
* \tparam EngineType RNG type.
*
* There are seven options:
*
* * random_iterator::philox
* * random_iterator::threefry
* * random_iterator::ars
* * random_iterator::squares3_128
* * random_iterator::squares4_128
* * random_iterator::squares3_64
* * random_iterator::squares4_64
*
* Usage:
*
* 1) Direct class instantiation:
*
* \code
* \\this code will print the 10 doubles randomly distributed in the range [0.0, 1.0].
*
* \\instantiate real uniform distribution from C++ standard libray
* std::uniform_real_distribution<double> udist(0.0, 1.0);
*
* \\
* Stream<std::uniform_real_distribution<double>, random_iterator::philox>
* uniform_stream(udist, 0x1a2b3c4d, 1 );
*
* for( size_t i=0; i<10; ++i )
* std::cout << uniform_stream[i] << std::endl;
* \endcode
*
* 2) Using ```random_iterator::make_stream(...)```:
*
* \code
* \\this code will print the 10 doubles randomly distributed in the range [0.0, 1.0].
*
* \\instantiate real uniform distribution from C++ standard libray
* std::uniform_real_distribution<double> udist(0.0, 1.0);
*
* \\instantiate the generator
* random_iterator::philox rng(0x1a2b3c4d);
*
* \\create stream
* auto uniform_stream = random_iterator::make_stream(udist, rng, 1 );
*
* for( size_t i=0; i<10; ++i )
* std::cout << uniform_stream[i] << std::endl;
* \endcode
*/
template <typename DistibutionType, typename EngineType>
class Stream {
typedef detail::EngineCaller<DistibutionType, EngineType> caller_type; //!
typedef random_iterator_tbb::counting_iterator<typename EngineType::advance_type>
counting_iterator; //!
public:
typedef EngineType engine_type; //! Engine type
typedef typename engine_type::state_type state_type; //! Type of RNG state
typedef typename engine_type::seed_type seed_type; //! Type of RNG seed
typedef typename engine_type::advance_type advance_type; //! Type of RNG displacement
typedef typename engine_type::init_type init_type; //! Type of RNG initializer
typedef DistibutionType distribution_type; //! type of the STL compliant distribution
typedef typename distribution_type::result_type result_type; //! type of the result
typedef random_iterator_tbb::transform_iterator<caller_type, counting_iterator>
iterator; //! type of stream iterator
Stream() = delete;
/**
* Constructor.
*
* @param distribution Distribution with stl compliant interface.
* @param seed Seed for pseudorandom number generation.
* @param stream Pseudorandom number stream.
*/
Stream(distribution_type const& distribution, seed_type seed, uint32_t stream = 0)
: distribution_(distribution)
, engine_(seed, stream)
, seed_(seed)
, stream_(stream) {}
/**
* Copy constructor
*
* @param other
*/
Stream(Stream<DistibutionType, EngineType> const& other)
: distribution_(other.getDistribution())
, engine_(other.getEngine())
, seed_(other.getSeed())
, stream_(other.getStream()) {}
/**
* Returns an iterator to the first element of the stream.
* @return iterator to the first element of the stream.
*/
inline iterator begin() const {
counting_iterator first(0);
caller_type caller(distribution_, seed_, stream_);
return iterator(first, caller);
}
/**
* Returns an iterator to the past last element of the stream.
* @return iterator to the past last element of the stream.
*/
inline iterator end() const {
counting_iterator last(std::numeric_limits<advance_type>::max());
caller_type caller(distribution_, seed_, stream_);
return iterator(last, caller);
}
/**
* Returns the element at specified location n. No bounds checking is performed.
*
* @param n position in the sequence.
* @return element at location n
*/
inline result_type operator[](advance_type n) const {
caller_type caller(distribution_, seed_, stream_);
return caller(n);
}
/**
* At each call, this function will produce a pseudorandom number and advance the
* engine state.
*
* @return pseudorandom number distributed according with `DistributionType`
*/
inline result_type operator()(void) { return distribution_(engine_); }
/**
* Get the distribution
*
* @return Copy of the object's distribution.
*/
inline distribution_type getDistribution() const { return distribution_; }
/**
* Set the distribution
* @param dist
*
*/
inline void setDistribution(distribution_type dist) { distribution_ = dist; }
/**
* Get the associated engine
*
* @return Copy of the object's engine
*/
inline engine_type getEngine() const { return engine_; }
/**
* Set the object's engine
*
* @param engine
*/
inline void setEngine(engine_type engine) { engine_ = engine; }
/**
* Get the seed
*
* @return Copy of the object's seed
*/
inline const seed_type& getSeed() const { return seed_; }
/**
* Set the seed
*
* @param seed
*/
inline void setSeed(const seed_type& seed) { seed_ = seed; }
/**
* Get the object's stream number
*
* @return stream number
*/
inline uint32_t getStream() const { return stream_; }
/**
* Set the object's stream number
*
* @param stream
*/
inline void setStream(uint32_t stream) { stream_ = stream; }
friend inline std::ostream& operator<<(
std::ostream& os, const Stream<DistibutionType, EngineType>& be) {
return os << "engine: " << be.getEngine() << " stream: " << be.getStream()
<< " distribution: " << be.getDistribution();
}
private:
distribution_type distribution_;
engine_type engine_;
seed_type seed_;
uint32_t stream_;
};
/**
* Stream
*
* @param dist STL compliant distribution instance.
* @param eng Engine instance
* @param stream Stream number, in the range [0, 2^{32} - 1]
* @return Stream object.
*/
template <typename DistibutionType, typename EngineType>
Stream<DistibutionType, EngineType> make_stream(DistibutionType const& dist,
EngineType eng, uint32_t stream) {
return Stream<DistibutionType, EngineType>(dist, eng.getSeed(), stream);
}
} // namespace random_iterator
/*
* (c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu
*
* This software is distributed under the terms of the 3-clause BSD license.
* See file LICENSE for a full version of the license.
*/
/*
* Engine.hpp
*
* Created on: 22 de fev. de 2021
* Author: Antonio Augusto Alves Junior
*/
#pragma once
#include <limits>
#include "EngineTraits.hpp"
#include "SplitMix.hpp"
#include "Squares3_128.hpp"
#include "Squares4_128.hpp"
#include "Squares3_64.hpp"
#include "Squares4_64.hpp"
namespace random_iterator {
namespace detail {
template <typename Engine123>
class Engine {
typedef unsigned trigger_type;
public:
typedef Engine123 engine_type;
typedef typename detail::random_traits<engine_type>::state_type state_type;
typedef typename detail::random_traits<engine_type>::seed_type seed_type;
typedef typename detail::random_traits<engine_type>::advance_type advance_type;
typedef typename detail::random_traits<engine_type>::init_type init_type;
typedef typename detail::random_traits<engine_type>::result_type result_type;
static const unsigned arity = detail::random_traits<engine_type>::arity;
Engine() = delete;
Engine(result_type seed)
: engine_(engine_type{})
, cache_(state_type{})
, state_(state_type{})
, seed_(seed_type{})
, trigger_(arity) {
init_type temp{{}};
for (unsigned i = 0; i < temp.size(); ++i) {
temp[i] = detail::splitmix<result_type>(seed);
}
seed_ = temp;
}
Engine(result_type seed, uint32_t stream)
: engine_(engine_type{})
, cache_(state_type{})
, state_(state_type{})
, seed_(seed_type{})
, trigger_(arity) {
init_type temp{{}};
for (unsigned i = 0; i < temp.size(); ++i) {
temp[i] = detail::splitmix<result_type>(seed);
}
state_[arity - 1] = stream;
state_[arity - 1] = state_[arity - 1] << 32;
seed_ = temp;
}
Engine(init_type seed)
: engine_(engine_type{})
, cache_(state_type{})
, state_(state_type{})
, seed_(seed)
, trigger_(arity) {}
Engine(init_type seed, uint32_t stream)
: engine_(engine_type{})
, cache_(state_type{})
, state_(state_type{})
, seed_(seed)
, trigger_(arity) {
state_[arity - 1] = stream;
state_[arity - 1] << 32;
}
Engine(Engine<Engine123> const& other)
: engine_(engine_type{})
, cache_(other.getCache())
, state_(other.getState())
, seed_(other.getSeed())
, trigger_(other.getTrigger()) {}
inline Engine<Engine123>& operator=(Engine<Engine123> const& other) {
if (this == &other) return *this;
engine_ = engine_type{};
cache_ = other.getCache();
state_ = other.getState();
seed_ = other.getSeed();
trigger_ = other.getTrigger();
return *this;
}
inline result_type operator()(void) {
result_type result = 0;
if (trigger_ == arity) {
trigger_ = 0;
cache_ = engine_(state_.incr(), seed_);
result = cache_[trigger_];
++trigger_;
} else {
result = cache_[trigger_];
++trigger_;
}
return result;
}
inline void discard(advance_type n) {
state_.incr(n);
trigger_ = arity;
}
inline void reset(void) {
trigger_ = arity;
state_ = state_type{};
cache_ = state_type{};
}
inline const seed_type& getSeed() const { return seed_; }
inline void setSeed(seed_type seed) { seed_ = seed; }
inline void setSeed(result_type seed) {
init_type temp{{}};
for (unsigned i = 0; i < temp.size(); ++i) {
temp[i] = detail::splitmix<result_type>(seed);
}
seed_ = temp;
}
inline const state_type& getState() const { return state_; }
inline void setState(const state_type& state) { state_ = state; }
inline trigger_type getTrigger() const { return trigger_; }
inline void setTrigger(trigger_type trigger) { trigger_ = trigger; }
static constexpr result_type min() { return 0; }
static constexpr result_type max() {
return std::numeric_limits<result_type>::max();
}
friend inline std::ostream& operator<<(std::ostream& os,
const Engine<Engine123>& be) {
return os << "state: " << be.getState() << " seed: " << be.getSeed()
<< " trigger: " << be.getTrigger();
}
private:
inline state_type getCache() const { return cache_; }
engine_type engine_;
state_type cache_;
state_type state_;
seed_type seed_;
trigger_type trigger_;
};
typedef Engine<random_iterator_r123::Philox2x64> philox;
typedef Engine<random_iterator_r123::Threefry2x64> threefry;
typedef detail::Squares3_64 squares3_64;
typedef detail::Squares4_64 squares4_64;
typedef detail::Squares3_128 squares3_128;
typedef detail::Squares4_128 squares4_128;
#if RANDOM_ITERATOR_R123_USE_AES_NI
typedef Engine<random_iterator_r123::ARS2x64> ars;
#endif
} // namespace detail
} // namespace random_iterator
/*
* (c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu
*
* This software is distributed under the terms of the 3-clause BSD license.
* See file LICENSE for a full version of the license.
*/
/*
* EngineTraits.hpp
*
* Created on: 23 de fev. de 2021
* Author: Antonio Augusto Alves Junior
*/
#pragma once
#include <stdint.h>
#include "Random123/array.h"
#include "Random123/philox.h"
#include "Random123/threefry.h"
#include "Random123/ars.h"
#include "Random123/ReinterpretCtr.hpp"
namespace random_iterator {
namespace detail {
template <typename Engine>
struct random_traits;
/*
* random_traits<T>::state_type { counter, state}
* random_traits<T>::advance_type;
* random_traits<T>::init_type;
* random_traits<T>::result_type;
*/
// philox
template <>
struct random_traits<random_iterator_r123::Philox2x64> {
typedef typename random_iterator_r123::Philox2x64::ctr_type state_type;
typedef typename random_iterator_r123::Philox2x64::key_type seed_type;
typedef typename random_iterator_r123::Philox2x64::ukey_type init_type;
typedef uint64_t advance_type;
typedef state_type::value_type result_type;
enum { arity = 2 };
};
template <>
struct random_traits<random_iterator_r123::Philox4x64> {
typedef typename random_iterator_r123::Philox4x64::ctr_type state_type;
typedef typename random_iterator_r123::Philox4x64::key_type seed_type;
typedef typename random_iterator_r123::Philox4x64::ukey_type init_type;
typedef uint64_t advance_type;
typedef state_type::value_type result_type;
enum { arity = 4 };
};
//
template <>
struct random_traits<random_iterator_r123::Threefry2x64> {
typedef typename random_iterator_r123::Threefry2x64::ctr_type state_type;
typedef typename random_iterator_r123::Threefry2x64::key_type seed_type;
typedef typename random_iterator_r123::Threefry2x64::ukey_type init_type;
typedef uint64_t advance_type;
typedef state_type::value_type result_type;
enum { arity = 2 };
};
//
template <>
struct random_traits<random_iterator_r123::Threefry4x64> {
typedef typename random_iterator_r123::Threefry4x64::ctr_type state_type;
typedef typename random_iterator_r123::Threefry4x64::key_type seed_type;
typedef typename random_iterator_r123::Threefry4x64::ukey_type init_type;
typedef uint64_t advance_type;
typedef state_type::value_type result_type;
enum { arity = 4 };
};
#if RANDOM_ITERATOR_R123_USE_AES_NI
template <>
struct random_traits<random_iterator_r123::ARS4x32> {
typedef typename random_iterator_r123::ARS4x32::ctr_type state_type;
typedef typename random_iterator_r123::ARS4x32::key_type seed_type;
typedef typename random_iterator_r123::ARS4x32::ukey_type init_type;
typedef uint64_t advance_type;
typedef state_type::value_type result_type;
enum { arity = 4 };
};
template <>
struct random_traits<random_iterator_r123::ARS2x64> {
typedef typename random_iterator_r123::ARS2x64::ctr_type state_type;
typedef typename random_iterator_r123::ARS2x64::key_type seed_type;
typedef typename random_iterator_r123::ARS2x64::ukey_type init_type;
typedef uint64_t advance_type;
typedef state_type::value_type result_type;
enum { arity = 2 };
};
#endif
} // namespace detail
} // namespace random_iterator
/*----------------------------------------------------------------------------
*
* Copyright (C) 2021 - 2024 Antonio Augusto Alves Junior
*
* This file is part of RandomIterator.
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation and/or
* other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
* ----------------------------------------------------------------------------*/
/*
* Macros.h
*
* Created on: 04/03/2021
* Author: Antonio Augusto Alves Junior
*/
#pragma once
#if defined(__has_builtin)
//__has_builtin(__builtin_expect)
#if __has_builtin(__builtin_expect)
#define _LIKELY_(x) __builtin_expect(x, 1)
#define _UNLIKELY_(x) __builtin_expect(x, 0)
#else
#define _LIKELY_(x) x
#define _UNLIKELY_(x) x
#endif //__has_builtin(__builtin_expect)
#else
#define _LIKELY_(x) x
#define _UNLIKELY_(x) x
#endif //defined(__has_builtin)
//detect endianess
#if defined(__BYTE_ORDER) && __BYTE_ORDER == __BIG_ENDIAN || \
defined(__BIG_ENDIAN__) || \
defined(__ARMEB__) || \
defined(__THUMBEB__) || \
defined(__AARCH64EB__) || \
defined(_MIBSEB) || defined(__MIBSEB) || defined(__MIBSEB__)
#ifndef __BIG_ENDIAN__
#define __BIG_ENDIAN__
#endif
#elif defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN || \
defined(__LITTLE_ENDIAN__) || \
defined(__ARMEL__) || \
defined(__THUMBEL__) || \
defined(__AARCH64EL__) || \
defined(_MIPSEL) || defined(__MIPSEL) || defined(__MIPSEL__) || \
defined(_WIN32) || defined(__i386__) || defined(__x86_64__) || \
defined(_X86_) || defined(_IA64_)
#ifndef __LITTLE_ENDIAN__
#define __LITTLE_ENDIAN__
#endif
#else
#error "I don't know what architecture this is!"
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __MicroURNG_dot_hpp__
#define __MicroURNG_dot_hpp__
#include <stdexcept>
#include <limits>
namespace random_iterator_r123 {
/**
Given a CBRNG whose ctr_type has an unsigned integral value_type,
MicroURNG<CBRNG>(c, k) is a type that satisfies the
requirements of a C++11 Uniform Random Number Generator.
The intended purpose is for a MicroURNG to be passed
as an argument to a C++11 Distribution, e.g.,
std::normal_distribution. See examples/MicroURNG.cpp.
The MicroURNG functor has a period of "only"
ctr_type.size()*2^32,
after which it will silently repeat.
The high 32 bits of the highest word in the counter c, passed to
the constructor must be zero. MicroURNG uses these bits to
"count".
Older versions of the library permitted a second template
parameter by which the caller could control the number of
bits devoted to the URNG's internal counter. This flexibility
has been disabled because URNGs created with different
numbers of counter bits could, conceivably "collide".
\code
typedef ?someCBRNG? RNG;
RNG::ctr_type c = ...; // under application control
RNG::key_type k = ...; //
std::normal_distribution<float> nd;
MicroURNG<RNG> urng(c, k);
for(???){
...
nd(urng); // may be called several hundred times with BITS=10
...
}
\endcode
*/
template <typename CBRNG>
class MicroURNG {
// According to C++11, a URNG requires only a result_type,
// operator()(), min() and max() methods. Everything else
// (ctr_type, key_type, reset() method, etc.) is "value added"
// for the benefit of users that "know" that they're dealing with
// a MicroURNG.
public:
typedef CBRNG cbrng_type;
static const int BITS = 32;
typedef typename cbrng_type::ctr_type ctr_type;
typedef typename cbrng_type::key_type key_type;
typedef typename cbrng_type::ukey_type ukey_type;
typedef typename ctr_type::value_type result_type;
RANDOM_ITERATOR_R123_STATIC_ASSERT(std::numeric_limits<result_type>::digits >= BITS,
"The result_type must have at least 32 bits");
result_type operator()() {
if (last_elem == 0) {
// jam n into the high bits of c
const size_t W = std::numeric_limits<result_type>::digits;
ctr_type c = c0;
c[c0.size() - 1] |= n << (W - BITS);
rdata = b(c, k);
n++;
last_elem = rdata.size();
}
return rdata[--last_elem];
}
MicroURNG(cbrng_type _b, ctr_type _c0, ukey_type _uk)
: b(_b)
, c0(_c0)
, k(_uk)
, n(0)
, last_elem(0) {
chkhighbits();
}
MicroURNG(ctr_type _c0, ukey_type _uk)
: b()
, c0(_c0)
, k(_uk)
, n(0)
, last_elem(0) {
chkhighbits();
}
// _Min and _Max work around a bug in the library shipped with MacOS Xcode 4.5.2.
// See the commment in conventional/Engine.hpp.
const static result_type _Min = 0;
const static result_type _Max = ~((result_type)0);
static RANDOM_ITERATOR_R123_CONSTEXPR result_type min
RANDOM_ITERATOR_R123_NO_MACRO_SUBST() {
return _Min;
}
static RANDOM_ITERATOR_R123_CONSTEXPR result_type max
RANDOM_ITERATOR_R123_NO_MACRO_SUBST() {
return _Max;
}
// extra methods:
const ctr_type& counter() const { return c0; }
void reset(ctr_type _c0, ukey_type _uk) {
c0 = _c0;
chkhighbits();
k = _uk;
n = 0;
last_elem = 0;
}
private:
cbrng_type b;
ctr_type c0;
key_type k;
RANDOM_ITERATOR_R123_ULONG_LONG n;
size_t last_elem;
ctr_type rdata;
void chkhighbits() {
result_type r = c0[c0.size() - 1];
result_type mask = ((uint64_t)std::numeric_limits<result_type>::max
RANDOM_ITERATOR_R123_NO_MACRO_SUBST()) >>
BITS;
if ((r & mask) != r)
throw std::runtime_error("MicroURNG: c0, does not have high bits clear");
}
};
} // namespace random_iterator_r123
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __ReinterpretCtr_dot_hpp__
#define __ReinterpretCtr_dot_hpp__
#include "features/compilerfeatures.h"
#include <cstring>
namespace random_iterator_r123 {
/*!
ReinterpretCtr uses memcpy to map back and forth
between a CBRNG's ctr_type and the specified ToType. For example,
after:
typedef ReinterpretCtr<r123array4x32, Philox2x64> G;
G is a bona fide CBRNG with ctr_type r123array4x32.
WARNING: ReinterpretCtr is endian dependent. The
values returned by G, declared as above,
will depend on the endianness of the machine on which it runs.
*/
template <typename ToType, typename CBRNG>
struct ReinterpretCtr {
typedef ToType ctr_type;
typedef typename CBRNG::key_type key_type;
typedef typename CBRNG::ctr_type bctype;
typedef typename CBRNG::ukey_type ukey_type;
RANDOM_ITERATOR_R123_STATIC_ASSERT(
sizeof(ToType) == sizeof(bctype) && sizeof(typename bctype::value_type) != 16,
"ReinterpretCtr: sizeof(ToType) is not the same as sizeof(CBRNG::ctr_type) or "
"CBRNG::ctr_type::value_type looks like it might be __m128i");
// It's amazingly difficult to safely do conversions with __m128i.
// If we use the operator() implementation below with a CBRNG
// whose ctr_type is r123array1xm128i, gcc4.6 optimizes away the
// memcpys, inlines the operator()(c,k), and produces assembly
// language that ends with an aesenclast instruction with a
// destination operand pointing to an unaligned memory address ...
// Segfault! See: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444
// MSVC also produces code that crashes. We suspect a
// similar mechanism but haven't done the debugging necessary to
// be sure. We were able to 'fix' gcc4.6 by making bc a mutable
// data member rather than declaring it in the scope of
// operator(). That didn't fix the MSVC problems, though.
//
// Conclusion - don't touch __m128i, at least for now. The
// easiest (but highly imprecise) way to do that is the static
// assertion above that rejects bctype::value_types of size 16. -
// Sep 2011.
ctr_type operator()(ctr_type c, key_type k) {
bctype bc;
std::memcpy(&bc, &c, sizeof(c));
CBRNG b;
bc = b(bc, k);
std::memcpy(&c, &bc, sizeof(bc));
return c;
}
};
} // namespace random_iterator_r123
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __Random123_aes_dot_hpp__
#define __Random123_aes_dot_hpp__
#include "features/compilerfeatures.h"
#include "array.h"
/* Implement a bona fide AES block cipher. It's minimally
// checked against the test vector in FIPS-197 in ut_aes.cpp. */
#if RANDOM_ITERATOR_R123_USE_AES_NI
/** @ingroup AESNI */
typedef struct r123array1xm128i aesni1xm128i_ctr_t;
/** @ingroup AESNI */
typedef struct r123array1xm128i aesni1xm128i_ukey_t;
/** @ingroup AESNI */
typedef struct r123array4x32 aesni4x32_ukey_t;
/** @ingroup AESNI */
enum r123_enum_aesni1xm128i { aesni1xm128i_rounds = 10 };
/** \cond HIDDEN_FROM_DOXYGEN */
RANDOM_ITERATOR_R123_STATIC_INLINE __m128i AES_128_ASSIST (__m128i temp1, __m128i temp2) {
__m128i temp3;
temp2 = _mm_shuffle_epi32 (temp2 ,0xff);
temp3 = _mm_slli_si128 (temp1, 0x4);
temp1 = _mm_xor_si128 (temp1, temp3);
temp3 = _mm_slli_si128 (temp3, 0x4);
temp1 = _mm_xor_si128 (temp1, temp3);
temp3 = _mm_slli_si128 (temp3, 0x4);
temp1 = _mm_xor_si128 (temp1, temp3);
temp1 = _mm_xor_si128 (temp1, temp2);
return temp1;
}
RANDOM_ITERATOR_R123_STATIC_INLINE void aesni1xm128iexpand(aesni1xm128i_ukey_t uk, __m128i ret[11])
{
__m128i rkey = uk.v[0].m;
__m128i tmp2;
ret[0] = rkey;
tmp2 = _mm_aeskeygenassist_si128(rkey, 0x1);
rkey = AES_128_ASSIST(rkey, tmp2);
ret[1] = rkey;
tmp2 = _mm_aeskeygenassist_si128(rkey, 0x2);
rkey = AES_128_ASSIST(rkey, tmp2);
ret[2] = rkey;
tmp2 = _mm_aeskeygenassist_si128(rkey, 0x4);
rkey = AES_128_ASSIST(rkey, tmp2);
ret[3] = rkey;
tmp2 = _mm_aeskeygenassist_si128(rkey, 0x8);
rkey = AES_128_ASSIST(rkey, tmp2);
ret[4] = rkey;
tmp2 = _mm_aeskeygenassist_si128(rkey, 0x10);
rkey = AES_128_ASSIST(rkey, tmp2);
ret[5] = rkey;
tmp2 = _mm_aeskeygenassist_si128(rkey, 0x20);
rkey = AES_128_ASSIST(rkey, tmp2);
ret[6] = rkey;
tmp2 = _mm_aeskeygenassist_si128(rkey, 0x40);
rkey = AES_128_ASSIST(rkey, tmp2);
ret[7] = rkey;
tmp2 = _mm_aeskeygenassist_si128(rkey, 0x80);
rkey = AES_128_ASSIST(rkey, tmp2);
ret[8] = rkey;
tmp2 = _mm_aeskeygenassist_si128(rkey, 0x1b);
rkey = AES_128_ASSIST(rkey, tmp2);
ret[9] = rkey;
tmp2 = _mm_aeskeygenassist_si128(rkey, 0x36);
rkey = AES_128_ASSIST(rkey, tmp2);
ret[10] = rkey;
}
/** \endcond */
#ifdef __cplusplus
/** @ingroup AESNI */
struct aesni1xm128i_key_t{
__m128i k[11];
aesni1xm128i_key_t(){
aesni1xm128i_ukey_t uk;
uk.v[0].m = _mm_setzero_si128();
aesni1xm128iexpand(uk, k);
}
aesni1xm128i_key_t(const aesni1xm128i_ukey_t& uk){
aesni1xm128iexpand(uk, k);
}
aesni1xm128i_key_t(const aesni4x32_ukey_t& uk){
aesni1xm128i_ukey_t uk128;
uk128.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]);
aesni1xm128iexpand(uk128, k);
}
aesni1xm128i_key_t& operator=(const aesni1xm128i_ukey_t& uk){
aesni1xm128iexpand(uk, k);
return *this;
}
aesni1xm128i_key_t& operator=(const aesni4x32_ukey_t& uk){
aesni1xm128i_ukey_t uk128;
uk128.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]);
aesni1xm128iexpand(uk128, k);
return *this;
}
bool operator==(const aesni1xm128i_key_t& rhs) const{
for(int i=0; i<11; ++i){
// Sigh... No r123m128i(__m128i) constructor!
r123m128i li; li.m = k[i];
r123m128i ri; ri.m = rhs.k[i];
if( li != ri ) return false;
}
return true;
}
bool operator!=(const aesni1xm128i_key_t& rhs) const{
return !(*this == rhs);
}
friend std::ostream& operator<<(std::ostream& os, const aesni1xm128i_key_t& v){
r123m128i ki;
for(int i=0; i<10; ++i){
ki.m = v.k[i];
os << ki << " ";
}
ki.m = v.k[10];
return os << ki;
}
friend std::istream& operator>>(std::istream& is, aesni1xm128i_key_t& v){
r123m128i ki;
for(int i=0; i<11; ++i){
is >> ki;
v.k[i] = ki;
}
return is;
}
};
#else
typedef struct {
__m128i k[11];
}aesni1xm128i_key_t;
/** @ingroup AESNI */
RANDOM_ITERATOR_R123_STATIC_INLINE aesni1xm128i_key_t aesni1xm128ikeyinit(aesni1xm128i_ukey_t uk){
aesni1xm128i_key_t ret;
aesni1xm128iexpand(uk, ret.k);
return ret;
}
#endif
/** @ingroup AESNI */
RANDOM_ITERATOR_R123_STATIC_INLINE aesni1xm128i_ctr_t aesni1xm128i(aesni1xm128i_ctr_t in, aesni1xm128i_key_t k) {
__m128i x = _mm_xor_si128(k.k[0], in.v[0].m);
x = _mm_aesenc_si128(x, k.k[1]);
x = _mm_aesenc_si128(x, k.k[2]);
x = _mm_aesenc_si128(x, k.k[3]);
x = _mm_aesenc_si128(x, k.k[4]);
x = _mm_aesenc_si128(x, k.k[5]);
x = _mm_aesenc_si128(x, k.k[6]);
x = _mm_aesenc_si128(x, k.k[7]);
x = _mm_aesenc_si128(x, k.k[8]);
x = _mm_aesenc_si128(x, k.k[9]);
x = _mm_aesenclast_si128(x, k.k[10]);
{
aesni1xm128i_ctr_t ret;
ret.v[0].m = x;
return ret;
}
}
/** @ingroup AESNI */
RANDOM_ITERATOR_R123_STATIC_INLINE aesni1xm128i_ctr_t aesni1xm128i_R(unsigned R, aesni1xm128i_ctr_t in, aesni1xm128i_key_t k){
RANDOM_ITERATOR_R123_ASSERT(R==10);
return aesni1xm128i(in, k);
}
/** @ingroup AESNI */
typedef struct r123array4x32 aesni4x32_ctr_t;
/** @ingroup AESNI */
typedef aesni1xm128i_key_t aesni4x32_key_t;
/** @ingroup AESNI */
enum r123_enum_aesni4x32 { aesni4x32_rounds = 10 };
/** @ingroup AESNI */
RANDOM_ITERATOR_R123_STATIC_INLINE aesni4x32_key_t aesni4x32keyinit(aesni4x32_ukey_t uk){
aesni1xm128i_ukey_t uk128;
aesni4x32_key_t ret;
uk128.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]);
aesni1xm128iexpand(uk128, ret.k);
return ret;
}
/** @ingroup AESNI */
/** The aesni4x32_R function provides a C API to the @ref AESNI "AESNI" CBRNG, allowing the number of rounds to be specified explicitly **/
RANDOM_ITERATOR_R123_STATIC_INLINE aesni4x32_ctr_t aesni4x32_R(unsigned int Nrounds, aesni4x32_ctr_t c, aesni4x32_key_t k){
aesni1xm128i_ctr_t c128;
c128.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]);
c128 = aesni1xm128i_R(Nrounds, c128, k);
_mm_storeu_si128((__m128i*)&c.v[0], c128.v[0].m);
return c;
}
#define aesni4x32_rounds aesni1xm128i_rounds
/** The aesni4x32 macro provides a C API to the @ref AESNI "AESNI" CBRNG, uses the default number of rounds i.e. \c aesni4x32_rounds **/
/** @ingroup AESNI */
#define aesni4x32(c,k) aesni4x32_R(aesni4x32_rounds, c, k)
#ifdef __cplusplus
namespace random_iterator_r123{
/**
@defgroup AESNI ARS and AESNI Classes and Typedefs
The ARS4x32, ARS1xm128i, AESNI4x32 and AESNI1xm128i classes export the member functions, typedefs and
operator overloads required by a @ref CBRNG "CBRNG" class.
ARS1xm128i and AESNI1xm128i are based on the AES block cipher and rely on the AES-NI hardware instructions
available on some some new (2011) CPUs.
The ARS1xm128i CBRNG and the use of AES for random number generation are described in
<a href="http://dl.acm.org/citation.cfm?doid=2063405"><i>Parallel Random Numbers: As Easy as 1, 2, 3</i> </a>.
Although it uses some cryptographic primitives, ARS1xm128i uses a cryptographically weak key schedule and is \b not suitable for cryptographic use.
@class AESNI1xm128i
@ingroup AESNI
AESNI exports the member functions, typedefs and operator overloads required by a @ref CBRNG class.
AESNI1xm128i uses the crypotgraphic AES round function, including the cryptographic key schedule.
In contrast to the other CBRNGs in the Random123 library, the AESNI1xm128i_R::key_type is opaque
and is \b not identical to the AESNI1xm128i_R::ukey_type. Creating a key_type, using either the constructor
or assignment operator, is significantly more time-consuming than running the bijection (hundreds
of clock cycles vs. tens of clock cycles).
AESNI1xm128i is only available when the feature-test macro RANDOM_ITERATOR_R123_USE_AES_NI is true, which
should occur only when the compiler is configured to generate AES-NI instructions (or
when defaults are overridden by compile-time, compiler-command-line options).
As of September 2011, the authors know of no statistical flaws with AESNI1xm128i. It
would be an event of major cryptographic note if any such flaws were ever found.
*/
struct AESNI1xm128i{
typedef aesni1xm128i_ctr_t ctr_type;
typedef aesni1xm128i_ukey_t ukey_type;
typedef aesni1xm128i_key_t key_type;
static const unsigned int rounds=10;
ctr_type operator()(ctr_type ctr, key_type key) const{
return aesni1xm128i(ctr, key);
}
};
/* @class AESNI4x32 */
struct AESNI4x32{
typedef aesni4x32_ctr_t ctr_type;
typedef aesni4x32_ukey_t ukey_type;
typedef aesni4x32_key_t key_type;
static const unsigned int rounds=10;
ctr_type operator()(ctr_type ctr, key_type key) const{
return aesni4x32(ctr, key);
}
};
/** @ingroup AESNI
@class AESNI1xm128i_R
AESNI1xm128i_R is provided for completeness, but is only instantiable with ROUNDS=10, in
which case it is identical to AESNI1xm128i */
template <unsigned ROUNDS=10>
struct AESNI1xm128i_R : public AESNI1xm128i{
RANDOM_ITERATOR_R123_STATIC_ASSERT(ROUNDS==10, "AESNI1xm128i_R<R> is only valid with R=10");
};
/** @class AESNI4x32_R **/
template <unsigned ROUNDS=10>
struct AESNI4x32_R : public AESNI4x32{
RANDOM_ITERATOR_R123_STATIC_ASSERT(ROUNDS==10, "AESNI4x32_R<R> is only valid with R=10");
};
} // namespace random_iterator_r123
#endif /* __cplusplus */
#endif /* RANDOM_ITERATOR_R123_USE_AES_NI */
#if RANDOM_ITERATOR_R123_USE_AES_OPENSSL
#include "string.h"
#include <openssl/aes.h>
typedef struct r123array16x8 aesopenssl16x8_ctr_t;
typedef struct r123array16x8 aesopenssl16x8_ukey_t;
#ifdef __cplusplus
struct aesopenssl16x8_key_t{
AES_KEY k;
aesopenssl16x8_key_t(){
aesopenssl16x8_ukey_t ukey={{}};
AES_set_encrypt_key((const unsigned char *)&ukey.v[0], 128, &k);
}
aesopenssl16x8_key_t(const aesopenssl16x8_ukey_t& ukey){
AES_set_encrypt_key((const unsigned char *)&ukey.v[0], 128, &k);
}
aesopenssl16x8_key_t& operator=(const aesopenssl16x8_ukey_t& ukey){
AES_set_encrypt_key((const unsigned char *)&ukey.v[0], 128, &k);
return *this;
}
bool operator==(const aesopenssl16x8_key_t& rhs) const{
return (k.rounds == rhs.k.rounds) && 0==::memcmp(&k.rd_key[0], &rhs.k.rd_key[0], (k.rounds+1) * 4 * sizeof(uint32_t));
}
bool operator!=(const aesopenssl16x8_key_t& rhs) const{
return !(*this == rhs);
}
friend std::ostream& operator<<(std::ostream& os, const aesopenssl16x8_key_t& v){
os << v.k.rounds;
const unsigned int *p = &v.k.rd_key[0];
for(int i=0; i<(v.k.rounds+1); ++i){
os << " " << p[0] << " " << p[1] << " " << p[2] << " " << p[3];
p += 4;
}
return os;
}
friend std::istream& operator>>(std::istream& is, aesopenssl16x8_key_t& v){
is >> v.k.rounds;
unsigned int *p = &v.k.rd_key[0];
for(int i=0; i<(v.k.rounds+1); ++i){
is >> p[0] >> p[1] >> p[2] >> p[3];
p += 4;
}
return is;
}
};
#else
typedef struct aesopenssl16x8_key_t{
AES_KEY k;
}aesopenssl16x8_key_t;
RANDOM_ITERATOR_R123_STATIC_INLINE struct aesopenssl16x8_key_t aesopenssl16x8keyinit(aesopenssl16x8_ukey_t uk){
aesopenssl16x8_key_t ret;
AES_set_encrypt_key((const unsigned char *)&uk.v[0], 128, &ret.k);
return ret;
}
#endif
RANDOM_ITERATOR_R123_STATIC_INLINE RANDOM_ITERATOR_R123_FORCE_INLINE(aesopenssl16x8_ctr_t aesopenssl16x8_R(aesopenssl16x8_ctr_t ctr, aesopenssl16x8_key_t key));
RANDOM_ITERATOR_R123_STATIC_INLINE
aesopenssl16x8_ctr_t aesopenssl16x8_R(aesopenssl16x8_ctr_t ctr, aesopenssl16x8_key_t key){
aesopenssl16x8_ctr_t ret;
AES_encrypt((const unsigned char*)&ctr.v[0], (unsigned char *)&ret.v[0], &key.k);
return ret;
}
#define aesopenssl16x8_rounds aesni4x32_rounds
#define aesopenssl16x8(c,k) aesopenssl16x8_R(aesopenssl16x8_rounds)
#ifdef __cplusplus
namespace random_iterator_r123{
struct AESOpenSSL16x8{
typedef aesopenssl16x8_ctr_t ctr_type;
typedef aesopenssl16x8_key_t key_type;
typedef aesopenssl16x8_ukey_t ukey_type;
static const unsigned int rounds=10;
ctr_type operator()(const ctr_type& in, const key_type& k){
ctr_type out;
AES_encrypt((const unsigned char *)&in[0], (unsigned char *)&out[0], &k.k);
return out;
}
};
} // namespace random_iterator_r123
#endif /* __cplusplus */
#endif /* RANDOM_ITERATOR_R123_USE_AES_OPENSSL */
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef _r123array_dot_h__
#define _r123array_dot_h__
#include "features/compilerfeatures.h"
#include "features/sse.h"
#if !defined(__cplusplus) || defined(__METAL_MACOS__)
#define CXXMETHODS(_N, W, T)
#define CXXOVERLOADS(_N, W, T)
#define CXXMETHODS_REQUIRING_STL
#else
#include <stddef.h>
#include <algorithm>
#include <stdexcept>
#include <iterator>
#include <limits>
#include <iostream>
/** @defgroup arrayNxW The r123arrayNxW classes
Each of the r123arrayNxW is a fixed size array of N W-bit unsigned integers.
It is functionally equivalent to the C++11 std::array<N, uintW_t>,
but does not require C++11 features or libraries.
In addition to meeting most of the requirements of a Container,
it also has a member function, incr(), which increments the zero-th
element and carrys overflows into higher indexed elements. Thus,
by using incr(), sequences of up to 2^(N*W) distinct values
can be produced.
If SSE is supported by the compiler, then the class
r123array1xm128i is also defined, in which the data member is an
array of one r123m128i object.
When compiling with __CUDA_ARCH__ defined, the reverse iterator
methods (rbegin, rend, crbegin, crend) are not defined because
CUDA does not support std::reverse_iterator.
*/
/** @cond HIDDEN_FROM_DOXYGEN */
template <typename value_type>
inline RANDOM_ITERATOR_R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){
value_type v=0;
for(size_t i=0; i<(3+sizeof(value_type))/4; ++i)
v |= ((value_type)(*p32++)) << (32*i);
return v;
}
/** @endcond */
#ifdef __CUDA_ARCH__
/* CUDA can't handle std::reverse_iterator. We *could* implement it
ourselves, but let's not bother until somebody really feels a need
to reverse-iterate through an r123array */
#define CXXMETHODS_REQUIRING_STL
#else
#define CXXMETHODS_REQUIRING_STL \
public: \
typedef std::reverse_iterator<iterator> reverse_iterator; \
typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \
RANDOM_ITERATOR_R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \
RANDOM_ITERATOR_R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); }
#endif
// Work-alike methods and typedefs modeled on std::array:
#define CXXMETHODS(_N, W, T) \
typedef T value_type; \
typedef T* iterator; \
typedef const T* const_iterator; \
typedef value_type& reference; \
typedef const value_type& const_reference; \
typedef size_t size_type; \
typedef ptrdiff_t difference_type; \
typedef T* pointer; \
typedef const T* const_pointer; \
/* Boost.array has static_size. C++11 specializes tuple_size */ \
enum {static_size = _N}; \
RANDOM_ITERATOR_R123_CUDA_DEVICE reference operator[](size_type i){return v[i];} \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];} \
RANDOM_ITERATOR_R123_CUDA_DEVICE reference at(size_type i){ if(i >= _N) RANDOM_ITERATOR_R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >= _N) RANDOM_ITERATOR_R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE size_type size() const { return _N; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE size_type max_size() const { return _N; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE bool empty() const { return _N==0; }; \
RANDOM_ITERATOR_R123_CUDA_DEVICE iterator begin() { return &v[0]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE iterator end() { return &v[_N]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE pointer data(){ return &v[0]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE reference front(){ return v[0]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reference front() const{ return v[0]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE reference back(){ return v[_N-1]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ \
/* CUDA3 does not have std::equal */ \
for (size_t i = 0; i < _N; ++i) \
if (v[i] != rhs.v[i]) return false; \
return true; \
} \
RANDOM_ITERATOR_R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } \
/* CUDA3 does not have std::fill_n */ \
RANDOM_ITERATOR_R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } \
RANDOM_ITERATOR_R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ \
/* CUDA3 does not have std::swap_ranges */ \
for (size_t i = 0; i < _N; ++i) { \
T tmp = v[i]; \
v[i] = rhs.v[i]; \
rhs.v[i] = tmp; \
} \
} \
RANDOM_ITERATOR_R123_CUDA_DEVICE r123array##_N##x##W& incr(RANDOM_ITERATOR_R123_ULONG_LONG n=1){ \
/* This test is tricky because we're trying to avoid spurious \
complaints about illegal shifts, yet still be compile-time \
evaulated. */ \
if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) \
return incr_carefully(n); \
if(n==1){ \
++v[0]; \
if(_N==1 || RANDOM_ITERATOR_R123_BUILTIN_EXPECT(!!v[0], 1)) return *this; \
}else{ \
v[0] += n; \
if(_N==1 || RANDOM_ITERATOR_R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this; \
} \
/* We expect that the N==?? tests will be \
constant-folded/optimized away by the compiler, so only the \
overflow tests (!!v[i]) remain to be done at runtime. For \
small values of N, it would be better to do this as an \
uncondtional sequence of adc. An experiment/optimization \
for another day... \
N.B. The weird subscripting: v[_N>3?3:0] is to silence \
a spurious error from icpc \
*/ \
++v[_N>1?1:0]; \
if(_N==2 || RANDOM_ITERATOR_R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; \
++v[_N>2?2:0]; \
if(_N==3 || RANDOM_ITERATOR_R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this; \
++v[_N>3?3:0]; \
for(size_t i=4; i<_N; ++i){ \
if( RANDOM_ITERATOR_R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this; \
++v[i]; \
} \
return *this; \
} \
/* seed(SeedSeq) would be a constructor if having a constructor */ \
/* didn't cause headaches with defaults */ \
template <typename SeedSeq> \
RANDOM_ITERATOR_R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){ \
r123array##_N##x##W ret; \
const size_t Ngen = _N*((3+sizeof(value_type))/4); \
uint32_t u32[Ngen]; \
uint32_t *p32 = &u32[0]; \
ss.generate(&u32[0], &u32[Ngen]); \
for(size_t i=0; i<_N; ++i){ \
ret.v[i] = assemble_from_u32<value_type>(p32); \
p32 += (3+sizeof(value_type))/4; \
} \
return ret; \
} \
protected: \
RANDOM_ITERATOR_R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(RANDOM_ITERATOR_R123_ULONG_LONG n){ \
/* n may be greater than the maximum value of a single value_type */ \
value_type vtn; \
vtn = n; \
v[0] += n; \
const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); \
for(size_t i=1; i<_N; ++i){ \
if(rshift){ \
n >>= rshift; \
}else{ \
n=0; \
} \
if( v[i-1] < vtn ) \
++n; \
if( n==0 ) break; \
vtn = n; \
v[i] += n; \
} \
return *this; \
} \
/** @cond HIDDEN_FROM_DOXYGEN */
// There are several tricky considerations for the insertion and extraction
// operators:
// - we would like to be able to print r123array16x8 as a sequence of 16 integers,
// not as 16 bytes.
// - we would like to be able to print r123array1xm128i.
// - we do not want an int conversion operator in r123m128i because it causes
// lots of ambiguity problems with automatic promotions.
// Solution: r123arrayinsertable and r123arrayextractable
template<typename T>
struct r123arrayinsertable{
const T& v;
r123arrayinsertable(const T& t_) : v(t_) {}
friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<T>& t){
return os << t.v;
}
};
template<>
struct r123arrayinsertable<uint8_t>{
const uint8_t& v;
r123arrayinsertable(const uint8_t& t_) : v(t_) {}
friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<uint8_t>& t){
return os << (int)t.v;
}
};
template<typename T>
struct r123arrayextractable{
T& v;
r123arrayextractable(T& t_) : v(t_) {}
friend std::istream& operator>>(std::istream& is, r123arrayextractable<T>& t){
return is >> t.v;
}
};
template<>
struct r123arrayextractable<uint8_t>{
uint8_t& v;
r123arrayextractable(uint8_t& t_) : v(t_) {}
friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){
int i;
is >> i;
t.v = i;
return is;
}
};
/** @endcond */
#define CXXOVERLOADS(_N, W, T) \
\
inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){ \
os << r123arrayinsertable<T>(a.v[0]); \
for(size_t i=1; i<_N; ++i) \
os << " " << r123arrayinsertable<T>(a.v[i]); \
return os; \
} \
\
inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){ \
for(size_t i=0; i<_N; ++i){ \
r123arrayextractable<T> x(a.v[i]); \
is >> x; \
} \
return is; \
} \
\
namespace random_iterator_r123{ \
typedef r123array##_N##x##W Array##_N##x##W; \
}
#endif /* __cplusplus */
/* _r123array_tpl expands to a declaration of struct r123arrayNxW.
In C, it's nothing more than a struct containing an array of N
objects of type T.
In C++ it's the same, but endowed with an assortment of member
functions, typedefs and friends. In C++, r123arrayNxW looks a lot
like std::array<T,N>, has most of the capabilities of a container,
and satisfies the requirements outlined in compat/Engine.hpp for
counter and key types. ArrayNxW, in the r123 namespace is
a typedef equivalent to r123arrayNxW.
*/
#define _r123array_tpl(_N, W, T) \
/** @ingroup arrayNxW */ \
/** @see arrayNxW */ \
struct r123array##_N##x##W{ \
T v[_N]; \
CXXMETHODS(_N, W, T) \
CXXMETHODS_REQUIRING_STL \
}; \
\
CXXOVERLOADS(_N, W, T)
_r123array_tpl(1, 32, uint32_t) /* r123array1x32 */
_r123array_tpl(2, 32, uint32_t) /* r123array2x32 */
_r123array_tpl(4, 32, uint32_t) /* r123array4x32 */
_r123array_tpl(8, 32, uint32_t) /* r123array8x32 */
#if RANDOM_ITERATOR_R123_USE_64BIT
_r123array_tpl(1, 64, uint64_t) /* r123array1x64 */
_r123array_tpl(2, 64, uint64_t) /* r123array2x64 */
_r123array_tpl(4, 64, uint64_t) /* r123array4x64 */
#endif
_r123array_tpl(16, 8, uint8_t) /* r123array16x8 for ARSsw, AESsw */
#if RANDOM_ITERATOR_R123_USE_SSE
_r123array_tpl(1, m128i, r123m128i) /* r123array1x128i for ARSni, AESni */
#endif
/* In C++, it's natural to use sizeof(a::value_type), but in C it's
pretty convoluted to figure out the width of the value_type of an
r123arrayNxW:
*/
#define RANDOM_ITERATOR_R123_W(a) (8*sizeof(((a *)0)->v[0]))
/** @namespace random_iterator_r123
Most of the Random123 C++ API is contained in the r123 namespace.
*/
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __Random123_ars_dot_hpp__
#define __Random123_ars_dot_hpp__
#include "features/compilerfeatures.h"
#include "array.h"
#if RANDOM_ITERATOR_R123_USE_AES_NI
#ifndef ARS1xm128i_DEFAULT_ROUNDS
#define ARS1xm128i_DEFAULT_ROUNDS 7
#endif
/** @ingroup AESNI */
enum r123_enum_ars1xm128i {ars1xm128i_rounds = ARS1xm128i_DEFAULT_ROUNDS};
/* ARS1xm128i with Weyl keys. Fast, and Crush-resistant, but NOT CRYPTO. */
/** @ingroup AESNI */
typedef struct r123array1xm128i ars1xm128i_ctr_t;
/** @ingroup AESNI */
typedef struct r123array1xm128i ars1xm128i_key_t;
/** @ingroup AESNI */
typedef struct r123array1xm128i ars1xm128i_ukey_t;
/** @ingroup AESNI */
RANDOM_ITERATOR_R123_STATIC_INLINE ars1xm128i_key_t ars1xm128ikeyinit(ars1xm128i_ukey_t uk) { return uk; }
/** @ingroup AESNI */
RANDOM_ITERATOR_R123_STATIC_INLINE ars1xm128i_ctr_t ars1xm128i_R(unsigned int Nrounds, ars1xm128i_ctr_t in, ars1xm128i_key_t k){
__m128i kweyl = _mm_set_epi64x(RANDOM_ITERATOR_R123_64BIT(0xBB67AE8584CAA73B), /* sqrt(3) - 1.0 */
RANDOM_ITERATOR_R123_64BIT(0x9E3779B97F4A7C15)); /* golden ratio */
/* N.B. the aesenc instructions do the xor *after*
// so if we want to follow the AES pattern, we
// have to do the initial xor explicitly */
__m128i kk = k.v[0].m;
__m128i v = _mm_xor_si128(in.v[0].m, kk);
ars1xm128i_ctr_t ret;
RANDOM_ITERATOR_R123_ASSERT(Nrounds<=10);
if( Nrounds>1 ){
kk = _mm_add_epi64(kk, kweyl);
v = _mm_aesenc_si128(v, kk);
}
if( Nrounds>2 ){
kk = _mm_add_epi64(kk, kweyl);
v = _mm_aesenc_si128(v, kk);
}
if( Nrounds>3 ){
kk = _mm_add_epi64(kk, kweyl);
v = _mm_aesenc_si128(v, kk);
}
if( Nrounds>4 ){
kk = _mm_add_epi64(kk, kweyl);
v = _mm_aesenc_si128(v, kk);
}
if( Nrounds>5 ){
kk = _mm_add_epi64(kk, kweyl);
v = _mm_aesenc_si128(v, kk);
}
if( Nrounds>6 ){
kk = _mm_add_epi64(kk, kweyl);
v = _mm_aesenc_si128(v, kk);
}
if( Nrounds>7 ){
kk = _mm_add_epi64(kk, kweyl);
v = _mm_aesenc_si128(v, kk);
}
if( Nrounds>8 ){
kk = _mm_add_epi64(kk, kweyl);
v = _mm_aesenc_si128(v, kk);
}
if( Nrounds>9 ){
kk = _mm_add_epi64(kk, kweyl);
v = _mm_aesenc_si128(v, kk);
}
kk = _mm_add_epi64(kk, kweyl);
v = _mm_aesenclast_si128(v, kk);
ret.v[0].m = v;
return ret;
}
/** @def ars1xm128i
@ingroup AESNI
The ars1mx128i macro provides a C API interface to the @ref AESNI "ARS" CBRNG with the default number of rounds i.e. \c ars1xm128i_rounds **/
#define ars1xm128i(c,k) ars1xm128i_R(ars1xm128i_rounds, c, k)
/** @ingroup AESNI */
typedef struct r123array4x32 ars4x32_ctr_t;
/** @ingroup AESNI */
typedef struct r123array4x32 ars4x32_key_t;
/** @ingroup AESNI */
typedef struct r123array4x32 ars4x32_ukey_t;
typedef struct r123array2x64 ars2x64_ctr_t;
/** @ingroup AESNI */
typedef struct r123array2x64 ars2x64_key_t;
/** @ingroup AESNI */
typedef struct r123array2x64 ars2x64_ukey_t;
/** @ingroup AESNI */
enum r123_enum_ars4x32 {ars4x32_rounds = ARS1xm128i_DEFAULT_ROUNDS};
/** @ingroup AESNI */
RANDOM_ITERATOR_R123_STATIC_INLINE ars4x32_key_t ars4x32keyinit(ars4x32_ukey_t uk) { return uk; }
/** @ingroup AESNI */
RANDOM_ITERATOR_R123_STATIC_INLINE ars4x32_ctr_t ars4x32_R(unsigned int Nrounds, ars4x32_ctr_t c, ars4x32_key_t k){
ars1xm128i_ctr_t c128;
ars1xm128i_key_t k128;
c128.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]);
k128.v[0].m = _mm_set_epi32(k.v[3], k.v[2], k.v[1], k.v[0]);
c128 = ars1xm128i_R(Nrounds, c128, k128);
_mm_storeu_si128((__m128i*)&c.v[0], c128.v[0].m);
return c;
}
/** @def ars4x32
@ingroup AESNI
The ars4x32 macro provides a C API interface to the @ref AESNI "ARS" CBRNG with the default number of rounds i.e. \c ars4x32_rounds **/
#define ars4x32(c,k) ars4x32_R(ars4x32_rounds, c, k)
#ifdef __cplusplus
namespace random_iterator_r123{
/**
@ingroup AESNI
ARS1xm128i_R exports the member functions, typedefs and operator overloads required by a @ref CBRNG class.
ARS1xm128i uses the crypotgraphic AES round function, but a @b non-cryptographc key schedule
to save time and space.
ARS1xm128i is only available when the feature-test macro RANDOM_ITERATOR_R123_USE_AES_NI is true, which
should occur only when the compiler is configured to generate AES-NI instructions (or
when defaults are overridden by compile-time, compiler-command-line options).
The template argument, ROUNDS, is the number of times the ARS round
functions will be applied.
As of September 2011, the authors know of no statistical flaws with
ROUNDS=5 or more.
@class ARS1xm128i_R
*/
template<unsigned int ROUNDS>
struct ARS1xm128i_R{
typedef ars1xm128i_ctr_t ctr_type;
typedef ars1xm128i_key_t key_type;
typedef ars1xm128i_key_t ukey_type;
static const unsigned int rounds=ROUNDS;
RANDOM_ITERATOR_R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
return ars1xm128i_R(ROUNDS, ctr, key);
}
};
/** @class ARS4x32_R
@ingroup AESNI
*/
template<unsigned int ROUNDS>
struct ARS4x32_R{
typedef ars4x32_ctr_t ctr_type;
typedef ars4x32_key_t key_type;
typedef ars4x32_key_t ukey_type;
static const unsigned int rounds=ROUNDS;
RANDOM_ITERATOR_R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
return ars4x32_R(ROUNDS, ctr, key);
}
};
template<unsigned int ROUNDS>
struct ARS2x64_R{
typedef ars2x64_ctr_t ctr_type;
typedef ars2x64_key_t key_type;
typedef ars2x64_key_t ukey_type;
static const unsigned int rounds=ROUNDS;
RANDOM_ITERATOR_R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
ars4x32_ctr_t ctr_{ctr.v[0], ctr.v[0]>>32 , ctr.v[1], ctr.v[1]>>32};
ars4x32_key_t key_{key.v[0], key.v[0]>>32 , key.v[1], key.v[1]>>32};
ars4x32_ctr_t res_ = ars4x32_R(ROUNDS, ctr_, key_);
ctr_type res{{}};
res.v[0] = (res.v[0] | res_[1])<<32;
res.v[0] = (res.v[0] | res_[0]);
res.v[1] = (res.v[1] | res_[3])<<32;
res.v[1] = (res.v[1] | res_[2]);
return res;
}
};
/**
@ingroup AESNI
@class ARS1xm128i_R
ARS1xm128i is equivalent to ARS1xm128i_R<7>. With 7 rounds,
the ARS1xm128i CBRNG has a considerable safety margin over the minimum number
of rounds with no known statistical flaws, but still has excellent
performance. */
typedef ARS1xm128i_R<ars1xm128i_rounds> ARS1xm128i;
typedef ARS4x32_R<ars4x32_rounds> ARS4x32;
typedef ARS2x64_R<ars4x32_rounds> ARS2x64;
} // namespace random_iterator_r123
#endif /* __cplusplus */
#endif /* RANDOM_ITERATOR_R123_USE_AES_NI */
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// This file implements the Box-Muller method for generating gaussian
// random variables (GRVs). Box-Muller has the advantage of
// deterministically requiring exactly two uniform random variables as
// input and producing exactly two GRVs as output, which makes it
// especially well-suited to the counter-based generators in
// Random123. Other methods (e.g., Ziggurat, polar) require an
// indeterminate number of inputs for each output and so require a
// 'MicroURNG' to be used with Random123. The down side of Box-Muller
// is that it calls sincos, log and sqrt, which may be slow. However,
// on GPUs, these functions are remarkably fast, which makes
// Box-Muller the fastest GRV generator we know of on GPUs.
//
// This file exports two structs and one overloaded function,
// all in the r123 namespace:
// struct r123::float2{ float x,y; }
// struct r123::double2{ double x,y; }
//
// r123::float2 r123::boxmuller(uint32_t u0, uint32_t u1);
// r123::double2 r123::boxmuller(uint64_t u0, uint64_t u1);
//
// float2 and double2 are identical to their synonymous global-
// namespace structures in CUDA.
//
// This file may not be as portable, and has not been tested as
// rigorously as other files in the library, e.g., the generators.
// Nevertheless, we hope it is useful and we encourage developers to
// copy it and modify it for their own use. We invite comments and
// improvements.
#ifndef _r123_BOXMULLER_HPP__
#define _r123_BOXMULLER_HPP__
#include <Random123/features/compilerfeatures.h>
#include <Random123/uniform.hpp>
#include <math.h>
namespace random_iterator_r123 {
#if !defined(__CUDACC__)
typedef struct {
float x, y;
} float2;
typedef struct {
double x, y;
} double2;
#else
typedef ::float2 float2;
typedef ::double2 double2;
#endif
#if !defined(RANDOM_ITERATOR_R123_NO_SINCOS) && defined(__APPLE__)
/* MacOS X 10.10.5 (2015) doesn't have sincosf */
#define RANDOM_ITERATOR_R123_NO_SINCOS 1
#endif
#if RANDOM_ITERATOR_R123_NO_SINCOS /* enable this if sincos and sincosf are not in the \
math library */
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE void sincosf(
float x, float* s, float* c) {
*s = sinf(x);
*c = cosf(x);
}
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE void sincos(
double x, double* s, double* c) {
*s = sin(x);
*c = cos(x);
}
#endif /* sincos is not in the math library */
#if !defined(CUDART_VERSION) || \
CUDART_VERSION < 5000 /* enabled if sincospi and sincospif are not in math lib */
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE void sincospif(
float x, float* s, float* c) {
const float PIf = 3.1415926535897932f;
sincosf(PIf * x, s, c);
}
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE void sincospi(
double x, double* s, double* c) {
const double PI = 3.1415926535897932;
sincos(PI * x, s, c);
}
#endif /* sincospi is not in math lib */
/*
* take two 32bit unsigned random values and return a float2 with
* two random floats in a normal distribution via a Box-Muller transform
*/
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE float2
boxmuller(uint32_t u0, uint32_t u1) {
float r;
float2 f;
sincospif(uneg11<float>(u0), &f.x, &f.y);
r = sqrtf(-2.f * logf(u01<float>(u1))); // u01 is guaranteed to avoid 0.
f.x *= r;
f.y *= r;
return f;
}
/*
* take two 64bit unsigned random values and return a double2 with
* two random doubles in a normal distribution via a Box-Muller transform
*/
RANDOM_ITERATOR_R123_CUDA_DEVICE RANDOM_ITERATOR_R123_STATIC_INLINE double2
boxmuller(uint64_t u0, uint64_t u1) {
double r;
double2 f;
sincospi(uneg11<double>(u0), &f.x, &f.y);
r = sqrt(-2. * log(u01<double>(u1))); // u01 is guaranteed to avoid 0.
f.x *= r;
f.y *= r;
return f;
}
} // namespace random_iterator_r123
#endif /* BOXMULLER_H__ */
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __Engine_dot_hpp_
#define __Engine_dot_hpp_
#include "../features/compilerfeatures.h"
#include "../array.h"
#include <limits>
#include <stdexcept>
#include <sstream>
#include <algorithm>
#include <vector>
#if RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS
#include <type_traits>
#endif
namespace random_iterator_r123 {
/**
If G satisfies the requirements of a CBRNG, and has a ctr_type whose
value_type is an unsigned integral type, then Engine<G> satisfies
the requirements of a C++11 "Uniform Random Number Engine" and can
be used in any context where such an object is expected.
Note that wrapping a counter based RNG with a traditional API in
this way obscures much of the power of counter based PRNGs.
Nevertheless, it may be of value in applications that are already
coded to work with the C++11 random number engines.
The MicroURNG template in MicroURNG.hpp
provides the more limited functionality of a C++11 "Uniform
Random Number Generator", but leaves the application in control
of counters and keys and hence may be preferable to the Engine template.
For example, a MicroURNG allows one to use C++11 "Random Number
Distributions" without giving up control over the counters
and keys.
*/
template <typename CBRNG>
struct Engine {
typedef CBRNG cbrng_type;
typedef typename CBRNG::ctr_type ctr_type;
typedef typename CBRNG::key_type key_type;
typedef typename CBRNG::ukey_type ukey_type;
typedef typename ctr_type::value_type result_type;
protected:
cbrng_type b;
key_type key;
ctr_type c;
ctr_type v;
void fix_invariant() {
if (v.back() != 0) {
result_type vv = v.back();
v = b(c, key);
v.back() = vv;
}
}
public:
explicit Engine()
: b()
, c() {
ukey_type x = {{}};
v.back() = 0;
key = x;
}
explicit Engine(result_type r)
: b()
, c() {
ukey_type x = {{typename ukey_type::value_type(r)}};
v.back() = 0;
key = x;
}
// 26.5.3 says that the SeedSeq templates shouldn't particpate in
// overload resolution unless the type qualifies as a SeedSeq.
// How that is determined is unspecified, except that "as a
// minimum a type shall not qualify as a SeedSeq if it is
// implicitly convertible to a result_type."
//
// First, we make sure that even the non-const copy constructor
// works as expected. In addition, if we've got C++11
// type_traits, we use enable_if and is_convertible to implement
// the convertible-to-result_type restriction. Otherwise, the
// template is unconditional and will match in some surpirsing
// and undesirable situations.
Engine(Engine& e)
: b(e.b)
, key(e.key)
, c(e.c) {
v.back() = e.v.back();
fix_invariant();
}
Engine(const Engine& e)
: b(e.b)
, key(e.key)
, c(e.c) {
v.back() = e.v.back();
fix_invariant();
}
template <typename SeedSeq>
explicit Engine(SeedSeq& s
#if RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS
,
typename std::enable_if<
!std::is_convertible<SeedSeq, result_type>::value>::type* = 0
#endif
)
: b()
, c() {
ukey_type ukey = ukey_type::seed(s);
key = ukey;
v.back() = 0;
}
void seed(result_type r) { *this = Engine(r); }
template <typename SeedSeq>
void seed(SeedSeq& s
#if RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS
,
typename std::enable_if<
!std::is_convertible<SeedSeq, result_type>::value>::type* = 0
#endif
) {
*this = Engine(s);
}
void seed() { *this = Engine(); }
friend bool operator==(const Engine& lhs, const Engine& rhs) {
return lhs.c == rhs.c && lhs.v.back() == rhs.v.back() && lhs.key == rhs.key;
}
friend bool operator!=(const Engine& lhs, const Engine& rhs) {
return lhs.c != rhs.c || lhs.v.back() != rhs.v.back() || lhs.key != rhs.key;
}
friend std::ostream& operator<<(std::ostream& os, const Engine& be) {
return os << be.c << " " << be.key << " " << be.v.back();
}
friend std::istream& operator>>(std::istream& is, Engine& be) {
is >> be.c >> be.key >> be.v.back();
be.fix_invariant();
return is;
}
// The <random> shipped with MacOS Xcode 4.5.2 imposes a
// non-standard requirement that URNGs also have static data
// members: _Min and _Max. Later versions of libc++ impose the
// requirement only when constexpr isn't supported. Although the
// Xcode 4.5.2 requirement is clearly non-standard, it is unlikely
// to be fixed and it is very easy work around. We certainly
// don't want to go to great lengths to accommodate every buggy
// library we come across, but in this particular case, the effort
// is low and the benefit is high, so it's worth doing. Thanks to
// Yan Zhou for pointing this out to us. See similar code in
// ../MicroURNG.hpp
const static result_type _Min = 0;
const static result_type _Max = ~((result_type)0);
static RANDOM_ITERATOR_R123_CONSTEXPR result_type min
RANDOM_ITERATOR_R123_NO_MACRO_SUBST() {
return _Min;
}
static RANDOM_ITERATOR_R123_CONSTEXPR result_type max
RANDOM_ITERATOR_R123_NO_MACRO_SUBST() {
return _Max;
}
result_type operator()() {
if (c.size() == 1) // short-circuit the scalar case. Compilers aren't mind-readers.
return b(c.incr(), key)[0];
result_type& elem = v.back();
if (elem == 0) {
v = b(c.incr(), key);
result_type ret = v.back();
elem = c.size() - 1;
return ret;
}
return v[--elem];
}
void discard(RANDOM_ITERATOR_R123_ULONG_LONG skip) {
// don't forget: elem counts down
size_t nelem = c.size();
size_t sub = skip % nelem;
result_type& elem = v.back();
skip /= nelem;
if (elem < sub) {
elem += nelem;
skip++;
}
elem -= sub;
c.incr(skip);
fix_invariant();
}
//--------------------------
// Some bonus methods, not required for a Random Number
// Engine
// Constructors and seed() method for ukey_type seem useful
// We need const and non-const to supersede the SeedSeq template.
explicit Engine(const ukey_type& uk)
: key(uk)
, c() {
v.back() = 0;
}
explicit Engine(ukey_type& uk)
: key(uk)
, c() {
v.back() = 0;
}
void seed(const ukey_type& uk) { *this = Engine(uk); }
void seed(ukey_type& uk) { *this = Engine(uk); }
#if RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS
template <typename DUMMY = void>
explicit Engine(const key_type& k,
typename std::enable_if<!std::is_same<ukey_type, key_type>::value,
DUMMY>::type* = 0)
: key(k)
, c() {
v.back() = 0;
}
template <typename DUMMY = void>
void seed(const key_type& k,
typename std::enable_if<!std::is_same<ukey_type, key_type>::value,
DUMMY>::type* = 0) {
*this = Engine(k);
}
#endif
// Forward the e(counter) to the CBRNG we are templated
// on, using the current value of the key.
ctr_type operator()(const ctr_type& c) const { return b(c, key); }
key_type getkey() const { return key; }
// N.B. setkey(k) is different from seed(k) because seed(k) zeros
// the counter (per the C++11 requirements for an Engine), whereas
// setkey does not.
void setkey(const key_type& k) {
key = k;
fix_invariant();
}
// Maybe the caller want's to know the details of
// the internal state, e.g., so it can call a different
// bijection with the same counter.
std::pair<ctr_type, result_type> getcounter() const {
return std::make_pair(c, v.back());
}
// And the inverse.
void setcounter(const ctr_type& _c, result_type _elem) {
static const size_t nelem = c.size();
if (_elem >= nelem)
throw std::range_error("Engine::setcounter called with elem out of range");
c = _c;
v.back() = _elem;
fix_invariant();
}
void setcounter(const std::pair<ctr_type, result_type>& ce) {
setcounter(ce.first, ce.second);
}
};
} // namespace random_iterator_r123
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __r123_compat_gslrng_dot_h__
#define __r123_compat_gslrng_dot_h__
#include <gsl/gsl_rng.h>
#include <string.h>
/**
The macro: GSL_CBRNG(NAME, CBRNGNAME)
declares the necessary structs and constants that define a
gsl_rng_NAME type based on the counter-based RNG CBRNGNAME. For example:
Usage:
@code
#include <Random123/threefry.h>
#include <Random123/conventional/gsl_cbrng.h> // this file
GSL_CBRNG(cbrng, threefry4x32); // creates gsl_rng_cbrng
int main(int argc, char **argv){
gsl_rng *r = gsl_rng_alloc(gsl_rng_cbrng);
... use r as you would use any other gsl_rng ...
}
@endcode
It requires that NAME be the name of a CBRNG that follows the
naming and stylistic conventions of the Random123 library.
Note that wrapping a \ref CBRNG "counter-based PRNG" with a traditional API in
this way obscures much of the power of the CBRNG API.
Nevertheless, it may be of value to applications that are already
coded to work with GSL random number generators, and that wish
to use the RNGs in the Random123 library.
*/
#define GSL_CBRNG(NAME, CBRNGNAME) \
const gsl_rng_type *gsl_rng_##NAME; \
\
typedef struct{ \
CBRNGNAME##_ctr_t ctr; \
CBRNGNAME##_ctr_t r; \
CBRNGNAME##_key_t key; \
int elem; \
} NAME##_state; \
\
static unsigned long int NAME##_get(void *vstate){ \
NAME##_state *st = (NAME##_state *)vstate; \
const int N=sizeof(st->ctr.v)/sizeof(st->ctr.v[0]); \
if( st->elem == 0 ){ \
++st->ctr.v[0]; \
if( N>1 && st->ctr.v[0] == 0 ) ++st->ctr.v[1]; \
if( N>2 && st->ctr.v[1] == 0 ) ++st->ctr.v[2]; \
if( N>3 && st->ctr.v[2] == 0 ) ++st->ctr.v[3]; \
st->r = CBRNGNAME(st->ctr, st->key); \
st->elem = N; \
} \
return 0xffffffffUL & st->r.v[--st->elem]; \
} \
\
static double \
NAME##_get_double (void * vstate) \
{ \
return NAME##_get (vstate)/4294967296.0; \
} \
\
static void NAME##_set(void *vstate, unsigned long int s){ \
NAME##_state *st = (NAME##_state *)vstate; \
st->elem = 0; \
/* Assume that key and ctr have an array member, v, \
as if they are r123arrayNxW. If not, this will fail \
to compile. In particular, this macro fails to compile \
when the underlying CBRNG requires use of keyinit */ \
memset(&st->ctr.v[0], 0, sizeof(st->ctr.v)); \
memset(&st->key.v[0], 0, sizeof(st->key.v)); \
/* GSL 1.15 documentation says this about gsl_rng_set: \
Note that the most generators only accept 32-bit seeds, with higher \
values being reduced modulo 2^32. For generators with smaller \
ranges the maximum seed value will typically be lower. \
so we won't jump through any hoops here to deal with \
high bits if sizeof(unsigned long) > sizeof(uint32_t). */ \
st->key.v[0] = s; \
} \
\
static const gsl_rng_type NAME##_type = { \
#NAME, \
0xffffffffUL, \
0, \
sizeof(NAME##_state), \
&NAME##_set, \
&NAME##_get, \
&NAME##_get_double \
}; \
\
const gsl_rng_type *gsl_rng_##NAME = &NAME##_type
#endif
/*
Copyright 2010-2016, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __clangfeatures_dot_hpp
#define __clangfeatures_dot_hpp
#ifndef RANDOM_ITERATOR_R123_USE_X86INTRIN_H
#if (defined(__x86_64__)||defined(__i386__))
#define RANDOM_ITERATOR_R123_USE_X86INTRIN_H 1
#else
#define RANDOM_ITERATOR_R123_USE_X86INTRIN_H 0
#endif
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_UNRESTRICTED_UNIONS
#define RANDOM_ITERATOR_R123_USE_CXX11_UNRESTRICTED_UNIONS __has_feature(cxx_unrestricted_unions)
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_STATIC_ASSERT
#define RANDOM_ITERATOR_R123_USE_CXX11_STATIC_ASSERT __has_feature(cxx_static_assert)
#endif
// With clang-3.6, -Wall warns about unused-local-typedefs.
// The "obvious" thing to do is to ignore -Wunused-local-typedefs,
// but that doesn't work because earlier versions of clang blow
// up on an 'unknown warning group'. So we briefly ignore -Wall...
// It's tempting to just give up on static assertions in pre-c++11 code.
#if !RANDOM_ITERATOR_R123_USE_CXX11_STATIC_ASSERT && !defined(RANDOM_ITERATOR_R123_STATIC_ASSERT)
#define RANDOM_ITERATOR_R123_STATIC_ASSERT(expr, msg) \
_Pragma("clang diagnostic push") \
_Pragma("clang diagnostic ignored \"-Wall\"") \
typedef char static_assertion[(!!(expr))*2-1] \
_Pragma("clang diagnostic pop")
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_CONSTEXPR
#define RANDOM_ITERATOR_R123_USE_CXX11_CONSTEXPR __has_feature(cxx_constexpr)
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_EXPLICIT_CONVERSIONS
#define RANDOM_ITERATOR_R123_USE_CXX11_EXPLICIT_CONVERSIONS __has_feature(cxx_explicit_conversions)
#endif
// With clang-3.0, the apparently simpler:
// #define RANDOM_ITERATOR_R123_USE_CXX11_RANDOM __has_include(<random>)
// dumps core.
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_RANDOM
#if __cplusplus>=201103L && __has_include(<random>)
#define RANDOM_ITERATOR_R123_USE_CXX11_RANDOM 1
#else
#define RANDOM_ITERATOR_R123_USE_CXX11_RANDOM 0
#endif
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS
#if __cplusplus>=201103L && __has_include(<type_traits>)
#define RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS 1
#else
#define RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS 0
#endif
#endif
#include "gccfeatures.h"
#endif
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions, and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of D. E. Shaw Research nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/**
@page porting Preprocessor symbols for porting Random123 to different platforms.
The Random123 library is portable across C, C++, CUDA, OpenCL environments,
and multiple operating systems (Linux, Windows 7, Mac OS X, FreeBSD, Solaris).
This level of portability requires the abstraction of some features
and idioms that are either not standardized (e.g., asm statments), or for which
different vendors have their own standards (e.g., SSE intrinsics) or for
which vendors simply refuse to conform to well-established standards (e.g., <inttypes.h>).
Random123/features/compilerfeatures.h
conditionally includes a compiler-or-OS-specific Random123/featires/XXXfeatures.h file which
defines appropriate values for the preprocessor symbols which can be used with
a specific compiler or OS. Those symbols will then
be used by other header files and source files in the Random123
library (and may be used by applications) to control what actually
gets presented to the compiler.
Most of the symbols are boolean valued. In general, they will
\b always be defined with value either 1 or 0, so do
\b NOT use \#ifdef. Use \#if RANDOM_ITERATOR_R123_USE_SOMETHING instead.
Library users can override any value by defining the pp-symbol with a compiler option,
e.g.,
cc -DRANDOM_ITERATOR_R123_USE_MULHILO64_C99
will use a strictly c99 version of the full-width 64x64->128-bit multiplication
function, even if it would be disabled by default.
All boolean-valued pre-processor symbols in Random123/features/compilerfeatures.h start with the prefix RANDOM_ITERATOR_R123_USE_
@verbatim
AES_NI
AES_OPENSSL
SSE4_2
SSE4_1
SSE
STD_RANDOM
GNU_UINT128
ASM_GNU
ASM_MSASM
CPUID_MSVC
CXX11_RANDOM
CXX11_TYPE_TRAITS
CXX11_STATIC_ASSERT
CXX11_CONSTEXPR
CXX11_UNRESTRICTED_UNIONS
CXX11_EXPLICIT_CONVERSIONS
CXX11_LONG_LONG
CXX11_STD_ARRAY
CXX11
X86INTRIN_H
IA32INTRIN_H
XMMINTRIN_H
EMMINTRIN_H
SMMINTRIN_H
WMMINTRIN_H
INTRIN_H
MULHILO32_ASM
MULHILO64_ASM
MULHILO64_MSVC_INTRIN
MULHILO64_CUDA_INTRIN
MULHILO64_OPENCL_INTRIN
MULHILO64_C99
U01_DOUBLE
@endverbatim
Most have obvious meanings. Some non-obvious ones:
AES_NI and AES_OPENSSL are not mutually exclusive. You can have one,
both or neither.
GNU_UINT128 says that it's safe to use __uint128_t, but it
does not require its use. In particular, it should be
used in mulhilo<uint64_t> only if MULHILO64_ASM is unset.
If the XXXINTRIN_H macros are true, then one should
@code
#include <xxxintrin.h>
@endcode
to gain accesss to compiler intrinsics.
The CXX11_SOME_FEATURE macros allow the code to use specific
features of the C++11 language and library. The catchall
In the absence of a specific CXX11_SOME_FEATURE, the feature
is controlled by the catch-all RANDOM_ITERATOR_R123_USE_CXX11 macro.
U01_DOUBLE defaults on, and can be turned off (set to 0)
if one does not want the utility functions that convert to double
(i.e. u01_*_53()), e.g. on OpenCL without the cl_khr_fp64 extension.
There are a number of invariants that are always true. Application code may
choose to rely on these:
<ul>
<li>ASM_GNU and ASM_MASM are mutually exclusive
<li>The "higher" SSE values imply the lower ones.
</ul>
There are also non-boolean valued symbols:
<ul>
<li>RANDOM_ITERATOR_R123_STATIC_INLINE -
According to both C99 and GNU99, the 'static inline' declaration allows
the compiler to not emit code if the function is not used.
Note that the semantics of 'inline', 'static' and 'extern' in
gcc have changed over time and are subject to modification by
command line options, e.g., -std=gnu89, -fgnu-inline.
Nevertheless, it appears that the meaning of 'static inline'
has not changed over time and (with a little luck) the use of 'static inline'
here will be portable between versions of gcc and to other C99
compilers.
See: http://gcc.gnu.org/onlinedocs/gcc/Inline.html
http://www.greenend.org.uk/rjk/2003/03/inline.html
<li>RANDOM_ITERATOR_R123_FORCE_INLINE(decl) -
which expands to 'decl', adorned with the compiler-specific
embellishments to strongly encourage that the declared function be
inlined. If there is no such compiler-specific magic, it should
expand to decl, unadorned.
<li>RANDOM_ITERATOR_R123_CUDA_DEVICE - which expands to __device__ (or something else with
sufficiently similar semantics) when CUDA is in use, and expands
to nothing in other cases.
<li>RANDOM_ITERATOR_R123_METAL_THREAD_ADDRESS_SPACE - which expands to 'thread' (or
something else with sufficiently similar semantics) when compiling a
Metal kernel, and expands to nothing in other cases.
<li>RANDOM_ITERATOR_R123_ASSERT(x) - which expands to assert(x), or maybe to nothing at
all if we're in an environment so feature-poor that you can't even
call assert (I'm looking at you, CUDA and OpenCL), or even include
assert.h safely (OpenCL).
<li>RANDOM_ITERATOR_R123_STATIC_ASSERT(expr,msg) - which expands to
static_assert(expr,msg), or to an expression that
will raise a compile-time exception if expr is not true.
<li>RANDOM_ITERATOR_R123_ULONG_LONG - which expands to a declaration of the longest available
unsigned integer.
<li>RANDOM_ITERATOR_R123_64BIT(x) - expands to something equivalent to
UINT64_C(x) from <stdint.h>, even in environments where <stdint.h>
is not available, e.g., MSVC and OpenCL.
<li>RANDOM_ITERATOR_R123_BUILTIN_EXPECT(expr,likely_value) - expands to something with
the semantics of gcc's __builtin_expect(expr,likely_value). If
the environment has nothing like __builtin_expect, it should expand
to just expr.
</ul>
\cond HIDDEN_FROM_DOXYGEN
*/
/*
N.B. When something is added to the list of features, it should be
added to each of the *features.h files, AND to examples/ut_features.cpp.
*/
/* N.B. most other compilers (icc, nvcc, open64, llvm) will also define __GNUC__, so order matters. */
#if defined(__METAL_MACOS__)
#include "metalfeatures.h"
#elif defined(__OPENCL_VERSION__) && __OPENCL_VERSION__ > 0
#include "openclfeatures.h"
#elif defined(__CUDACC__)
#include "nvccfeatures.h"
#elif defined(__ICC)
#include "iccfeatures.h"
#elif defined(__xlC__)
#include "xlcfeatures.h"
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#include "sunprofeatures.h"
#elif defined(__OPEN64__)
#include "open64features.h"
#elif defined(__clang__)
#include "clangfeatures.h"
#elif defined(__GNUC__)
#include "gccfeatures.h"
#elif defined(__PGI)
#include "pgccfeatures.h"
#elif defined(_MSC_FULL_VER)
#include "msvcfeatures.h"
#else
#error "Can't identify compiler. You'll need to add a new xxfeatures.hpp"
{ /* maybe an unbalanced brace will terminate the compilation */
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11
#define RANDOM_ITERATOR_R123_USE_CXX11 (__cplusplus >= 201103L)
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_UNRESTRICTED_UNIONS
#define RANDOM_ITERATOR_R123_USE_CXX11_UNRESTRICTED_UNIONS RANDOM_ITERATOR_R123_USE_CXX11
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_STATIC_ASSERT
#define RANDOM_ITERATOR_R123_USE_CXX11_STATIC_ASSERT RANDOM_ITERATOR_R123_USE_CXX11
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_CONSTEXPR
#define RANDOM_ITERATOR_R123_USE_CXX11_CONSTEXPR RANDOM_ITERATOR_R123_USE_CXX11
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_EXPLICIT_CONVERSIONS
#define RANDOM_ITERATOR_R123_USE_CXX11_EXPLICIT_CONVERSIONS RANDOM_ITERATOR_R123_USE_CXX11
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_RANDOM
#define RANDOM_ITERATOR_R123_USE_CXX11_RANDOM RANDOM_ITERATOR_R123_USE_CXX11
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS
#define RANDOM_ITERATOR_R123_USE_CXX11_TYPE_TRAITS RANDOM_ITERATOR_R123_USE_CXX11
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_LONG_LONG
#define RANDOM_ITERATOR_R123_USE_CXX11_LONG_LONG RANDOM_ITERATOR_R123_USE_CXX11
#endif
#ifndef RANDOM_ITERATOR_R123_USE_CXX11_STD_ARRAY
#define RANDOM_ITERATOR_R123_USE_CXX11_STD_ARRAY RANDOM_ITERATOR_R123_USE_CXX11
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_C99
#define RANDOM_ITERATOR_R123_USE_MULHILO64_C99 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN 0
#endif
#ifndef RANDOM_ITERATOR_R123_USE_MULHILO32_MULHI_INTRIN
#define RANDOM_ITERATOR_R123_USE_MULHILO32_MULHI_INTRIN 0
#endif
#ifndef RANDOM_ITERATOR_R123_STATIC_ASSERT
#if RANDOM_ITERATOR_R123_USE_CXX11_STATIC_ASSERT
#define RANDOM_ITERATOR_R123_STATIC_ASSERT(expr, msg) static_assert(expr, msg)
#else
/* if msg always_looked_like_this, we could paste it into the name. Worth it? */
#define RANDOM_ITERATOR_R123_STATIC_ASSERT(expr, msg) typedef char static_assertion[(!!(expr))*2-1]
#endif
#endif
#ifndef RANDOM_ITERATOR_R123_CONSTEXPR
#if RANDOM_ITERATOR_R123_USE_CXX11_CONSTEXPR
#define RANDOM_ITERATOR_R123_CONSTEXPR constexpr
#else
#define RANDOM_ITERATOR_R123_CONSTEXPR
#endif
#endif
#ifndef RANDOM_ITERATOR_R123_USE_64BIT
#define RANDOM_ITERATOR_R123_USE_64BIT 1
#endif
#ifndef RANDOM_ITERATOR_R123_USE_PHILOX_64BIT
#define RANDOM_ITERATOR_R123_USE_PHILOX_64BIT (RANDOM_ITERATOR_R123_USE_64BIT && (RANDOM_ITERATOR_R123_USE_MULHILO64_ASM || RANDOM_ITERATOR_R123_USE_MULHILO64_MSVC_INTRIN || RANDOM_ITERATOR_R123_USE_MULHILO64_CUDA_INTRIN || RANDOM_ITERATOR_R123_USE_GNU_UINT128 || RANDOM_ITERATOR_R123_USE_MULHILO64_C99 || RANDOM_ITERATOR_R123_USE_MULHILO64_OPENCL_INTRIN || RANDOM_ITERATOR_R123_USE_MULHILO64_MULHI_INTRIN))
#endif
#ifndef RANDOM_ITERATOR_R123_ULONG_LONG
#if defined(__cplusplus) && !RANDOM_ITERATOR_R123_USE_CXX11_LONG_LONG
/* C++98 doesn't have long long. It doesn't have uint64_t either, but
we will have typedef'ed uint64_t to something in the xxxfeatures.h.
With luck, it won't elicit complaints from -pedantic. Cross your
fingers... */
#define RANDOM_ITERATOR_R123_ULONG_LONG uint64_t
#else
#define RANDOM_ITERATOR_R123_ULONG_LONG unsigned long long
#endif
#endif
/* UINT64_C should have been #defined by XXXfeatures.h, either by
#include <stdint.h> or through compiler-dependent hacks */
#ifndef RANDOM_ITERATOR_R123_64BIT
#define RANDOM_ITERATOR_R123_64BIT(x) UINT64_C(x)
#endif
#ifndef RANDOM_ITERATOR_R123_THROW
#define RANDOM_ITERATOR_R123_THROW(x) throw (x)
#endif
#ifndef RANDOM_ITERATOR_R123_METAL_THREAD_ADDRESS_SPACE
#define RANDOM_ITERATOR_R123_METAL_THREAD_ADDRESS_SPACE
#endif
#ifndef RANDOM_ITERATOR_R123_METAL_CONSTANT_ADDRESS_SPACE
#define RANDOM_ITERATOR_R123_METAL_CONSTANT_ADDRESS_SPACE
#endif
/*
* Windows.h (and perhaps other "well-meaning" code define min and
* max, so there's a high chance that our definition of min, max
* methods or use of std::numeric_limits min and max will cause
* complaints in any program that happened to include Windows.h or
* suchlike first. We use the null macro below in our own header
* files definition or use of min, max to defensively preclude
* this problem. It may not be enough; one might need to #define
* NOMINMAX before including Windows.h or compile with -DNOMINMAX.
*/
#define RANDOM_ITERATOR_R123_NO_MACRO_SUBST
/** \endcond */