diff --git a/Framework/Utilities/CMakeLists.txt b/Framework/Utilities/CMakeLists.txt index b5796bfd535759eae184faed36693902527255df..f0dbc8991b9ab8e835ef8dc867114c3fc86ef2fb 100644 --- a/Framework/Utilities/CMakeLists.txt +++ b/Framework/Utilities/CMakeLists.txt @@ -13,6 +13,7 @@ else () endif() endif () +find_package(ZLIB) # # library setup @@ -32,6 +33,7 @@ set ( sgn.h CorsikaFenv.h MetaProgramming.h + SaveBoostHistogram.hpp ) set ( @@ -57,6 +59,8 @@ target_link_libraries ( CORSIKAunits C8::ext::boost # so far only for MetaProgramming C8::ext::eigen3 # for COMboost + cnpy # for SaveBoostHistogram + ZLIB::ZLIB # for SaveBoostHistogram ) target_include_directories ( @@ -89,3 +93,10 @@ target_link_libraries ( CORSIKAutilities CORSIKAtesting ) + +CORSIKA_ADD_TEST(testSaveBoostHistogram) +target_link_libraries ( + testSaveBoostHistogram + CORSIKAutilities + CORSIKAtesting +) diff --git a/Framework/Utilities/SaveBoostHistogram.hpp b/Framework/Utilities/SaveBoostHistogram.hpp new file mode 100644 index 0000000000000000000000000000000000000000..94b5f1bd5a5d752a380372b2e88c810d360df9b7 --- /dev/null +++ b/Framework/Utilities/SaveBoostHistogram.hpp @@ -0,0 +1,88 @@ +#include <corsika/third_party/cnpy/cnpy.hpp> + +#include <boost/histogram.hpp> + +#include <functional> +#include <memory> +#include <numeric> +#include <utility> +#include <vector> + +namespace corsika { + namespace utl { + + template <typename hist_type> + void save_hist(hist_type const& h, std::string const& filename) { + auto const rank = h.rank(); + auto const size = h.size(); + using value_type = typename hist_type::value_type; + + std::vector<size_t> axes_dims; + axes_dims.reserve(rank); + + std::vector<char> axis_types; + axis_types.reserve(rank); + + for (int i = 0; i < rank; ++i) { + auto const& ax = h.axis(i); + int const has_underflow = (ax.options() & 0x01) ? 1 : 0; + int const has_overflow = (ax.options() & 0x02) ? 1 : 0; + + axes_dims.emplace_back(ax.size() + has_underflow + has_overflow); + + if (ax.continuous()) { + axis_types.push_back('c'); + std::vector<double> ax_edges; + ax_edges.reserve(ax.size()); + + for (int j = 0; j <= ax.size(); ++j) { ax_edges.push_back(ax.bin(j).lower()); } + + cnpy::npz_save(filename, std::string{"binedges_"} + std::to_string(i), + ax_edges.data(), {ax_edges.size()}, "a"); + } else { + axis_types.push_back('d'); + std::vector<int64_t> bins; // we assume that discrete axes have integer bins + bins.reserve(ax.size()); + + for (int j = 0; j < ax.size(); ++j) { bins.push_back(ax.bin(j).lower()); } + + cnpy::npz_save(filename, std::string{"bins_"} + std::to_string(i), bins.data(), + {bins.size()}, "a"); + } + + cnpy::npz_save(filename, std::string{"axistypes"}, axis_types.data(), + {axis_types.size()}, "a"); + } + + auto const prod_axis_size = + std::accumulate(axes_dims.cbegin(), axes_dims.cend(), 1, std::multiplies<>()); + auto temp = std::make_unique<value_type[]>(prod_axis_size); + + std::cout << "rank = " << rank << std::endl; + std::cout << "size = " << size << std::endl; + std::cout << "prod_axis_size = " << prod_axis_size << std::endl; + + // reduce multi-dim. to 1-dim, row-major (i.e., last axis index is contiguous in + // memory) take special care of underflow bins, which have -1 as index and thus need + // to be shifted by +1 + + for (auto&& x : indexed(h, boost::histogram::coverage::all)) { + int const offset_underflow = (h.axis(0).options() & 0x01) ? 1 : 0; + auto p = x.index(0) + offset_underflow; // 1-d-index + + for (size_t axis_index = 1; axis_index < rank; ++axis_index) { + int const offset_underflow = (h.axis(axis_index).options() & 0x01) ? 1 : 0; + auto k = x.index(axis_index) + offset_underflow; + p = k + p * axes_dims.at(axis_index); + std::cout << " " << x.index(axis_index); + } + + temp[p] = *x; + } + + cnpy::npz_save(filename, "data", temp.get(), axes_dims, "a"); + // In Python this array can directly be assigned to a histogram view if that + // histogram has its axes correspondingly: hist.view(flow=True)[:] = file['data'] + } + } // namespace utl +} // namespace corsika diff --git a/Framework/Utilities/testSaveBoostHistogram.cc b/Framework/Utilities/testSaveBoostHistogram.cc new file mode 100644 index 0000000000000000000000000000000000000000..b00777480d769ec330e6cc528ae7fe211ff17d68 --- /dev/null +++ b/Framework/Utilities/testSaveBoostHistogram.cc @@ -0,0 +1,34 @@ +/* + * (c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu + * + * This software is distributed under the terms of the GNU General Public + * Licence version 3 (GPL Version 3). See file LICENSE for a full version of + * the license. + */ + +#include <catch2/catch.hpp> +#include <corsika/utl/SaveBoostHistogram.hpp> + +#include <random> + +TEST_CASE("SaveHistogram") { + std::mt19937 rng; + std::normal_distribution n1{2., 6.}; + std::exponential_distribution e{1.}; + std::uniform_int_distribution u{1, 10}; + + auto h = + boost::histogram::make_histogram(boost::histogram::axis::regular{5, 0, 10, "x"}, + boost::histogram::axis::regular{3, 0, 4, "y"}, + boost::histogram::axis::category{1, 4, 8}); + + for (int i{0}; i < 100'000; ++i) { + auto const a = n1(rng); + auto const b = e(rng); + auto const c = u(rng); + + h(a, b, c); + } + + corsika::utl::save_hist(h, "hist.npz"); +}