From 7ec1580e356bc4d8ed9225d68ea098ac58fc561e Mon Sep 17 00:00:00 2001 From: Dominik Baack <dominik.baack@tu-dortmund.de> Date: Fri, 13 Nov 2020 22:33:15 +0100 Subject: [PATCH] Added Save Boost Histogram function --- .../framework/utility/SaveBootHistogram.hpp | 115 ++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 corsika/framework/utility/SaveBootHistogram.hpp diff --git a/corsika/framework/utility/SaveBootHistogram.hpp b/corsika/framework/utility/SaveBootHistogram.hpp new file mode 100644 index 000000000..765764062 --- /dev/null +++ b/corsika/framework/utility/SaveBootHistogram.hpp @@ -0,0 +1,115 @@ +/* + * (c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu + * + * This software is distributed under the terms of the GNU General Public + * Licence version 3 (GPL Version 3). See file LICENSE for a full version of + * the license. + */ + +#pragma once + +#include <cnpy/cnpy.hpp> + +#include <boost/histogram.hpp> + +#include <functional> +#include <memory> +#include <numeric> +#include <utility> +#include <vector> +#include <string> + +namespace corsika { + + enum class SaveMode { overwrite, append }; + + /** + * This functions saves a boost::histogram into a numpy file. Only rather basic axis + * types are supported: regular, variable, integer, category<int>. Only "ordinary" bin + * counts (i.e. a double or int) are supported, nothing fancy like profiles. + * + * Note that this function makes a temporary, dense copy of the histogram, which could + * be an issue for huge sizes (e.g. for high dimensions) + */ + template <class Axes, class Storage> + inline void save_hist(boost::histogram::histogram<Axes, Storage> const& h, + std::string const& filename, SaveMode mode = SaveMode::append) { + unsigned const rank = h.rank(); + + // append vs. overwrite + const std::string mode_str = (mode == SaveMode::append ? "a" : "w"); + + std::vector<size_t> axes_dims; + axes_dims.reserve(rank); + + auto overflow = std::make_unique<bool[]>(rank); + auto underflow = std::make_unique<bool[]>(rank); + + std::vector<char> axis_types; + axis_types.reserve(rank); + + for (unsigned int i = 0; i < rank; ++i) { + auto const& ax = h.axis(i); + unsigned const has_underflow = (ax.options() & 0x01) ? 1 : 0; + unsigned const has_overflow = (ax.options() & 0x02) ? 1 : 0; + + underflow[i] = has_underflow; + overflow[i] = has_overflow; + + axes_dims.emplace_back(ax.size() + has_underflow + has_overflow); + + if (ax.continuous()) { + axis_types.push_back('c'); + std::vector<double> ax_edges; + ax_edges.reserve(ax.size() + 1); + + for (int j = 0; j < ax.size(); ++j) { ax_edges.push_back(ax.bin(j).lower()); } + + ax_edges.push_back(ax.bin(ax.size() - 1).upper()); + + cnpy::npz_save(filename, std::string{"binedges_"} + std::to_string(i), + ax_edges.data(), {ax_edges.size()}, mode_str); + } else { + axis_types.push_back('d'); + std::vector<int64_t> bins; // we assume that discrete axes have integer bins + bins.reserve(ax.size()); + + for (int j = 0; j < ax.size(); ++j) { bins.push_back(ax.bin(j).lower()); } + + cnpy::npz_save(filename, std::string{"bins_"} + std::to_string(i), bins.data(), + {bins.size()}, mode_str); + } + } + + cnpy::npz_save(filename, std::string{"axistypes"}, axis_types.data(), {rank}, + mode_str); + cnpy::npz_save(filename, std::string{"overflow"}, overflow.get(), {rank}, mode_str); + cnpy::npz_save(filename, std::string{"underflow"}, underflow.get(), {rank}, mode_str); + + auto const prod_axis_size = std::accumulate(axes_dims.cbegin(), axes_dims.cend(), + unsigned{1}, std::multiplies<>()); + auto temp = std::make_unique<float[]>(prod_axis_size); + + assert(prod_axis_size == h.size()); + + // reduce multi-dim. to 1-dim, row-major (i.e., last axis index is contiguous in + // memory) take special care of underflow bins which have -1 as index and thus need + // to be shifted by +1 + + for (auto&& x : indexed(h, boost::histogram::coverage::all)) { + int p = 0; + + for (unsigned axis_index = 0; axis_index < rank; ++axis_index) { + int const offset_underflow = (h.axis(axis_index).options() & 0x01) ? 1 : 0; + auto k = x.index(axis_index) + offset_underflow; + p = k + p * axes_dims.at(axis_index); + } + + temp[p] = *x; + } + + cnpy::npz_save(filename, "data", temp.get(), axes_dims, mode_str); + // In Python this array can directly be assigned to a histogram view if that + // histogram has its axes correspondingly: hist.view(flow=True)[:] = file['data'] + } +} // namespace corsika -- GitLab