IAP GITLAB

Skip to content
Snippets Groups Projects
Commit d5e26106 authored by Dominik Baack's avatar Dominik Baack Committed by ralfulrich
Browse files

Added Save Boost Histogram function

parent 473433c6
No related branches found
No related tags found
No related merge requests found
/*
* (c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu
*
* This software is distributed under the terms of the GNU General Public
* Licence version 3 (GPL Version 3). See file LICENSE for a full version of
* the license.
*/
#pragma once
#include <cnpy/cnpy.hpp>
#include <boost/histogram.hpp>
#include <functional>
#include <memory>
#include <numeric>
#include <utility>
#include <vector>
#include <string>
namespace corsika {
enum class SaveMode { overwrite, append };
/**
* This functions saves a boost::histogram into a numpy file. Only rather basic axis
* types are supported: regular, variable, integer, category<int>. Only "ordinary" bin
* counts (i.e. a double or int) are supported, nothing fancy like profiles.
*
* Note that this function makes a temporary, dense copy of the histogram, which could
* be an issue for huge sizes (e.g. for high dimensions)
*/
template <class Axes, class Storage>
inline void save_hist(boost::histogram::histogram<Axes, Storage> const& h,
std::string const& filename, SaveMode mode = SaveMode::append) {
unsigned const rank = h.rank();
// append vs. overwrite
const std::string mode_str = (mode == SaveMode::append ? "a" : "w");
std::vector<size_t> axes_dims;
axes_dims.reserve(rank);
auto overflow = std::make_unique<bool[]>(rank);
auto underflow = std::make_unique<bool[]>(rank);
std::vector<char> axis_types;
axis_types.reserve(rank);
for (unsigned int i = 0; i < rank; ++i) {
auto const& ax = h.axis(i);
unsigned const has_underflow = (ax.options() & 0x01) ? 1 : 0;
unsigned const has_overflow = (ax.options() & 0x02) ? 1 : 0;
underflow[i] = has_underflow;
overflow[i] = has_overflow;
axes_dims.emplace_back(ax.size() + has_underflow + has_overflow);
if (ax.continuous()) {
axis_types.push_back('c');
std::vector<double> ax_edges;
ax_edges.reserve(ax.size() + 1);
for (int j = 0; j < ax.size(); ++j) { ax_edges.push_back(ax.bin(j).lower()); }
ax_edges.push_back(ax.bin(ax.size() - 1).upper());
cnpy::npz_save(filename, std::string{"binedges_"} + std::to_string(i),
ax_edges.data(), {ax_edges.size()}, mode_str);
} else {
axis_types.push_back('d');
std::vector<int64_t> bins; // we assume that discrete axes have integer bins
bins.reserve(ax.size());
for (int j = 0; j < ax.size(); ++j) { bins.push_back(ax.bin(j).lower()); }
cnpy::npz_save(filename, std::string{"bins_"} + std::to_string(i), bins.data(),
{bins.size()}, mode_str);
}
}
cnpy::npz_save(filename, std::string{"axistypes"}, axis_types.data(), {rank},
mode_str);
cnpy::npz_save(filename, std::string{"overflow"}, overflow.get(), {rank}, mode_str);
cnpy::npz_save(filename, std::string{"underflow"}, underflow.get(), {rank}, mode_str);
auto const prod_axis_size = std::accumulate(axes_dims.cbegin(), axes_dims.cend(),
unsigned{1}, std::multiplies<>());
auto temp = std::make_unique<float[]>(prod_axis_size);
assert(prod_axis_size == h.size());
// reduce multi-dim. to 1-dim, row-major (i.e., last axis index is contiguous in
// memory) take special care of underflow bins which have -1 as index and thus need
// to be shifted by +1
for (auto&& x : indexed(h, boost::histogram::coverage::all)) {
int p = 0;
for (unsigned axis_index = 0; axis_index < rank; ++axis_index) {
int const offset_underflow = (h.axis(axis_index).options() & 0x01) ? 1 : 0;
auto k = x.index(axis_index) + offset_underflow;
p = k + p * axes_dims.at(axis_index);
}
temp[p] = *x;
}
cnpy::npz_save(filename, "data", temp.get(), axes_dims, mode_str);
// In Python this array can directly be assigned to a histogram view if that
// histogram has its axes correspondingly: hist.view(flow=True)[:] = file['data']
}
} // namespace corsika
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment