From 7ec1580e356bc4d8ed9225d68ea098ac58fc561e Mon Sep 17 00:00:00 2001
From: Dominik Baack <dominik.baack@tu-dortmund.de>
Date: Fri, 13 Nov 2020 22:33:15 +0100
Subject: [PATCH] Added Save Boost Histogram function

---
 .../framework/utility/SaveBootHistogram.hpp   | 115 ++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 corsika/framework/utility/SaveBootHistogram.hpp

diff --git a/corsika/framework/utility/SaveBootHistogram.hpp b/corsika/framework/utility/SaveBootHistogram.hpp
new file mode 100644
index 000000000..765764062
--- /dev/null
+++ b/corsika/framework/utility/SaveBootHistogram.hpp
@@ -0,0 +1,115 @@
+/*
+ * (c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu
+ *
+ * This software is distributed under the terms of the GNU General Public
+ * Licence version 3 (GPL Version 3). See file LICENSE for a full version of
+ * the license.
+ */
+
+#pragma once
+
+#include <cnpy/cnpy.hpp>
+
+#include <boost/histogram.hpp>
+
+#include <functional>
+#include <memory>
+#include <numeric>
+#include <utility>
+#include <vector>
+#include <string>
+
+namespace corsika {
+
+  enum class SaveMode { overwrite, append };
+
+  /**
+   * This functions saves a boost::histogram into a numpy file. Only rather basic axis
+   * types are supported: regular, variable, integer, category<int>. Only "ordinary" bin
+   * counts (i.e. a double or int) are supported, nothing fancy like profiles.
+   *
+   * Note that this function makes a temporary, dense copy of the histogram, which could
+   * be an issue for huge sizes (e.g. for high dimensions)
+   */
+  template <class Axes, class Storage>
+  inline void save_hist(boost::histogram::histogram<Axes, Storage> const& h,
+                        std::string const& filename, SaveMode mode = SaveMode::append) {
+    unsigned const rank = h.rank();
+
+    // append vs. overwrite
+    const std::string mode_str = (mode == SaveMode::append ? "a" : "w");
+
+    std::vector<size_t> axes_dims;
+    axes_dims.reserve(rank);
+
+    auto overflow = std::make_unique<bool[]>(rank);
+    auto underflow = std::make_unique<bool[]>(rank);
+
+    std::vector<char> axis_types;
+    axis_types.reserve(rank);
+
+    for (unsigned int i = 0; i < rank; ++i) {
+      auto const& ax = h.axis(i);
+      unsigned const has_underflow = (ax.options() & 0x01) ? 1 : 0;
+      unsigned const has_overflow = (ax.options() & 0x02) ? 1 : 0;
+
+      underflow[i] = has_underflow;
+      overflow[i] = has_overflow;
+
+      axes_dims.emplace_back(ax.size() + has_underflow + has_overflow);
+
+      if (ax.continuous()) {
+        axis_types.push_back('c');
+        std::vector<double> ax_edges;
+        ax_edges.reserve(ax.size() + 1);
+
+        for (int j = 0; j < ax.size(); ++j) { ax_edges.push_back(ax.bin(j).lower()); }
+
+        ax_edges.push_back(ax.bin(ax.size() - 1).upper());
+
+        cnpy::npz_save(filename, std::string{"binedges_"} + std::to_string(i),
+                       ax_edges.data(), {ax_edges.size()}, mode_str);
+      } else {
+        axis_types.push_back('d');
+        std::vector<int64_t> bins; // we assume that discrete axes have integer bins
+        bins.reserve(ax.size());
+
+        for (int j = 0; j < ax.size(); ++j) { bins.push_back(ax.bin(j).lower()); }
+
+        cnpy::npz_save(filename, std::string{"bins_"} + std::to_string(i), bins.data(),
+                       {bins.size()}, mode_str);
+      }
+    }
+
+    cnpy::npz_save(filename, std::string{"axistypes"}, axis_types.data(), {rank},
+                   mode_str);
+    cnpy::npz_save(filename, std::string{"overflow"}, overflow.get(), {rank}, mode_str);
+    cnpy::npz_save(filename, std::string{"underflow"}, underflow.get(), {rank}, mode_str);
+
+    auto const prod_axis_size = std::accumulate(axes_dims.cbegin(), axes_dims.cend(),
+                                                unsigned{1}, std::multiplies<>());
+    auto temp = std::make_unique<float[]>(prod_axis_size);
+
+    assert(prod_axis_size == h.size());
+
+    // reduce multi-dim. to 1-dim, row-major (i.e., last axis index is contiguous in
+    // memory) take special care of underflow bins which have -1 as index and thus need
+    // to be shifted by +1
+
+    for (auto&& x : indexed(h, boost::histogram::coverage::all)) {
+      int p = 0;
+
+      for (unsigned axis_index = 0; axis_index < rank; ++axis_index) {
+        int const offset_underflow = (h.axis(axis_index).options() & 0x01) ? 1 : 0;
+        auto k = x.index(axis_index) + offset_underflow;
+        p = k + p * axes_dims.at(axis_index);
+      }
+
+      temp[p] = *x;
+    }
+
+    cnpy::npz_save(filename, "data", temp.get(), axes_dims, mode_str);
+    // In Python this array can directly be assigned to a histogram view if that
+    // histogram has its axes correspondingly: hist.view(flow=True)[:] = file['data']
+  }
+} // namespace corsika
-- 
GitLab