From f3cc1227f5e4f48ad6730f093e9e45d4063a74a6 Mon Sep 17 00:00:00 2001
From: Remy Prechelt <prechelt@hawaii.edu>
Date: Wed, 7 Apr 2021 22:55:03 -1000
Subject: [PATCH] Enable ZSTD compression for output files.

---
 conanfile.txt                                            | 3 ++-
 .../modules/writers/ObservationPlaneWriterParquet.inl    | 3 +++
 corsika/detail/output/ParquetStreamer.inl                | 9 ++++++---
 corsika/output/ParquetStreamer.hpp                       | 5 +++++
 4 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/conanfile.txt b/conanfile.txt
index 29ec2d5f3..a824f5567 100644
--- a/conanfile.txt
+++ b/conanfile.txt
@@ -6,6 +6,7 @@ boost/1.74.0
 zlib/1.2.11
 yaml-cpp/0.6.3
 arrow/2.0.0
+zstd/1.4.9
 
 [generators]
 cmake
@@ -13,7 +14,7 @@ cmake
 [options]
 arrow:shared=False
 arrow:parquet=True
-arrow:with_zlib=True
+arrow:with_zlib=False
 arrow:fPIC=False
 arrow:with_re2=False
 arrow:with_protobuf=False
diff --git a/corsika/detail/modules/writers/ObservationPlaneWriterParquet.inl b/corsika/detail/modules/writers/ObservationPlaneWriterParquet.inl
index c5f5e7da2..1e9a0c23a 100644
--- a/corsika/detail/modules/writers/ObservationPlaneWriterParquet.inl
+++ b/corsika/detail/modules/writers/ObservationPlaneWriterParquet.inl
@@ -19,6 +19,9 @@ namespace corsika {
     // setup the streamer
     output_.initStreamer((directory / "particles.parquet").string());
 
+    // enable compression with the default level
+    output_.enableCompression();
+
     // build the schema
     output_.addField("pdg", parquet::Repetition::REQUIRED, parquet::Type::INT32,
                      parquet::ConvertedType::INT_32);
diff --git a/corsika/detail/output/ParquetStreamer.inl b/corsika/detail/output/ParquetStreamer.inl
index d9b4672be..b08fa6c60 100644
--- a/corsika/detail/output/ParquetStreamer.inl
+++ b/corsika/detail/output/ParquetStreamer.inl
@@ -30,6 +30,11 @@ namespace corsika {
     fields_.push_back(parquet::schema::PrimitiveNode::Make(args...));
   }
 
+  void ParquetStreamer::enableCompression(int const level) {
+    builder_.compression(parquet::Compression::ZSTD); // select ZSTD as the codec
+    builder_.compression_level(level); // forward the requested compression level
+  }
+
   void ParquetStreamer::buildStreamer() {
 
     // build the top level schema
@@ -47,8 +52,6 @@ namespace corsika {
     outfile_->Close();
   }
 
-  std::shared_ptr<parquet::StreamWriter> ParquetStreamer::getWriter() {
-    return writer_;
-  }
+  std::shared_ptr<parquet::StreamWriter> ParquetStreamer::getWriter() { return writer_; }
 
 } // namespace corsika
diff --git a/corsika/output/ParquetStreamer.hpp b/corsika/output/ParquetStreamer.hpp
index 240e2af0f..c2ac2b8f0 100644
--- a/corsika/output/ParquetStreamer.hpp
+++ b/corsika/output/ParquetStreamer.hpp
@@ -50,6 +50,11 @@ namespace corsika {
     template <typename... TArgs>
     void addField(TArgs&&... args);
 
+    /**
+     * Enable ZSTD compression for this streamer at the given level (default 3).
+     */
+    void enableCompression(int const level = 3);
+
     /**
      * Finalize the streamer construction.
      */
-- 
GitLab