diff --git a/conanfile.txt b/conanfile.txt index 29ec2d5f3074def9a6883da314ede381df38b7d6..a824f5567b67f0c7aa5b4eef0fa986a3c5713a7b 100644 --- a/conanfile.txt +++ b/conanfile.txt @@ -6,6 +6,7 @@ boost/1.74.0 zlib/1.2.11 yaml-cpp/0.6.3 arrow/2.0.0 +zstd/1.4.9 [generators] cmake @@ -13,7 +14,7 @@ cmake [options] arrow:shared=False arrow:parquet=True -arrow:with_zlib=True +arrow:with_zlib=False arrow:fPIC=False arrow:with_re2=False arrow:with_protobuf=False diff --git a/corsika/detail/modules/writers/ObservationPlaneWriterParquet.inl b/corsika/detail/modules/writers/ObservationPlaneWriterParquet.inl index c5f5e7da25dc785ae466b0f421dbc7ae8025b34f..1e9a0c23a3c01b9e827e994b10d9aa16d31064bf 100644 --- a/corsika/detail/modules/writers/ObservationPlaneWriterParquet.inl +++ b/corsika/detail/modules/writers/ObservationPlaneWriterParquet.inl @@ -19,6 +19,9 @@ namespace corsika { // setup the streamer output_.initStreamer((directory / "particles.parquet").string()); + // enable compression with the default level + output_.enableCompression(); + // build the schema output_.addField("pdg", parquet::Repetition::REQUIRED, parquet::Type::INT32, parquet::ConvertedType::INT_32); diff --git a/corsika/detail/output/ParquetStreamer.inl b/corsika/detail/output/ParquetStreamer.inl index d9b4672be3a4201a342a5a0664a3602731cbbdd4..b08fa6c6021f5d303f139225e0f4ff76cfb371f1 100644 --- a/corsika/detail/output/ParquetStreamer.inl +++ b/corsika/detail/output/ParquetStreamer.inl @@ -30,6 +30,11 @@ namespace corsika { fields_.push_back(parquet::schema::PrimitiveNode::Make(args...)); } + void ParquetStreamer::enableCompression(int const level) { + builder_.compression(parquet::Compression::ZSTD); + builder_.compression_level(level); + } + void ParquetStreamer::buildStreamer() { // build the top level schema @@ -47,8 +52,6 @@ namespace corsika { outfile_->Close(); } - std::shared_ptr<parquet::StreamWriter> ParquetStreamer::getWriter() { - return writer_; - } + std::shared_ptr<parquet::StreamWriter> ParquetStreamer::getWriter() { return writer_; } } // namespace corsika diff --git a/corsika/output/ParquetStreamer.hpp b/corsika/output/ParquetStreamer.hpp index 240e2af0f9f3c8538de3288ba9b021b95f1e3577..c2ac2b8f0676053129817bdcc3f586fbde01445e 100644 --- a/corsika/output/ParquetStreamer.hpp +++ b/corsika/output/ParquetStreamer.hpp @@ -50,6 +50,11 @@ namespace corsika { template <typename... TArgs> void addField(TArgs&&... args); + /** + * Enable compression for this streamer. + */ + void enableCompression(int const level = 3); + /** * Finalize the streamer construction. */