diff --git a/Python/corsika/__init__.py b/Python/corsika/__init__.py
index f5331efad507aac2016ffe21fd4e35b027dad9b0..078d688098095f06e2a14d2db4c203370993d578 100644
--- a/Python/corsika/__init__.py
+++ b/Python/corsika/__init__.py
@@ -9,8 +9,9 @@
 """
 
 from . import io
+from .io.library import Library
 
 # all imported objects
-__all__ = ["io"]
+__all__ = ["io", "Library"]
 
 __version__: str = "8.0.0-alpha"
diff --git a/Python/corsika/io/__init__.py b/Python/corsika/io/__init__.py
index bd8a0d8bc7aedbd071fe40f23925a463104ec434..296572d6dee46a4b95a61b8ef18c549f120a1205 100644
--- a/Python/corsika/io/__init__.py
+++ b/Python/corsika/io/__init__.py
@@ -9,6 +9,7 @@
 """
 
 from .hist import read_hist
+from .library import Library
 
 # all exported objects
-__all__ = ["read_hist"]
+__all__ = ["read_hist", "Library"]
diff --git a/Python/corsika/io/library.py b/Python/corsika/io/library.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f0bb685ca4fea9f8e8a0508dcecbceb4f041ecc
--- /dev/null
+++ b/Python/corsika/io/library.py
@@ -0,0 +1,208 @@
+"""
+ This file allows for reading/working with C8 libraries.
+
+ (c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu
+
+ This software is distributed under the terms of the GNU General Public
+ Licence version 3 (GPL Version 3). See file LICENSE for a full version of
+ the license.
+"""
+import logging
+import os
+import os.path as op
+import re
+from typing import Any, Dict, Optional
+
+import yaml
+
+from . import outputs
+
+
+class Library(object):
+    """
+    Represents a library ("run") of showers produced by C8.
+    """
+
+    def __init__(self, path: str):
+        """
+        Load the library stored in the directory `path`.
+
+        Parameters
+        ----------
+        path: str
+            The path to the directory containing the library.
+
+        Raises
+        ------
+        ValueError
+            If `path` does not contain a valid CORSIKA8 library.
+        """
+
+        # check that this is a valid library
+        if not self.__valid_library(path):
+            raise ValueError(f"'{path}' does not contain a valid CORSIKA8 library.")
+
+        # store the top-level path
+        self.path = path
+
+        # load the config file
+        self.config = self.load_config(path)
+
+        # build the list of outputs
+        self.__outputs = self.__build_outputs(path)
+
+    def get(self, name: str) -> Optional[outputs.Output]:
+        """
+        Return the output with a given name.
+
+        A warning is logged and None is returned if no output
+        called `name` was loaded for this library.
+        """
+        if name in self.__outputs:
+            return self.__outputs[name]
+        else:
+            msg = f"Output with name '{name}' not available in this library."
+            logging.getLogger("corsika").warning(msg)
+            return None
+
+    @staticmethod
+    def load_config(path: str) -> Dict[str, Any]:
+        """
+        Load the top-level config from a given library path.
+
+        Parameters
+        ----------
+        path: str
+            The path to the directory containing the library.
+
+        Returns
+        -------
+        dict:
+            The config as a python dictionary.
+
+        Raises
+        ------
+        FileNotFoundError
+            If the config file cannot be found
+
+        """
+        # NOTE(review): yaml.Loader can construct arbitrary Python objects; prefer
+        # yaml.safe_load if library directories may come from untrusted sources.
+        with open(op.join(path, "config.yaml"), "r") as f:
+            return yaml.load(f, Loader=yaml.Loader)
+
+    @staticmethod
+    def __valid_library(path: str) -> bool:
+        """
+        Check if the library pointed to by 'path' is a valid C8 library.
+
+        Parameters
+        ----------
+        path: str
+            The path to the directory containing the library.
+
+        Returns
+        -------
+        bool:
+            True if this is a valid C8 library.
+
+        """
+
+        # check that the config file exists
+        if not op.exists(op.join(path, "config.yaml")):
+            return False
+
+        # the config file exists, we load it
+        config = Library.load_config(path)
+
+        # and check that the config's "creator" key is correct; an empty or
+        # non-mapping config (or one without the key) is simply not a library
+        return isinstance(config, dict) and config.get("creator") == "CORSIKA8"
+
+    @staticmethod
+    def __build_outputs(path: str) -> Dict[str, outputs.Output]:
+        """
+        Build the outputs contained in this library.
+
+        This will print a warning message if a particular
+        output is invalid but will continue to load additional
+        outputs afterwards.
+
+        Parameters
+        ----------
+        path: str
+            The path to the directory containing this library.
+
+        Returns
+        -------
+        Dict[str, Output]:
+            A dictionary mapping names to initialized outputs.
+        """
+
+        # get a list of the subdirectories in the library
+        _, dirs, _ = next(os.walk(path))
+
+        # this is the dictionary where we store our components; it must not be
+        # called `outputs` as that would shadow the `outputs` package used below
+        loaded: Dict[str, Any] = {}
+
+        # loop over the subdirectories
+        for subdir in dirs:
+
+            # read the config file for this output; an unreadable or empty
+            # config must not abort loading of the remaining outputs
+            try:
+                config = Library.load_config(op.join(path, subdir)) or {}
+            except (OSError, yaml.YAMLError) as e:
+                msg = f"'{subdir}' does not contain a readable config: {e}"
+                logging.getLogger("corsika").warning(msg)
+                continue  # skip to the next output, don't error
+
+            # the name keyword is our unique identifier
+            name = config.get("name")
+
+            # get the "type" keyword to identify this component
+            out_type = config.get("type")
+
+            # if `out_type` or `name` was None, this is an invalid output
+            if out_type is None or name is None:
+                msg = (
+                    f"'{subdir}' does not contain a valid config. "
+                    "Missing 'type' or 'name' keyword."
+                )
+                logging.getLogger("corsika").warning(msg)
+                continue  # skip to the next output, don't error
+
+            # we now have a valid component type, get the corresponding
+            # class from the `outputs` subpackage
+            try:
+
+                # create the name of the module containing this output class;
+                # we use a regex to go from CamelCase to snake_case
+                module_name = re.sub(r"(?<!^)(?=[A-Z])", "_", out_type).lower()
+
+                # instantiate the output
+                component = getattr(getattr(outputs, module_name), out_type)(
+                    op.join(path, subdir)
+                )
+
+                # check if the read failed
+                if not component.is_good():
+                    msg = (
+                        f"'{name}' encountered an error while reading. "
+                        "This process will not be loaded."
+                    )
+                    logging.getLogger("corsika").warning(msg)
+                else:
+                    loaded[name] = component
+
+            except AttributeError:
+                msg = (
+                    f"Unable to instantiate an instance of '{out_type}' "
+                    f"for a process called '{name}'"
+                )
+                logging.getLogger("corsika").warning(msg)
+                continue  # skip to the next output, don't error
+
+        # and we are done building - return the constructed outputs
+        return loaded
diff --git a/Python/corsika/io/outputs/__init__.py b/Python/corsika/io/outputs/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..67fb35a7bb269697d246e3db2db33aeb0d2c78ac
--- /dev/null
+++ b/Python/corsika/io/outputs/__init__.py
@@ -0,0 +1,13 @@
+"""
+
+ (c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu
+
+ This software is distributed under the terms of the GNU General Public
+ Licence version 3 (GPL Version 3). See file LICENSE for a full version of
+ the license.
+"""
+
+from .observation_plane import ObservationPlane
+from .output import Output
+
+__all__ = ["Output", "ObservationPlane"]
diff --git a/Python/corsika/io/outputs/observation_plane.py b/Python/corsika/io/outputs/observation_plane.py
new file mode 100644
index 0000000000000000000000000000000000000000..00ac90dd43b0daf6c27f379da1d1eccb609e7a11
--- /dev/null
+++ b/Python/corsika/io/outputs/observation_plane.py
@@ -0,0 +1,107 @@
+"""
+ Read data written by ObservationPlane.
+
+ (c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu
+
+ This software is distributed under the terms of the GNU General Public
+ Licence version 3 (GPL Version 3). See file LICENSE for a full version of
+ the license.
+"""
+import logging
+import os.path as op
+from typing import Any, Dict
+
+import pyarrow.parquet as pq
+
+from .output import Output
+
+
+class ObservationPlane(Output):
+    """
+    Read particle data from an ObservationPlane.
+    """
+
+    def __init__(self, path: str):
+        """
+        Load the particle data into a parquet table.
+
+        Parameters
+        ----------
+        path: str
+            The path to the directory containing this output.
+        """
+
+        # load and store our path and config
+        self.path = path
+        self.__config = self.load_config(path)
+
+        # try and load our data
+        try:
+            self.__data = pq.read_table(op.join(path, "particles.parquet"))
+        except Exception as e:
+            # mark the data as missing so that is_good() reports the failure
+            # instead of raising an AttributeError on the unset attribute
+            self.__data = None
+            logging.getLogger("corsika").warning(
+                f"An error occurred loading an ObservationPlane: {e}"
+            )
+
+    def is_good(self) -> bool:
+        """
+        Returns true if this output has been read successfully
+        and has the correct files/state/etc.
+
+        Returns
+        -------
+        bool:
+            True if this is a good output.
+        """
+        return self.__data is not None and self.__config is not None
+
+    def astype(self, dtype: str = "parquet", **kwargs: Any) -> Any:
+        """
+        Return the particle data from this observation plane.
+
+        Additional keyword arguments are currently ignored.
+
+        Parameters
+        ----------
+        dtype: str
+            The data format to return the data in ("parquet" or "pandas").
+
+        Returns
+        -------
+        Any:
+            The return type of this method is determined by `dtype`.
+
+        Raises
+        ------
+        ValueError
+            If `dtype` is not one of the supported formats.
+        """
+        if dtype == "parquet":
+            return self.__data
+        elif dtype == "pandas":
+            return self.__data.to_pandas()
+        else:
+            raise ValueError(
+                (
+                    f"Unknown format '{dtype}' for ObservationPlane. "
+                    "We currently only support ['parquet', 'pandas']."
+                )
+            )
+
+    @property
+    def config(self) -> Dict[str, Any]:
+        """
+        Return the config file for this output.
+
+        Parameters
+        ----------
+
+        Returns
+        -------
+        Dict[str, any]
+            The configuration file for this output.
+        """
+        return self.__config
diff --git a/Python/corsika/io/outputs/output.py b/Python/corsika/io/outputs/output.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d7e27e82085b2017ab624425df487ac6734686b
--- /dev/null
+++ b/Python/corsika/io/outputs/output.py
@@ -0,0 +1,108 @@
+"""
+ This file defines the API for all output readers.
+
+ (c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu
+
+ This software is distributed under the terms of the GNU General Public
+ Licence version 3 (GPL Version 3). See file LICENSE for a full version of
+ the license.
+"""
+import os.path as op
+from abc import ABC, abstractmethod
+from typing import Any, Dict
+
+import yaml
+
+
+class Output(ABC):
+    """
+    This class defines the abstract interface for all classes
+    that wish to provide reading support for CORSIKA8 outputs.
+    """
+
+    @abstractmethod
+    def __init__(self, path: str):
+        """
+        __init__ must load the output files and check
+        that it is valid.
+
+        Parameters
+        ----------
+        path: str
+            The path to the directory containing this output.
+        """
+        pass
+
+    @abstractmethod
+    def is_good(self) -> bool:
+        """
+        Returns true if this output has been read successfully
+        and has the correct files/state/etc.
+
+        Returns
+        -------
+        bool:
+            True if this is a good output.
+        """
+        pass
+
+    @abstractmethod
+    def astype(self, dtype: str, **kwargs: Any) -> Any:
+        """
+        Return the data for this output in the data format given by 'dtype'
+
+        Parameters
+        ----------
+        dtype: str
+            The data format to return the data in (i.e. numpy, pandas, etc.)
+        **kwargs: Any
+            Additional keyword arguments can be accepted by the output.
+
+        Returns
+        -------
+        Any:
+            The return type of this method is determined by `dtype`.
+        """
+        pass
+
+    @property
+    @abstractmethod
+    def config(self) -> Dict[str, Any]:
+        """
+        Return the config file for this output.
+
+        Parameters
+        ----------
+
+        Returns
+        -------
+        Dict[str, any]
+            The configuration file for this output.
+        """
+        pass
+
+    @staticmethod
+    def load_config(path: str) -> Dict[str, Any]:
+        """
+        Load the config file stored in the directory `path`.
+
+        Parameters
+        ----------
+        path: str
+            The path to the directory containing the config file.
+
+        Returns
+        -------
+        dict:
+            The config as a python dictionary.
+
+        Raises
+        ------
+        FileNotFoundError
+            If the config file cannot be found
+
+        """
+        # NOTE(review): yaml.Loader can construct arbitrary Python objects; prefer
+        # yaml.safe_load if output directories may come from untrusted sources.
+        with open(op.join(path, "config.yaml"), "r") as f:
+            return yaml.load(f, Loader=yaml.Loader)
diff --git a/Python/setup.cfg b/Python/setup.cfg
index 2544189cacdaef9ab0c8e4e6dcf730f5bff4b344..f58d04eeb061f000b2d890aa26f542f70f3aa0c0 100644
--- a/Python/setup.cfg
+++ b/Python/setup.cfg
@@ -63,3 +63,7 @@ ignore_missing_imports = True
 # ignore missing types for boost_histogram
 [mypy-boost_histogram.*]
 ignore_missing_imports = True
+
+# ignore missing types for pyarrow
+[mypy-pyarrow.*]
+ignore_missing_imports = True
diff --git a/Python/setup.py b/Python/setup.py
index 350fbe9fac598a11fc7a7acc33a53ee6c4be89d3..c07dbdeb403d03cc29c97bdcfaffb274fcf613c7 100644
--- a/Python/setup.py
+++ b/Python/setup.py
@@ -15,7 +15,7 @@ with open(path.join(here, "README.md"), encoding="utf-8") as f:
 setup(
     name="corsika",
     version=__version__,
-    description="A Python package for working with CORSIKA 8",
+    description="A Python package for working with CORSIKA 8.",
     long_description=long_description,
     long_description_content_type="text/markdown",
     url="https://gitlab.ikp.kit.edu/AirShowerPhysics/corsika",
@@ -32,7 +32,7 @@ setup(
     keywords=["cosmic ray", "physics", "astronomy", "simulation"],
     packages=["corsika"],
     python_requires=">=3.6*, <4",
-    install_requires=["numpy", "pyyaml", "boost_histogram"],
+    install_requires=["numpy", "pyyaml", "pyarrow", "boost_histogram"],
     extras_require={
         "test": [
             "pytest",
@@ -43,6 +43,7 @@ setup(
             "pytest-cov",
             "flake8",
         ],
+        "pandas": ["pandas"],
     },
     scripts=[],
     project_urls={"code": "https://gitlab.ikp.kit.edu/AirShowerPhysics/corsika"},