IAP GITLAB

Skip to content
Snippets Groups Projects
Commit 902e5251 authored by Remy Prechelt's avatar Remy Prechelt
Browse files

Add support for library and output reading.

parent 9108e8b5
No related branches found
No related tags found
1 merge request!317Output infrastructure and Python analysis library.
......@@ -9,8 +9,9 @@
"""
from . import io
from .io.library import Library
# all exported objects
__all__ = ["io"]
__all__ = ["io", "Library"]
__version__: str = "8.0.0-alpha"
......@@ -9,6 +9,7 @@
"""
from .hist import read_hist
from .library import Library
# all exported objects
__all__ = ["read_hist"]
__all__ = ["read_hist", "Library"]
"""
This file allows for reading/working with C8 libraries.
(c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu
This software is distributed under the terms of the GNU General Public
Licence version 3 (GPL Version 3). See file LICENSE for a full version of
the license.
"""
import logging
import os
import os.path as op
import re
from typing import Any, Dict, Optional
import yaml
from . import outputs
class Library(object):
    """
    Represents a library ("run") of showers produced by C8.

    A library is a directory containing a top-level `config.yaml`
    and one sub-directory per output, each with its own `config.yaml`.
    """

    def __init__(self, path: str):
        """
        Open the library stored in `path` and load all of its outputs.

        Parameters
        ----------
        path: str
            The path to the directory containing the library.

        Raises
        ------
        ValueError
            If `path` does not contain a valid CORSIKA8 library.
        """
        # check that this is a valid library
        if not self.__valid_library(path):
            raise ValueError(f"'{path}' does not contain a valid CORSIKA8 library.")

        # store the top-level path
        self.path = path

        # load the config file
        self.config = self.load_config(path)

        # build the list of outputs
        self.__outputs = self.__build_outputs(path)

    def get(self, name: str) -> "Optional[outputs.Output]":
        """
        Return the output with a given name.

        Parameters
        ----------
        name: str
            The unique name of the output (the 'name' key of its config).

        Returns
        -------
        Optional[Output]:
            The matching output, or None (after logging a warning)
            if no output with this name was loaded.
        """
        if name in self.__outputs:
            return self.__outputs[name]

        msg = f"Output with name '{name}' not available in this library."
        logging.getLogger("corsika").warning(msg)
        return None

    @staticmethod
    def load_config(path: str) -> Dict[str, Any]:
        """
        Load the top-level config from a given library path.

        Parameters
        ----------
        path: str
            The path to the directory containing the library.

        Returns
        -------
        dict:
            The config as a python dictionary.

        Raises
        ------
        FileNotFoundError
            If the config file cannot be found
        """
        # NOTE(review): yaml.Loader can construct arbitrary Python objects;
        # only open libraries that come from a trusted source.
        with open(op.join(path, "config.yaml"), "r") as f:
            return yaml.load(f, Loader=yaml.Loader)

    @staticmethod
    def __valid_library(path: str) -> bool:
        """
        Check if the library pointed to by 'path' is a valid C8 library.

        Parameters
        ----------
        path: str
            The path to the directory containing the library.

        Returns
        -------
        bool:
            True if this is a valid C8 library.
        """
        # check that the config file exists
        if not op.exists(op.join(path, "config.yaml")):
            return False

        # the config file exists, we load it
        config = Library.load_config(path)

        # an empty or non-mapping config cannot identify a C8 library
        if not isinstance(config, dict):
            return False

        # and check that the config's "creator" key is correct
        return config.get("creator") == "CORSIKA8"

    @staticmethod
    def __build_outputs(path: str) -> "Dict[str, outputs.Output]":
        """
        Build the outputs contained in this library.

        This will print a warning message if a particular
        output is invalid but will continue to load additional
        outputs afterwards.

        Parameters
        ----------
        path: str
            The path to the directory containing this library.

        Returns
        -------
        Dict[str, Output]:
            A dictionary mapping names to initialized outputs.
        """
        logger = logging.getLogger("corsika")

        # get a list of the subdirectories in the library; if `path`
        # cannot be walked there is simply nothing to load
        _, dirs, _ = next(os.walk(path), (path, [], []))

        # this is the dictionary where we store our components. It must
        # NOT be called `outputs`: that would shadow the `outputs` module
        # that the reader classes are looked up in below.
        loaded: Dict[str, Any] = {}

        # loop over the subdirectories
        for subdir in dirs:
            output_path = op.join(path, subdir)

            # read the config file for this output
            try:
                config = Library.load_config(output_path)
            except OSError as e:
                logger.warning(f"'{subdir}' does not contain a readable config: {e}")
                continue  # skip to the next output, don't error
            except yaml.YAMLError as e:
                logger.warning(f"'{subdir}' contains a malformed config: {e}")
                continue  # skip to the next output, don't error

            # an empty config file is parsed as None; treat it as having no keys
            if not isinstance(config, dict):
                config = {}

            # the name keyword is our unique identifier
            name = config.get("name")

            # get the "type" keyword to identify this component
            out_type = config.get("type")

            # if `out_type` or `name` was None, this is an invalid output
            if out_type is None or name is None:
                msg = (
                    f"'{subdir}' does not contain a valid config. "
                    "Missing 'type' or 'name' keyword."
                )
                logger.warning(msg)
                continue  # skip to the next output, don't error

            # we now have a valid component type, get the corresponding
            # type from the outputs subpackage
            try:
                # create the name of the module containing this output class
                # we use a regex to go from CamelCase to snake_case
                module_name = re.sub(r"(?<!^)(?=[A-Z])", "_", out_type).lower()

                # instantiate the output
                component = getattr(getattr(outputs, module_name), out_type)(
                    output_path
                )

                # check if the read failed
                if not component.is_good():
                    msg = (
                        f"'{name}' encountered an error while reading. "
                        "This process will not be loaded."
                    )
                    logger.warning(msg)
                else:
                    loaded[name] = component
            except AttributeError:
                msg = (
                    f"Unable to instantiate an instance of '{out_type}' "
                    f"for a process called '{name}'"
                )
                logger.warning(msg)
                continue  # skip to the next output, don't error

        # and we are done building - return the constructed outputs
        return loaded
"""
(c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu
This software is distributed under the terms of the GNU General Public
Licence version 3 (GPL Version 3). See file LICENSE for a full version of
the license.
"""
from .observation_plane import ObservationPlane
from .output import Output
__all__ = ["Output", "ObservationPlane"]
"""
Read data written by ObservationPlane.
(c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu
This software is distributed under the terms of the GNU General Public
Licence version 3 (GPL Version 3). See file LICENSE for a full version of
the license.
"""
import logging
import os.path as op
from typing import Any, Dict
import pyarrow.parquet as pq
from .output import Output
class ObservationPlane(Output):
    """
    Read particle data from an ObservationPlane.
    """

    def __init__(self, path: str):
        """
        Load the particle data into a parquet table.

        A failure to read the particle file is logged as a warning and
        is reported through `is_good()` rather than raised.

        Parameters
        ----------
        path: str
            The path to the directory containing this output.
        """
        # load and store our path and config
        self.path = path
        self.__config = self.load_config(path)

        # make sure the attribute always exists so that `is_good()` can
        # report a failed read instead of raising AttributeError
        self.__data: Any = None

        # try and load our data
        try:
            self.__data = pq.read_table(op.join(path, "particles.parquet"))
        except Exception as e:
            logging.getLogger("corsika").warning(
                f"An error occured loading an ObservationPlane: {e}"
            )

    def is_good(self) -> bool:
        """
        Returns true if this output has been read successfully
        and has the correct files/state/etc.

        Returns
        -------
        bool:
            True if this is a good output.
        """
        return self.__data is not None and self.__config is not None

    def astype(self, dtype: str = "parquet", **kwargs: Any) -> Any:
        """
        Return the particle data from this observation plane.

        Parameters
        ----------
        dtype: str
            The data format to return the data in; one of
            'parquet' (the table as loaded) or 'pandas'.
        **kwargs: Any
            Accepted for compatibility with the `Output` interface;
            currently not used by this reader.

        Returns
        -------
        Any:
            The return type of this method is determined by `dtype`.

        Raises
        ------
        ValueError
            If `dtype` is not one of the supported formats.
        """
        if dtype == "parquet":
            return self.__data
        elif dtype == "pandas":
            return self.__data.to_pandas()
        else:
            raise ValueError(
                (
                    f"Unknown format '{dtype}' for ObservationPlane. "
                    "We currently only support ['parquet', 'pandas']."
                )
            )

    @property
    def config(self) -> Dict[str, Any]:
        """
        Return the config file for this output.

        Returns
        -------
        Dict[str, any]
            The configuration file for this output.
        """
        return self.__config
"""
This file defines the API for all output readers.
(c) Copyright 2020 CORSIKA Project, corsika-project@lists.kit.edu
This software is distributed under the terms of the GNU General Public
Licence version 3 (GPL Version 3). See file LICENSE for a full version of
the license.
"""
import os.path as op
from abc import ABC, abstractmethod
from typing import Any, Dict
import yaml
class Output(ABC):
    """
    Abstract base class describing the reader interface that every
    CORSIKA8 output reader has to implement.
    """

    @abstractmethod
    def __init__(self, path: str):
        """
        Load the output files found in `path` and check that
        the output is valid.

        Parameters
        ----------
        path: str
            The path to the directory containing this output.
        """

    @abstractmethod
    def is_good(self) -> bool:
        """
        Report whether this output was read successfully and is
        in a usable state.

        Returns
        -------
        bool:
            True if this is a good output.
        """

    @abstractmethod
    def astype(self, dtype: str, **kwargs: Any) -> Any:
        """
        Return the data of this output converted to the format
        named by 'dtype'.

        Parameters
        ----------
        dtype: str
            The data format to return the data in (i.e. numpy, pandas, etc.)
        **kwargs: Any
            Additional keyword arguments can be accepted by the output.

        Returns
        -------
        Any:
            The return type of this method is determined by `dtype`.
        """

    @property
    @abstractmethod
    def config(self) -> Dict[str, Any]:
        """
        The configuration of this output.

        Returns
        -------
        Dict[str, any]
            The configuration file for this output.
        """

    @staticmethod
    def load_config(path: str) -> Dict[str, Any]:
        """
        Read the `config.yaml` file stored directly inside `path`.

        Parameters
        ----------
        path: str
            The path to the directory containing the config file.

        Returns
        -------
        dict:
            The config as a python dictionary.

        Raises
        ------
        FileNotFoundError
            If the config file cannot be found
        """
        config_file = op.join(path, "config.yaml")

        # NOTE(review): yaml.Loader can construct arbitrary Python objects;
        # only read configs that come from a trusted source.
        with open(config_file, "r") as stream:
            parsed = yaml.load(stream, Loader=yaml.Loader)

        return parsed
......@@ -63,3 +63,7 @@ ignore_missing_imports = True
# ignore missing types for boost_histogram
[mypy-boost_histogram.*]
ignore_missing_imports = True
# ignore missing types for pyarrow
[mypy-pyarrow.*]
ignore_missing_imports = True
......@@ -15,7 +15,7 @@ with open(path.join(here, "README.md"), encoding="utf-8") as f:
setup(
name="corsika",
version=__version__,
description="A Python package for working with CORSIKA 8",
description="A Python package for working with CORSIKA 8.",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://gitlab.ikp.kit.edu/AirShowerPhysics/corsika",
......@@ -32,7 +32,7 @@ setup(
keywords=["cosmic ray", "physics", "astronomy", "simulation"],
packages=["corsika"],
python_requires=">=3.6*, <4",
install_requires=["numpy", "pyyaml", "boost_histogram"],
install_requires=["numpy", "pyyaml", "pyarrow", "boost_histogram"],
extras_require={
"test": [
"pytest",
......@@ -43,6 +43,7 @@ setup(
"pytest-cov",
"flake8",
],
"pandas": ["pandas"],
},
scripts=[],
project_urls={"code": "https://gitlab.ikp.kit.edu/AirShowerPhysics/corsika"},
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment