From b450cfafd4385682a599327c8547a61807248882 Mon Sep 17 00:00:00 2001
From: ralfulrich <ralf.ulrich@kit.edu>
Date: Wed, 5 May 2021 18:07:50 +0200
Subject: [PATCH] style and xml

---
 src/framework/core/ParticleClassNames.xml |  10 +-
 src/framework/core/pdxml_reader.py        | 363 ++++++++++++----------
 2 files changed, 202 insertions(+), 171 deletions(-)

diff --git a/src/framework/core/ParticleClassNames.xml b/src/framework/core/ParticleClassNames.xml
index 8301d4b90..ba7ba530d 100644
--- a/src/framework/core/ParticleClassNames.xml
+++ b/src/framework/core/ParticleClassNames.xml
@@ -1,13 +1,19 @@
 <chapter name="Particle Names">
 
   <!-- For selected particles it is possible to specify the C++ class
-       names for a specific unique PDG code -->
+       names "classname" for a specific unique PDG codes
+       
+       This overwrites the default from Pythia8 ParticleData.xml
+
+       If needed, also the text-output form for particles 
+       can be changed using "name"
+        -->
   
   <particle pdgID="0" classname="Unknown"/> <!-- VOID in Pythia8 -->
   
   <particle pdgID="11" classname="Electron"/>
   <particle pdgID="-11" classname="Positron"/>
-  <particle pdgID="22" classname="Photon"/>
+  <particle pdgID="22" classname="Photon" name="photon"/>
 
   <particle pdgID="130" classname="K0Long"/>
   <particle pdgID="310" classname="K0Short"/>
diff --git a/src/framework/core/pdxml_reader.py b/src/framework/core/pdxml_reader.py
index 847e3a81f..d8809a004 100755
--- a/src/framework/core/pdxml_reader.py
+++ b/src/framework/core/pdxml_reader.py
@@ -1,6 +1,11 @@
 #!/usr/bin/env python3
 
-import sys, math, itertools, re, csv, pprint
+import sys
+import math
+import itertools
+import re
+import csv
+import pprint
 import xml.etree.ElementTree as ET
 from collections import OrderedDict
 import pickle
@@ -9,58 +14,62 @@ import io
 GeVfm = 0.19732696312541853
 c_speed_of_light = 29.9792458e10  # mm / s
 # for nuclear masses
-mneutron = 0.9395654133 # GeV
-mproton = 0.9382720813 # GeV
+mneutron = 0.9395654133  # GeV
+mproton = 0.9382720813  # GeV
 
 namespace = "corsika"
 
 ##############################################################
-# 
+#
 # reading xml input data, return line by line particle data
-# 
+#
+
+
 def parsePythia(filename):
     tree = ET.parse(filename)
     root = tree.getroot()
-    
+
     for particle in root.iter("particle"):
-        name = particle.attrib["name"]        
+        name = particle.attrib["name"]
         antiName = "Unknown"
         if ("antiName" in particle.attrib):
             antiName = particle.attrib["antiName"]
         pdg_id = int(particle.attrib["id"])
         mass = float(particle.attrib["m0"])  # GeV
-        electric_charge = int(particle.attrib["chargeType"])  # in units of e/3        
+        electric_charge = int(particle.attrib["chargeType"])  # in units of e/3
         ctau = 0.
         if pdg_id in (11, 12, 14, 16, 22, 2212):  # these are the stable particles !
             ctau = float('Inf')
         elif 'tau0' in particle.attrib:
             ctau = float(particle.attrib['tau0'])  # mm / c
         elif 'mWidth' in particle.attrib:
-            ctau = GeVfm / float(particle.attrib['mWidth']) * 1e-15 * 1000.0  # mm / s
-        elif pdg_id in (0, 423, 433, 4312, 4322, 5112, 5222):  # those are certainly not stable....
+            ctau = GeVfm / \
+                float(particle.attrib['mWidth']) * 1e-15 * 1000.0  # mm / s
+        # those are certainly not stable....
+        elif pdg_id in (0, 423, 433, 4312, 4322, 5112, 5222):
             ctau = 0.
         else:
-            print ("missing lifetime: " + str(pdg_id) + " " + str(name))
+            print("missing lifetime: " + str(pdg_id) + " " + str(name))
             sys.exit(1)
-        
+
         yield (pdg_id, name, mass, electric_charge, antiName, ctau/c_speed_of_light)
-                
+
         # TODO: read decay channels from child elements
-        
+
         if "antiName" in particle.attrib:
             yield (-pdg_id, antiName, mass, -electric_charge, name, ctau/c_speed_of_light)
 
 
 ##############################################################
-# 
+#
 # reading xml input data, return line by line particle data
-# 
+#
 def parseNuclei(filename):
     tree = ET.parse(filename)
     root = tree.getroot()
-        
+
     for particle in root.iter("particle"):
-        name = particle.attrib["name"]        
+        name = particle.attrib["name"]
         antiName = "Unknown"
         if ("antiName" in particle.attrib):
             antiName = particle.attrib["antiName"]
@@ -69,47 +78,53 @@ def parseNuclei(filename):
         Z = int(particle.attrib["Z"])
         # mass in GeV
         if ("mass" in particle.attrib):
-            mass = particle.attrib["mass"] 
+            mass = particle.attrib["mass"]
         else:
             mass = (A-Z)*mneutron + Z*mproton
 
-        electric_charge = Z*3  # in units of e/3        
+        electric_charge = Z*3  # in units of e/3
         ctau = float('Inf')
-        
+
         yield (pdg_id, name, mass, electric_charge, antiName, ctau/c_speed_of_light, A, Z)
-                
 
-        
+
 ##############################################################
-# 
+#
 # returns dict with particle codes and class names
-# 
-def class_names(filename):
+#
+def read_class_names(filename):
     tree = ET.parse(filename)
     root = tree.getroot()
 
     map = {}
-    
+
     for particle in root.iter("particle"):
-        name = particle.attrib["classname"]
+        classname = None
+        name = None
+        if ("classname" in particle.attrib):
+            classname = particle.attrib["classname"]
+        if ("name" in particle.attrib):
+            name = particle.attrib["name"]
         pdg_id = int(particle.attrib["pdgID"])
-        map[pdg_id] = name
-        
+        map[pdg_id] = {"classname": classname, "name": name}
+
     return map
 
 ##############################################################
-# 
+#
 # Automatically produce a string qualifying as C++ class name
-# 
+#
 # This function produces names of type "DeltaPlusPlus"
-# 
+#
+
+
 def c_identifier_camel(name):
     orig = name
-    name = name[0].upper() + name[1:].lower() # all lower case
-    
-    for c in "() /": # replace funny characters
+    name = name[0].upper() + name[1:].lower()  # all lower case
+
+    for c in "() /":  # replace funny characters
         name = name.replace(c, "_")
-    
+
     name = name.replace("bar", "Bar")
     name = name.replace("*", "Star")
     name = name.replace("'", "Prime")
@@ -120,7 +135,7 @@ def c_identifier_camel(name):
     ibar = name.find('Bar')
     if ibar > 0 and ibar < len(name)-3:
         name = name[:ibar] + name[ibar+3:] + 'Bar'
-    
+
     # cleanup "_"s
     while True:
         tmp = name.replace("__", "_")
@@ -153,97 +168,99 @@ def c_identifier_camel(name):
     if pattern.match(name):
         return name
     else:
-        raise Exception("could not generate C identifier for '{:s}': result '{:s}'".format(orig, name))
+        raise Exception(
+            "could not generate C identifier for '{:s}': result '{:s}'".format(orig, name))
 
 
-    
 ##########################################################
-# 
+#
 # returns dict containing all data from pythia-xml input
-#     
-def read_pythia_db(filename, particle_db, classnames):    
-    
+#
+def read_pythia_db(filename, particle_db, classnames):
+
     counter = itertools.count(len(particle_db))
-    
+
     for (pdg, name, mass, electric_charge, antiName, lifetime) in parsePythia(filename):
 
         c_id = "Unknown"
-        if pdg in classnames:
-            c_id = classnames[pdg]
+        if pdg in classnames and classnames[pdg]["classname"] != None:
+            c_id = classnames[pdg]["classname"]
         else:
-            c_id = c_identifier_camel(name) # the camel case names
+            c_id = c_identifier_camel(name)  # the camel case names
 
-        # rename photon in Pythia to gamma
-        if pdg == 22:
-            name = "photon"
+        if pdg in classnames and classnames[pdg]["name"] != None:
+            name = classnames[pdg]["name"]
 
         hadron = abs(pdg) > 100
 
         if c_id in particle_db.keys():
-            raise RuntimeError("particle '{:s}' already known (new PDG id {:d}, stored PDG id: {:d})".format(c_id, pdg, particle_db[c_id]['pdg']))
-            
+            raise RuntimeError("particle '{:s}' already known (new PDG id {:d}, stored PDG id: {:d})".format(
+                c_id, pdg, particle_db[c_id]['pdg']))
+
         particle_db[c_id] = {
-            "name" : name,
-            "antiName" : antiName,
-            "pdg" : pdg,
-            "mass" : mass, # in GeV
-            "electric_charge" : electric_charge, # in e/3
-            "lifetime" : lifetime,
-            "ngc_code" : next(counter),
-            "isNucleus" : False,
-            "isHadron" : hadron,
+            "name": name,
+            "antiName": antiName,
+            "pdg": pdg,
+            "mass": mass,  # in GeV
+            "electric_charge": electric_charge,  # in e/3
+            "lifetime": lifetime,
+            "ngc_code": next(counter),
+            "isNucleus": False,
+            "isHadron": hadron,
         }
-    
+
     return particle_db
-    
 
 
 ##########################################################
-# 
+#
 # returns dict containing all data from pythia-xml input
-#     
-def read_nuclei_db(filename, particle_db, classnames):    
-    
+#
+def read_nuclei_db(filename, particle_db, classnames):
+
     counter = itertools.count(len(particle_db))
 
     for (pdg, name, mass, electric_charge, antiName, lifetime, A, Z) in parseNuclei(filename):
-        
+
         c_id = "Unknown"
-        if pdg in classnames:
-            c_id = classnames[pdg]
+        if pdg in classnames and classnames[pdg]["classname"]:
+            c_id = classnames[pdg]["classname"]
         else:
             c_id = c_identifier_camel(name)
 
+        if pdg in classnames and classnames[pdg]["name"] != None:
+            name = classnames[pdg]["name"]
+
         particle_db[c_id] = {
-            "name" : name,
-            "antiName" : antiName,
-            "pdg" : pdg,
-            "mass" : mass, # in GeV
-            "electric_charge" : electric_charge, # in e/3
-            "lifetime" : lifetime,
-            "ngc_code" : next(counter),
-            "A" : A,
-            "Z" : Z,
-            "isNucleus" : True,
-            "isHadron" : True,
+            "name": name,
+            "antiName": antiName,
+            "pdg": pdg,
+            "mass": mass,  # in GeV
+            "electric_charge": electric_charge,  # in e/3
+            "lifetime": lifetime,
+            "ngc_code": next(counter),
+            "A": A,
+            "Z": Z,
+            "isNucleus": True,
+            "isHadron": True,
         }
-    
+
     return particle_db
 
 
 ###############################################################
-# 
+#
 # build conversion table PDG -> ngc
-# 
+#
 def gen_conversion_PDG_ngc(particle_db):
     # todo: find a optimum value, think about cache miss with array vs lookup time with map
-    P_MAX = 500 # the maximum PDG code that is still filled into the table
-        
+    P_MAX = 500  # the maximum PDG code that is still filled into the table
+
     conversionDict = dict()
     conversionTable = [None] * (2*P_MAX + 1)
     for cId, p in particle_db.items():
         pdg = p['pdg']
-        
+
         if abs(pdg) < P_MAX:
             if conversionTable[pdg + P_MAX]:
                 raise Exception("table entry already occupied")
@@ -254,93 +271,98 @@ def gen_conversion_PDG_ngc(particle_db):
                 raise Exception(f"map entry {pdg} already occupied")
             else:
                 conversionDict[pdg] = cId
-    
+
     output = io.StringIO()
+
     def oprint(*args, **kwargs):
         print(*args, **kwargs, file=output)
-        
-    oprint(f"static std::array<Code, {len(conversionTable)}> constexpr conversionArray {{")
+
+    oprint(
+        f"static std::array<Code, {len(conversionTable)}> constexpr conversionArray {{")
     for ngc in conversionTable:
         oprint("    Code::{0},".format(ngc if ngc else "Unknown"))
     oprint("};")
     oprint()
-    
+
     oprint("static std::map<PDGCode, Code> const conversionMap {")
     for ngc in conversionDict.values():
         oprint(f"    {{PDGCode::{ngc}, Code::{ngc}}},")
     oprint("};")
     oprint()
-    
+
     return output.getvalue()
 
 
 ###############################################################
-# 
+#
 # return string with enum of all internal particle codes
-# 
+#
 def gen_internal_enum(particle_db):
-    string = ("//! @cond EXCLUDE_DOXY\n" 
-              "enum class Code : CodeIntType {\n" 
-              "  FirstParticle = 1, // if you want to loop over particles, you want to start with \"1\"  \n") # identifier for eventual loops...
-    
-    
+    string = ("//! @cond EXCLUDE_DOXY\n"
+              "enum class Code : CodeIntType {\n"
+              "  FirstParticle = 1, // if you want to loop over particles, you want to start with \"1\"  \n")  # identifier for eventual loops...
+
     for k in filter(lambda k: "ngc_code" in particle_db[k], particle_db):
         last_ngc_id = particle_db[k]['ngc_code']
-        string += "  {key:s} = {code:d},\n".format(key = k, code = last_ngc_id)
+        string += "  {key:s} = {code:d},\n".format(key=k, code=last_ngc_id)
 
-    string += ("  LastParticle = {:d},\n" # identifier for eventual loops...
+    string += ("  LastParticle = {:d},\n"  # identifier for eventual loops...
                "}}; //! @endcond").format(last_ngc_id + 1)
-               
-    if last_ngc_id > 0x7fff: # does not fit into int16_t
-        raise Exception("Integer overflow in internal particle code definition prevented!")
-    
+
+    if last_ngc_id > 0x7fff:  # does not fit into int16_t
+        raise Exception(
+            "Integer overflow in internal particle code definition prevented!")
+
     return string
 
 
 ###############################################################
-# 
+#
 # return string with enum of all PDG particle codes
-# 
+#
 def gen_pdg_enum(particle_db):
-    string = ("//! @cond EXCLUDE_DOXY\n" 
+    string = ("//! @cond EXCLUDE_DOXY\n"
               "enum class PDGCode : PDGCodeType {\n")
-    
+
     for cId in particle_db:
         pdgCode = particle_db[cId]['pdg']
-        string += "  {key:s} = {code:d},\n".format(key = cId, code = pdgCode)
+        string += "  {key:s} = {code:d},\n".format(key=cId, code=pdgCode)
 
     string += " }; //! @endcond \n"
-    
+
     return string
 
 
 ###############################################################
-# 
-# return string with all data arrays 
-# 
+#
+# return string with all data arrays
+#
 def gen_properties(particle_db):
 
     # number of particles, size of tables
-    string = "static constexpr std::size_t size = {size:d};\n".format(size = len(particle_db))
+    string = "static constexpr std::size_t size = {size:d};\n".format(
+        size=len(particle_db))
     string += "\n"
 
     # all particle initializer_list
     string += "constexpr std::initializer_list<Code> all_particles = {"
-    for k in particle_db:        
-        string += "  Code::{name:s},\n".format(name = k)
+    for k in particle_db:
+        string += "  Code::{name:s},\n".format(name=k)
     string += "};\n"
     string += "\n"
-    
+
     # particle masses table
-    string += "static constexpr std::array<corsika::units::si::HEPMassType const, size> masses = {\n"    
+    string += "static constexpr std::array<corsika::units::si::HEPMassType const, size> masses = {\n"
     for p in particle_db.values():
-        string += "  {mass:e} * 1e9 * corsika::units::si::electronvolt, // {name:s}\n".format(mass = p['mass'], name = p['name'])
+        string += "  {mass:e} * 1e9 * corsika::units::si::electronvolt, // {name:s}\n".format(
+            mass=p['mass'], name=p['name'])
     string += "};\n\n"
 
     # particle threshold table, initially set to the particle mass
-    string += "static std::array<corsika::units::si::HEPEnergyType, size> thresholds = {\n"    
+    string += "static std::array<corsika::units::si::HEPEnergyType, size> thresholds = {\n"
     for p in particle_db.values():
-        string += "  {mass:e} * 1e9 * corsika::units::si::electronvolt, // {name:s}\n".format(mass = p['mass'], name = p['name'])
+        string += "  {mass:e} * 1e9 * corsika::units::si::electronvolt, // {name:s}\n".format(
+            mass=p['mass'], name=p['name'])
     string += "};\n\n"
 
     # PDG code table
@@ -348,17 +370,17 @@ def gen_properties(particle_db):
     for p in particle_db.keys():
         string += f"  PDGCode::{p},\n"
     string += "};\n"
-    
+
     # name string table
     string += "static constexpr std::array<std::string_view, size> names = {\n"
     for p in particle_db.values():
-        string += "  \"{name:s}\",\n".format(name = p['name'])            
+        string += "  \"{name:s}\",\n".format(name=p['name'])
     string += "};\n"
-    
+
     # electric charges table
     string += "static constexpr std::array<int16_t, size> electric_charges = {\n"
     for p in particle_db.values():
-        string += "  {charge:d},\n".format(charge = p['electric_charge'] // 3)
+        string += "  {charge:d},\n".format(charge=p['electric_charge'] // 3)
     string += "};\n"
 
     # anti-particle table
@@ -371,52 +393,52 @@ def gen_properties(particle_db):
     #string += "static constexpr std::array<corsika::units::si::TimeType const, size> lifetime = {\n"
     string += "static constexpr std::array<double const, size> lifetime = {\n"
     for p in particle_db.values():
-        if p['lifetime'] == float("Inf") :
-            string += "  std::numeric_limits<double>::infinity(), \n" # * corsika::units::si::second, \n"
-        else :
-            string += "  {tau:e}, \n".format(tau = p['lifetime'])
+        if p['lifetime'] == float("Inf"):
+            # * corsika::units::si::second, \n"
+            string += "  std::numeric_limits<double>::infinity(), \n"
+        else:
+            string += "  {tau:e}, \n".format(tau=p['lifetime'])
             #string += "  {tau:e} * corsika::units::si::second, \n".format(tau = p['lifetime'])
     string += "};\n"
-    
+
     # is Hadron flag
     string += "static constexpr std::array<bool, size> isHadron = {\n"
     for p in particle_db.values():
         value = 'false'
         if p['isHadron']:
             value = 'true'
-        string += "  {val},\n".format(val = value)
+        string += "  {val},\n".format(val=value)
     string += "};\n"
 
-    
     ### nuclear data ###
-    
+
     # nucleus mass number A
     string += "static constexpr std::array<int16_t, size> nucleusA = {\n"
     for p in particle_db.values():
         A = p.get('A', 0)
-        string += "  {val},\n".format(val = A)
+        string += "  {val},\n".format(val=A)
     string += "};\n"
-    
+
     # nucleus charge number Z
     string += "static constexpr std::array<int16_t, size> nucleusZ = {\n"
     for p in particle_db.values():
         Z = p.get('Z', 0)
-        string += "  {val},\n".format(val = Z)
+        string += "  {val},\n".format(val=Z)
     string += "};\n"
 
     return string
 
 
 ###############################################################
-# 
+#
 # return string with a list of classes for all particles
-# 
+#
 def gen_classes(particle_db):
 
     string = ("// list of C++ classes to access particle properties\n"
               "/** @defgroup ParticleClasses \n"
               "    @{ */\n")
-    
+
     for cname in particle_db:
         if cname == "Nucleus":
             string += "// skipping Nucleus"
@@ -427,18 +449,19 @@ def gen_classes(particle_db):
             if (particle_db[cname_anti]['name'] == particle_db[cname]['antiName']):
                 antiP = cname_anti
                 break
-        
+
         string += "\n"
         string += "/** @class " + cname + "\n\n"
         string += " * Particle properties are taken from the PYTHIA8 ParticleData.xml file:<br>\n"
-        string += " *  - pdg=" + str(particle_db[cname]['pdg']) +"\n"
+        string += " *  - pdg=" + str(particle_db[cname]['pdg']) + "\n"
         string += " *  - mass=" + str(particle_db[cname]['mass']) + " GeV \n"
-        string += " *  - charge= " + str(particle_db[cname]['electric_charge'] // 3) + " \n"
+        string += " *  - charge= " + \
+            str(particle_db[cname]['electric_charge'] // 3) + " \n"
         string += " *  - name=" + str(cname) + "\n"
         string += " *  - anti=" + str(antiP) + "\n"
         if (particle_db[cname]['isNucleus']):
             string += " *  - nuclear A=" + str(particle_db[cname]['A']) + "\n"
-            string += " *  - nuclear Z=" + str(particle_db[cname]['Z']) + "\n"        
+            string += " *  - nuclear Z=" + str(particle_db[cname]['Z']) + "\n"
         string += "*/\n\n"
         string += "class " + cname + " {\n"
         string += "   /** @cond EXCLUDE_DOXY */ \n"
@@ -459,14 +482,14 @@ def gen_classes(particle_db):
         string += "   /** @endcond */ \n"
         string += "};\n"
 
-    string += "  //! @}\n";
+    string += "  //! @}\n"
 
     return string
 
 
 ###############################################################
-# 
-# 
+#
+#
 def inc_start():
     string = ('// generated by pdxml_reader.py\n'
               '// MANUAL EDITS ON OWN RISK. THEY WILL BE OVERWRITTEN. \n'
@@ -480,8 +503,8 @@ def inc_start():
 
 
 ###############################################################
-# 
-# 
+#
+#
 def detail_start():
     string = ('/** @cond EXCLUDE_DOXY */\n'
               ' namespace particle::detail {\n\n')
@@ -489,45 +512,48 @@ def detail_start():
 
 
 ###############################################################
-# 
-# 
+#
+#
 def detail_end():
     string = "\n}//end namespace particle::detail\n /** @endcond */"
     return string
 
 ###############################################################
-# 
-# 
+#
+#
+
+
 def inc_end():
     string = "/** @} */\n} // end namespace corsika"
     return string
 
 
 ###################################################################
-# 
-# Serialize particle_db into file 
-# 
+#
+# Serialize particle_db into file
+#
 def serialize_particle_db(particle_db, file):
     pickle.dump(particle_db, file)
 
-    
+
 ###################################################################
-# 
+#
 # Main function
-# 
+#
 if __name__ == "__main__":
-    
+
     if len(sys.argv) != 4:
-        print("usage: {:s} <ParticleData.xml> <NuclearData.xml> <ParticleClassNames.xml>".format(sys.argv[0]), file=sys.stderr)
+        print("usage: {:s} <ParticleData.xml> <NuclearData.xml> <ParticleClassNames.xml>".format(
+            sys.argv[0]), file=sys.stderr)
         sys.exit(1)
-        
+
     print("\n       pdxml_reader.py: automatically produce particle properties from input files\n")
-    
-    names = class_names(sys.argv[3])
+
+    names = read_class_names(sys.argv[3])
     particle_db = OrderedDict()
     read_pythia_db(sys.argv[1], particle_db, names)
     read_nuclei_db(sys.argv[2], particle_db, names)
-    
+
     with open("GeneratedParticleProperties.inc", "w") as f:
         print(inc_start(), file=f)
         print(gen_internal_enum(particle_db), file=f)
@@ -535,14 +561,13 @@ if __name__ == "__main__":
         print(detail_start(), file=f)
         print(gen_properties(particle_db), file=f)
         print(gen_conversion_PDG_ngc(particle_db), file=f)
-        print(detail_end(), file=f) 
+        print(detail_end(), file=f)
         print(inc_end(), file=f)
 
     with open("GeneratedParticleClasses.inc", "w") as f:
         print(inc_start(), file=f)
         print(gen_classes(particle_db), file=f)
         print(inc_end(), file=f)
-    
+
     with open("particle_db.pkl", "wb") as f:
         serialize_particle_db(particle_db, f)
-    
-- 
GitLab