    _hepmcloader: Any = None
    input_files: Collection[str] = None
    run_numbers: Collection[int] = None
    event_numbers: Collection[int] = None
        for hepmc_file, run_number, event_number in files_runs_events:
            logging.getLogger("HepMC2EventBatch").debug(f"Loading file {hepmc_file}")
            self._hepmcloader.load(hepmc_file, run_number, event_number)
            logging.getLogger("HepMC2EventBatch").debug("Loaded.")
    Adapter to read HepMC2 files compressed in a tar archive. Requires SQLamarr.
    A usage sketch is given at the end of this module.
        regexp_runNumber: str = "Run([0-9]+)",
        regexp_evtNumber: str = "evt([0-9]+)",
        regexp_totEvents: str = r"([0-9]+)ev[^\w]",
        max_event: Optional[int] = None,
        events_per_batch: Optional[int] = None,
        self.logger = logging.getLogger("CompressedHepMCLoad")
    def __call__(self, database):
    def _get_run_number(self, filename: str) -> int:
    def _get_evt_number(self, filename: str, default: int) -> int:
        return int(matches[-1]) if matches else default
    def _get_number_of_events(self, filename: str, default: int) -> int:
        return int(matches[-1]) if matches else default
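
    # Example of what the default filename patterns extract; the filename is
    # hypothetical and for illustration only:
    #
    #   >>> import re
    #   >>> fname = "Gauss_Run1234_evt56_100ev.mc2"
    #   >>> re.findall("Run([0-9]+)", fname)[-1]
    #   '1234'
    #   >>> re.findall("evt([0-9]+)", fname)[-1]
    #   '56'
    #   >>> re.findall(r"([0-9]+)ev[^\w]", fname)[-1]
    #   '100'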
    def archive_mirror(self, filename: str):
        # Create a uniquely-named scratch directory for the archive contents
        # and remove it when done.
        tmp_dir = os.path.join(
            f"pylamarr.tmp.{random.randint(0, 0xFFFFFF):06x}"

        self.logger.info(f"Creating temporary directory {tmp_dir}")

        self.logger.info(f"Removing directory {tmp_dir}")
        shutil.rmtree(tmp_dir)
        Apply patches to the HepMC2 file to avoid a segmentation fault in the HepMC3 ASCII reader.

        dst_lines = []
        requires_particle_gun_patch = False
        src_lines = input_file_data.split('\n')
        if len([li for li in src_lines if li.replace("\n", "").replace(" ", "") != ""]) == 0:
            self.logger.warning("No valid line found in input file")

        for line in src_lines:
            line = line[:-1] if len(line) > 0 and line[-1] == '\n' else line
            if len(line) > 0 and line[0] == 'E':
                tokens = line.split(" ")

                if int(tokens[6]) == 1:

                    # Bump the vertex count (tokens[8]) to make room for the
                    # dummy vertex added below, and emit an 'N' (weight names)
                    # record after the patched 'E' record.
                    n_vertices = int(tokens[8])
                    tokens[8] = str(n_vertices + 1)
                    tokens[12 + int(tokens[11])] = str(1)

                    requires_particle_gun_patch = True
                    dst_lines += [" ".join(tokens), 'N 1 "0"']
                else:
                    dst_lines.append(line)
            elif len(line) > 0 and line[0] == 'V' and requires_particle_gun_patch:

                # Insert a dummy vertex and a status-3 placeholder particle that
                # ends at the event's first real vertex.
                vertex_id = line.split(" ")[1]
                dst_lines += [
                    "V -99999 0 0 0 0 0 0 0 0",
                    "P 0 0 0. 0. 0. 0. 0. 3 0 0 %s 0" % vertex_id,
                    line,
                ]
                requires_particle_gun_patch = False
            else:
                dst_lines.append(line)

        return "\n".join(dst_lines)
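
    # Sketch of applying the patch above to a standalone HepMC2 ASCII file.
    # Illustrative only: the patching method's actual name is not shown in this
    # excerpt, so "_patch" and the file names are placeholders:
    #
    #   with open("events.mc2") as src:
    #       patched_text = loader._patch(src.read())
    #   with open("events_patched.mc2", "w") as dst:
    #       dst.write(patched_text)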
    def files_in_archive(self, filename: str, tmp_dir: str):
        # Extract each *.mc2 member, write a patched copy into tmp_dir, and
        # collect the paths of the copies.
        ret = []
        with tarfile.open(filename, mode='r:*') as tar:
            for member in tar.getmembers():
                if member.isfile() and member.name.endswith("mc2"):
                    key = os.path.basename(member.name)
                    file_content = tar.extractfile(member).read().decode('utf-8')
                    patched_filename = os.path.join(tmp_dir, os.path.basename(key))
                    with open(patched_filename, 'w') as file_copy:

                    ret.append(patched_filename)
            raise ValueError(
                "CompressedHepMCLoader tried loading with uninitialized db.\n"
        batches = {k: [] for k in ('input_files', 'run_numbers', 'event_numbers')}
        for i_file, hepmc_file in enumerate(files_in_archive):
                n_events = len(batches['event_numbers'])
                batch_info.update(dict(

                    batch_id=batch_counter,
                    description=f"Run {run_number}",
                batches = {k: [] for k in batches}
            batches['input_files'].append(hepmc_file)
            batches['run_numbers'].append(run_number)
            batches['event_numbers'].append(event_number)
            f"{self._particle_gun_patched_events} / {event_counter} events were identified as generated with a "
            "Particle Gun and patched."