PyLamarr
Pythonizations for the ultra-fast simulation option for the LHCb experiment
 
Loading...
Searching...
No Matches
UprootLoader.py
1import numpy as np
2import logging
3from typing import Collection, Union, Dict, Optional, Any
4from dataclasses import dataclass
5
6import PyLamarr
7
8@dataclass
10 dataframe_dict: Dict[str, Any] = None
11 database: Any = None
12 bid_var: Collection[str] = None
13 batch_selector: Any = None
14
def load(self):
    """Append the rows of the selected batch to the database tables.

    For every ``(name, dataframe)`` pair in ``dataframe_dict``, the rows
    whose ``bid_var`` column equals ``batch_selector`` are kept, the
    ``bid_var`` column is dropped, and the remainder is appended to the
    table ``name`` through a connection obtained from ``database``.
    """
    with self.database.connect() as connection:
        batch_column = self.bid_var
        for table_name, table in self.dataframe_dict.items():
            in_batch = table[batch_column] == self.batch_selector
            batch_rows = table[in_batch].drop(columns=[batch_column])
            # The DataFrame index is not written to the table.
            batch_rows.to_sql(
                table_name, connection, if_exists='append', index=False
            )
23
25 """
26 Ease loading nTuples generated with Gaussino into the SQLite
27 event model of Lamarr.
28 As the other python data loaders, UprootLoader should be configured in the
29 constructor.
30 The configured object is then called during the event loop to
31 pass the updated connection to the SQLamarr.SQLite3DB instance.
32
33 The ROOT file with path `input_file` is expected to contain a
34 `TDirectory` (named as configured
35 with the `collector` keyword) with a `TTree` per SQLite table (the
36 names of the `TTree`s are listed in `tables`) and each `TTree` should
37 include a column (titled as indicated by `batch_id_var`) providing
38 a unique identifier for the event number.
39
40 """
def __init__(self,
             input_file: str,
             tables: Collection[str],
             collector: str = 'LamarrCollector',
             batch_id_var: str = 'batch_id',
             max_rows: Optional[int] = None
             ):
    """Read the configured trees from a ROOT file into memory.

    Args:
        input_file: Path of the ROOT file, passed to ``uproot.open``.
        tables: Names of the trees to read from the ``collector``
            directory; one pandas DataFrame is built per name.
        collector: Key of the directory, inside the file, that holds
            the trees.
        batch_id_var: Name of the column identifying the batch. The
            distinct values found in the first table define the
            available batches.
        max_rows: Passed as ``entry_stop`` when reading each tree;
            ``None`` reads every entry.

    Raises:
        ValueError: If ``tables`` is empty.
    """
    # Fail fast, before the heavy imports and any file access.  Using the
    # iterator (rather than ``tables[0]``) also supports collections that
    # are not subscriptable.
    first_table = next(iter(tables), None)
    if first_table is None:
        raise ValueError("UprootLoader requires at least one table name.")

    import uproot
    import pandas as pd

    self.input_file = input_file
    self.tables = tables
    self.bid_var = batch_id_var
    self._db = None

    # Everything is read eagerly into NumPy arrays, so the file can be
    # closed as soon as the reads are done instead of leaking the handle.
    with uproot.open(input_file) as root_file:
        root_dir = root_file[collector]

        self._batch_codes = np.unique(
            root_dir[first_table].arrays(
                self.bid_var, library='np', entry_stop=max_rows
            )[self.bid_var]
        )

        self._dataframe = {
            n: pd.DataFrame(
                root_dir[n].arrays(library='np', entry_stop=max_rows)
            )
            for n in self.tables
        }

    self.logger = logging.getLogger(self.__class__.__name__)
67
@property
def batches(self):
    """Positional indices ``0 .. N-1`` of the ``N`` distinct batch codes."""
    n_batches = len(self._batch_codes)
    return np.arange(n_batches)
71
72
def __call__(self, db):
    """Store ``db`` as the database to load into and return the loader."""
    loader = self
    loader._db = db
    return loader
76
77
def load(self, batch):
    """Yield the event batch found at position ``batch``.

    This is a generator: nothing runs, and in particular no error is
    raised, until the returned iterator is first advanced.

    Args:
        batch: Index into the array of distinct batch codes collected at
            construction time.

    Yields:
        A single ``UprootEventBatch`` that shares the in-memory
        dataframes and selects the rows whose batch column equals the
        code at position ``batch``.

    Raises:
        ValueError: If no database has been set; the loader must be
            called with the database before loading.
    """
    if self._db is None:
        raise ValueError("UprootLoader tried loading with uninitialized db.\n"
                         "Missed ()?")

    self.logger.debug("Preparing uproot loader for batch %s", batch)
    yield UprootEventBatch(
        description=f"batch_id: {batch}",
        dataframe_dict=self._dataframe,
        database=self._db,
        bid_var=self.bid_var,
        batch_selector=self._batch_codes[batch],
    )
Ease loading nTuples generated with Gaussino into the SQLite event model of Lamarr.