PyLamarr
Pythonizations for the ultra-fast simulation option for the LHCb experiment
 
Loading...
Searching...
No Matches
pandasio.py
1import pandas as pd
2
3from dataclasses import dataclass, field
4from typing import List, Optional
5from SQLamarr import SQLite3DB
6import PyLamarr
7
8
def __init__(self, db: SQLite3DB):
    """Remember the database handle used by later ``load`` calls.

    Args:
        db: Database wrapper; ``load`` calls its ``connect()`` method as a
            context manager to obtain a live connection.
    """
    # Only the handle is stored here; no connection is opened yet.
    self._connection: SQLite3DB = db
12
def load(self, accept_multiple_batches: bool = False, **dataframes):
    """Append each keyword-named DataFrame to the table of the same name.

    Every keyword argument maps a table name to the ``pandas.DataFrame``
    whose rows are appended to that table (``if_exists='append'``, index
    not written). After each append the table's row count is compared
    with the length of the frame just written; a mismatch means the table
    holds rows other than those just appended.

    Args:
        accept_multiple_batches: When exactly ``False`` (the default), a
            row-count mismatch raises. Any other value disables the check.
        **dataframes: ``table_name=DataFrame`` pairs to load.

    Raises:
        AttributeError: If a table's row count differs from the length of
            the frame just appended and ``accept_multiple_batches`` is
            ``False``. The check runs *after* the append, so the rows have
            already been handed to the connection when this is raised.
            NOTE(review): whether ``connect()`` rolls back on error is not
            visible here -- confirm.
    """
    with self._connection.connect() as db:
        for table, df in dataframes.items():
            df.to_sql(table, db, if_exists='append', index=False)
            # The table name comes from a keyword-argument name and is
            # interpolated directly: identifiers cannot be bound as SQL
            # parameters.
            row = db.execute(f"SELECT COUNT(*) FROM {table}").fetchone()
            # Truthiness also covers a ``None`` result, which ``len()``
            # would turn into an unrelated TypeError.
            table_len = row[0] if row else 0
            # Identity test kept on purpose: only a literal ``False``
            # enables the check, exactly as before.
            if table_len != len(df) and accept_multiple_batches is False:
                raise AttributeError(
                    f"Handling multiple batches is disabled: table "
                    f"'{table}' holds {table_len} rows after appending "
                    f"{len(df)}; pass accept_multiple_batches=True to "
                    f"allow it"
                )
21
22
23@dataclass
25 table: str
26 dataframes: List[pd.DataFrame] = field(default_factory=lambda: [])
27 batch_ids: Optional[List[int]] = None
28
@PyLamarr.method
def __call__(self, db):
    """Snapshot the configured table into a new DataFrame.

    Reads every row of the table named by ``self.table`` through ``db``
    and appends the resulting ``pandas.DataFrame`` to ``self.dataframes``.
    Nothing is returned.

    Args:
        db: Connection object handed to ``pandas.read_sql_query``.
            NOTE(review): presumably supplied by the ``PyLamarr.method``
            decorator -- confirm against its definition.
    """
    query = f"SELECT * FROM {self.table}"
    snapshot = pd.read_sql_query(query, db)
    self.dataframes.append(snapshot)
32
@property
def dataframe(self):
    """Return all collected batches as one DataFrame tagged by batch id.

    Each frame in ``self.dataframes`` gets a ``batch_id`` column (on a
    copy, via ``DataFrame.assign``; the stored frames are not modified)
    and the results are concatenated with a fresh integer index.

    The ids come from ``self.batch_ids`` when it is set; otherwise the
    position of each frame in ``self.dataframes`` is used. Pairing is done
    with ``zip``, so if ``batch_ids`` and ``dataframes`` differ in length
    the surplus entries are ignored.

    Returns:
        pandas.DataFrame: The concatenated batches. When no batch has been
        collected yet, an empty frame with only a ``batch_id`` column.
    """
    if not self.dataframes:
        # pd.concat raises ValueError("No objects to concatenate") on an
        # empty sequence; return an empty result instead.
        return pd.DataFrame(columns=["batch_id"])

    if self.batch_ids is None:
        labels = range(len(self.dataframes))
    else:
        labels = self.batch_ids

    tagged = [df.assign(batch_id=bid) for bid, df in zip(labels, self.dataframes)]
    return pd.concat(tagged, ignore_index=True)
38