PyLamarr
Pythonizations for the ultra-fast simulation option for the LHCb experiment
 
PyPhotons.py
import os.path
import pickle
from dataclasses import dataclass
from pathlib import Path
from typing import Union

import numpy as np
import pandas as pd
import tensorflow as tf
import IPython

import PyLamarr

# Run TensorFlow single-threaded; presumably the pipeline issues many small
# inference batches, where thread-pool overhead would outweigh any speed-up.
tf.config.threading.set_inter_op_parallelism_threads(1)
tf.config.threading.set_intra_op_parallelism_threads(1)


@dataclass(frozen=True)
class PyPhotons:
    # NOTE: the original class statement is missing from this rendering of the
    # page; the name "PyPhotons" is an assumption based on the file name.
    efficiency_model: Union[str, None]
    resolution_model: Union[str, None]
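    # Both attributes are expected to point at directories that bundle a saved
    # Keras model with pickled scikit-learn preprocessors ('tX.pkl', plus
    # 'tY.pkl' for the resolution model); this is inferred from the loading
    # code in _eval_efficiency/_eval_smearing below, not a documented contract.
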
    def invertColumnTransformer(self, column_transformer, preprocessed_X):
        """Apply the inverse of a fitted sklearn ColumnTransformer.

        ColumnTransformer provides no inverse_transform of its own, so each
        sub-transformer is inverted block by block and the resulting columns
        are put back in their original order.
        """
        from sklearn.compose import ColumnTransformer
        assert isinstance(column_transformer, ColumnTransformer)

        iCol = 0
        postprocessed_split = dict()
        for name, algo, cols in column_transformer.transformers_:
            preprocessed_cols = list()
            for _ in range(len(cols)):
                preprocessed_cols.append(preprocessed_X[:, iCol][:, None])
                iCol += 1
            preprocessed_block = np.concatenate(preprocessed_cols, axis=1)
            if algo == "passthrough":
                postprocessed_split[name] = preprocessed_block
            else:
                postprocessed_split[name] = algo.inverse_transform(preprocessed_block)

        X = [None] * preprocessed_X.shape[1]
        for name, _, cols in column_transformer.transformers_:
            for i, iCol in enumerate(cols):
                X[iCol] = postprocessed_split[name][:, i][:, None]

        return np.concatenate(X, axis=1)

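    # Efficiency head. Judging by how the output is consumed in __call__, the
    # network presumably returns per-photon probabilities for mutually
    # exclusive reconstruction outcomes (photon, photon from a merged pi0,
    # and implicitly "lost").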
    def _eval_efficiency(self, X):
        efficiency_model = tf.keras.models.load_model(self.efficiency_model)
        with open(os.path.join(self.efficiency_model, 'tX.pkl'), 'rb') as tx_file:
            tX = pickle.load(tx_file)

        y_hat = efficiency_model.predict(tX.transform(X), batch_size=10000, verbose=0)

        return y_hat

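    # Resolution head. The 64 standard-normal columns appended to the inputs
    # look like the latent noise of a GAN-style generative model, so repeated
    # calls on the same photons yield different smeared outputs; tY is then
    # inverted to bring the network outputs back to physical scales.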
    def _eval_smearing(self, X):
        smearing_model = tf.keras.models.load_model(self.resolution_model)
        with open(os.path.join(self.resolution_model, 'tX.pkl'), 'rb') as tx_file:
            tX = pickle.load(tx_file)

        with open(os.path.join(self.resolution_model, 'tY.pkl'), 'rb') as ty_file:
            tY = pickle.load(ty_file)

        prep_x = tX.transform(X)
        n_entries, _ = X.shape
        prep_y_hat = smearing_model.predict(
            np.c_[prep_x, np.random.normal(0, 1, (n_entries, 64))],
            verbose=0,
            batch_size=10000,
        )

        ret = self.invertColumnTransformer(tY, prep_y_hat)
        return ret

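    # Entry point of the pipeline step. PyLamarr.method presumably wires
    # __call__ into the Lamarr execution flow, passing the database connection
    # that holds the generator-level tables filled by the upstream steps.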
    @PyLamarr.method
    def __call__(self, db):
        gen_photons = pd.read_sql_query("""
            SELECT gev.datasource_id AS event_id, p.*, v.*
            FROM MCParticles AS p
            JOIN MCVertices AS v
              ON p.production_vertex = v.mcvertex_id
              AND p.genevent_id = v.genevent_id
            JOIN GenEvents AS gev
              ON gev.genevent_id = p.genevent_id
            WHERE
              pid = 22
            AND
              p.pe > 1000
            AND
              abs(v.x) < 200
            AND
              abs(v.y) < 200
            AND
              abs(v.z) < 2000
            """, db)

        event_id = gen_photons.event_id.values.astype(np.int64)
        mcparticle_id = gen_photons.mcparticle_id.values.astype(np.int32)
        ovx, ovy, ovz = gen_photons[['x', 'y', 'z']].values.astype(np.float64).T
        e, px, py, pz = gen_photons[['pe', 'px', 'py', 'pz']].values.astype(np.float64).T

        tx = px/pz
        ty = py/pz

        ecal_z = 12689.  # mm
        ecal_x = ovx + tx * (ecal_z - ovz)
        ecal_y = ovy + ty * (ecal_z - ovz)
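        # Being neutral, photons travel in straight lines: the impact point on
        # the calorimeter follows from extrapolating the slopes tx = px/pz and
        # ty = py/pz from the production vertex to the nominal ECAL plane at
        # z = 12689 mm.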

        mask = (
            (ecal_x > -4e3) & (ecal_x < 4e3) &
            (ecal_y > -4e3) & (ecal_y < 4e3) &
            (tx > -0.35) & (tx < 0.35) &
            (ty > -0.25) & (ty < 0.25) &
            (pz > 0) & (pz < 200e3)
        )
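        # Fiducial-acceptance filter: a box in the ECAL plane plus slope and
        # momentum windows. The numbers appear to approximate the LHCb ECAL
        # footprint rather than implement an exact geometry.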

        X_eff = np.c_[ecal_x, ecal_y, np.log(e/1e3), tx, ty, ovx, ovy, ovz,
                      np.zeros_like(ecal_x)]

        if self.efficiency_model not in [None, "None", "none", "null"]:
            efficiency = self._eval_efficiency(X_eff)
            r = np.random.uniform(0, 1, len(X_eff))

            ceff = np.cumsum(efficiency, axis=1)
            eff_as_photon = r < ceff[:, 0]
            eff_as_photon_from_pi0 = (r > ceff[:, 0]) & (r < ceff[:, 1])
        else:
            eff_as_photon = np.ones(len(X_eff), dtype=bool)
            eff_as_photon_from_pi0 = np.zeros(len(X_eff), dtype=bool)
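        # The class probabilities are turned into one categorical draw per
        # photon by comparing a single uniform r against the cumulative sums:
        # r < P(photon) selects "photon", while
        # P(photon) <= r < P(photon) + P(pi0) selects "photon from pi0".
        # Without an efficiency model, every photon is accepted as a photon.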

        if self.resolution_model not in [None, "None", "none", "null"]:
            X_res = np.c_[ecal_x, ecal_y, np.log(e/1e3), tx, ty, ovx, ovy, ovz,
                          np.zeros_like(ecal_x), eff_as_photon, eff_as_photon_from_pi0]
            dx, dy, de_rel, reco_PhotonID, reco_IsNotE, reco_IsNotH = self._eval_smearing(X_res).T

            sigma_x = np.sqrt(np.exp(-1.65 * np.log(e) + 17.0))
            sigma_y = np.sqrt(np.exp(-1.65 * np.log(e) + 17.0))
            sigma_e = np.sqrt(np.exp(0.89 * np.log(e) + 5.1))
        else:
            dx, dy, de_rel, reco_PhotonID, reco_IsNotE, reco_IsNotH = np.zeros((6, len(e)))
            sigma_x, sigma_y, sigma_e = np.zeros((3, len(e)))
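        # The sigma parameterisations are power laws in the true energy,
        # sigma^2 = exp(a * log(E) + b): position resolution improves with E
        # while the absolute energy resolution grows. The coefficients look
        # like an ad-hoc fit; below they drive the cluster smearing in place
        # of the network's dx/dy/de_rel outputs, which stay unused.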

        # IPython.embed()

        clusters = pd.DataFrame(dict(
            mask=mask & eff_as_photon,
            event_id=event_id,
            type=np.full_like(mcparticle_id, 4),
            calocluster_id=mcparticle_id,
            center_x=ecal_x,   # + dx, or np.random.normal(ecal_x, sigma_x)
            center_y=ecal_y,   # + dy, or np.random.normal(ecal_y, sigma_y)
            z=ecal_z,
            # energy=e * (1. + de_rel),
            energy=np.random.normal(e, sigma_e),
            cov_xx=sigma_x * sigma_x,
            cov_yy=sigma_y * sigma_y,
            cov_ee=sigma_e * sigma_e,
        )).query("mask").drop(columns=['mask'])

        clusters.to_sql("Cluster", db, if_exists='replace')
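        # The throwaway "mask" column is a pandas idiom: build every candidate
        # row, keep the accepted photons with .query("mask"), then drop the
        # helper column before writing. The cluster type code 4 is assumed to
        # mean "photon" downstream; it is not defined in this file.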

        cluster_info = pd.concat([
            pd.DataFrame(dict(
                mask=eff_as_photon,
                event_id=event_id,
                calocluster_id=mcparticle_id,
                info_key=np.full_like(mcparticle_id, info_key),
                info_value=info_value,
            )).query("mask").drop(columns=['mask'])
            for info_key, info_value in [
                (383, reco_IsNotH),
                (382, reco_IsNotE),
                (380, reco_PhotonID),
                # These magic numbers come from
                # https://lhcb-doxygen.web.cern.ch/lhcb-doxygen/davinci/v50r5/d2/d57/class_l_h_cb_1_1_proto_particle.html
            ]
        ]).sort_values('calocluster_id', ignore_index=True)

        cluster_info.to_sql("ClusterInfo", db, if_exists='replace')
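        # ClusterInfo is a long-format key/value table: one row per (cluster,
        # quantity) pair, with keys 380/382/383 mapping to PhotonID, IsNotE
        # and IsNotH in the ProtoParticle extra-info enum linked above. Note
        # that if_exists='replace' makes each run overwrite, not append to,
        # both output tables.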