efficiency_model: Union[str, None]
resolution_model: Union[str, None]
def invertColumnTransformer(self, column_transformer, preprocessed_X):
    from sklearn.compose import ColumnTransformer
    assert isinstance(column_transformer, ColumnTransformer)

    # Invert each fitted transformer on its own block of columns
    postprocessed_split = dict()
    for name, algo, cols in column_transformer.transformers_:
        preprocessed_cols = list()
        for iCol in cols:
            preprocessed_cols.append(preprocessed_X[:, iCol][:, None])

        preprocessed_block = np.concatenate(preprocessed_cols, axis=1)
        if algo == "passthrough":
            postprocessed_split[name] = preprocessed_block
        else:
            postprocessed_split[name] = algo.inverse_transform(preprocessed_block)

    # Reassemble the inverted blocks in the original column order
    X = [None] * preprocessed_X.shape[1]
    for name, _, cols in column_transformer.transformers_:
        for i, iCol in enumerate(cols):
            X[iCol] = postprocessed_split[name][:, i][:, None]

    return np.concatenate(X, axis=1)
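# Quick round-trip check for invertColumnTransformer. In this excerpt the
# method never uses `self`, so the sketch below passes None for it; the
# inversion assumes every input column appears in exactly one transformer and
# that column positions are preserved in the ColumnTransformer output.
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler, StandardScaler

ct = ColumnTransformer([("pos", MinMaxScaler(), [0, 1]),
                        ("mom", StandardScaler(), [2, 3])])
X_demo = np.random.normal(size=(100, 4))
tX_demo = ct.fit_transform(X_demo)
assert np.allclose(X_demo, invertColumnTransformer(None, ct, tX_demo))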
def _eval_efficiency(self, X):
    # (opening of the pickled input transformer `tx_file` is elided in the source)
    tX = pickle.load(tx_file)
    # (loading of the trained `efficiency_model` is elided in the source)
    y_hat = efficiency_model.predict(tX.transform(X), batch_size=10000, verbose=0)
    return y_hat  # per-photon class probabilities (return reconstructed from the caller's usage)
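# The elided lines above presumably open pickled preprocessing objects before
# pickle.load; a minimal sketch of that pattern (the file path below is a
# placeholder, not taken from the source):
import pickle

with open("models/transformer_X.pkl", "rb") as tx_file:  # placeholder path
    tX = pickle.load(tx_file)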
def _eval_smearing(self, X):
    # (opening of the pickled input transformer `tx_file` is elided in the source)
    tX = pickle.load(tx_file)
    # (opening of the pickled output transformer `ty_file` is elided in the source)
    tY = pickle.load(ty_file)
    # (loading of the trained `smearing_model` is elided in the source)

    prep_x = tX.transform(X)
    n_entries, _ = X.shape
    # Condition the generative model on the preprocessed features plus one
    # 64-dimensional standard-normal latent vector per entry
    prep_y_hat = smearing_model.predict(
        np.c_[prep_x, np.random.normal(0, 1, (n_entries, 64))],
        # (remaining keyword arguments elided in the source)
    )
    # (inverse transformation of prep_y_hat through tY, e.g. with
    #  invertColumnTransformer, and the return statement are elided)
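# Why concatenate 64 random normals to the features? The smearing model is a
# conditional generative network: fresh latent noise makes repeated evaluations
# of the same photon yield different smearings. Toy stand-in (`toy_generator`
# is a placeholder, not the trained network):
import numpy as np

def toy_generator(z):
    # any deterministic map that depends on the latent block
    return z[:, -64:].sum(axis=1, keepdims=True)

feats = np.zeros((3, 11))  # 11 conditioning features, matching X_res below

def draw():
    return toy_generator(np.c_[feats, np.random.normal(0, 1, (len(feats), 64))])

assert not np.allclose(draw(), draw())  # fresh noise => different smearing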
def __call__(self, db):
    # Generator-level photons joined with their production vertex. The table
    # names in the FROM/JOIN clauses are inferred from the column names; the
    # photon selection (WHERE clause) is elided in the source.
    gen_photons = pd.read_sql_query("""
        SELECT gev.datasource_id AS event_id, p.*, v.*
        FROM MCParticles AS p
        INNER JOIN MCVertices AS v
            ON p.production_vertex == v.mcvertex_id
            AND p.genevent_id == v.genevent_id
        INNER JOIN GenEvents AS gev
            ON gev.genevent_id = p.genevent_id
        -- (WHERE clause elided in the source)
        """, db)
    event_id = gen_photons.event_id.values.astype(np.int64)
    mcparticle_id = gen_photons.mcparticle_id.values.astype(np.int32)
    ovx, ovy, ovz = gen_photons[['x', 'y', 'z']].values.astype(np.float64).T
    e, px, py, pz = gen_photons[['pe', 'px', 'py', 'pz']].values.astype(np.float64).T
    # Straight-line extrapolation to the ECAL plane; the slope definitions
    # (tx = px/pz, ty = py/pz) and the constant ecal_z are elided in the source
    ecal_x = ovx + tx * (ecal_z - ovz)
    ecal_y = ovy + ty * (ecal_z - ovz)
    # Geometric and kinematic acceptance of the calorimeter
    mask = (
        (ecal_x > -4e3) & (ecal_x < 4e3) &
        (ecal_y > -4e3) & (ecal_y < 4e3) &
        (tx > -0.35) & (tx < 0.35) &
        (ty > -0.25) & (ty < 0.25) &
        (pz > 0) & (pz < 200e3)
    )
    # Features for the reconstruction-efficiency model
    X_eff = np.c_[ecal_x, ecal_y, np.log(e/1e3), tx, ty, ovx, ovy, ovz, np.zeros_like(ecal_x)]

    if self.efficiency_model is not None:  # (branch condition reconstructed)
        efficiency = self._eval_efficiency(X_eff)
        # Sample the reconstruction category from the cumulative class probabilities
        r = np.random.uniform(0, 1, len(X_eff))
        ceff = np.cumsum(efficiency, axis=1)
        eff_as_photon = r < ceff[:, 0]
        eff_as_photon_from_pi0 = (r > ceff[:, 0]) & (r < ceff[:, 1])
    else:
        # Fallback without a trained model: every photon is reconstructed as a photon
        eff_as_photon = np.ones(len(X_eff), dtype=bool)
        eff_as_photon_from_pi0 = np.zeros(len(X_eff), dtype=bool)
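    # The category sampling above is the inverse-CDF trick for a three-way
    # categorical: with per-row class probabilities p, a uniform draw r lands
    # in [0, p0) -> photon, [p0, p0+p1) -> photon from pi0, else lost.
    # Standalone illustration:
    #     probs = np.tile([0.7, 0.2, 0.1], (100000, 1))
    #     r = np.random.uniform(0, 1, len(probs))
    #     c = np.cumsum(probs, axis=1)
    #     (r < c[:, 0]).mean()                    # ~0.7
    #     ((r > c[:, 0]) & (r < c[:, 1])).mean()  # ~0.2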
    if self.resolution_model is not None:  # (branch condition reconstructed)
        # Conditioning features for the smearing model, including the sampled
        # reconstruction-category flags
        X_res = np.c_[ecal_x, ecal_y, np.log(e/1e3), tx, ty, ovx, ovy, ovz,
                      np.zeros_like(ecal_x), eff_as_photon, eff_as_photon_from_pi0]
        dx, dy, de_rel, reco_PhotonID, reco_IsNotE, reco_IsNotH = self._eval_smearing(X_res).T

        # Energy-dependent resolution parametrizations
        sigma_x = np.sqrt(np.exp(-1.65 * np.log(e) + 17.0))
        sigma_y = np.sqrt(np.exp(-1.65 * np.log(e) + 17.0))
        sigma_e = np.sqrt(np.exp(0.89 * np.log(e) + 5.1))
    else:
        # Fallback without a trained model: no smearing, zero resolution
        dx, dy, de_rel, reco_PhotonID, reco_IsNotE, reco_IsNotH = np.zeros((6, len(e)))
        sigma_x, sigma_y, sigma_e = np.zeros((3, len(e)))
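    # The sigma parametrizations are power laws in disguise:
    # sqrt(exp(a*ln(E) + b)) == exp(b/2) * E**(a/2), so sigma_x and sigma_y
    # fall as E**(-0.825) while sigma_e grows as E**0.445. Quick check:
    #     E = np.array([1e3, 1e4, 1e5])
    #     a, b = -1.65, 17.0
    #     np.allclose(np.sqrt(np.exp(a*np.log(E) + b)), np.exp(b/2) * E**(a/2))  # True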
    # Build the smeared clusters; rows failing acceptance or efficiency are
    # dropped via the temporary `mask` column
    clusters = pd.DataFrame(dict(
        mask=mask & eff_as_photon,
        # (one column definition elided in the source)
        type=np.full_like(mcparticle_id, 4),
        calocluster_id=mcparticle_id,
        # (cluster position columns, presumably built from dx, dy, sigma_x and
        #  sigma_y, are elided in the source)
        energy=np.random.normal(e, sigma_e),
        cov_xx=sigma_x * sigma_x,
        cov_yy=sigma_y * sigma_y,
        cov_ee=sigma_e * sigma_e,
    )).query("mask").drop(columns=['mask'])

    clusters.to_sql("Cluster", db, if_exists='replace')
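    # The mask/query/drop idiom keeps the row selection inside the frame
    # construction. Minimal standalone example:
    #     df = pd.DataFrame(dict(mask=[True, False, True], v=[1, 2, 3]))
    #     df.query("mask").drop(columns=['mask'])   # keeps the rows with v == 1, 3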
    # Per-cluster quantities in long key/value form, one row per (cluster, key) pair
    cluster_info = pd.concat([
        pd.DataFrame(dict(
            mask=mask & eff_as_photon,  # (selection reconstructed to match Cluster)
            calocluster_id=mcparticle_id,
            info_key=np.full_like(mcparticle_id, info_key),
            info_value=info_value,
        )).query("mask").drop(columns=['mask'])
        for info_key, info_value in [
            # (additional (info_key, info_value) pairs elided in the source)
            (380, reco_PhotonID),
        ]
    ]).sort_values('calocluster_id', ignore_index=True)
    cluster_info.to_sql("ClusterInfo", db, if_exists='replace')