SQLamarr
The stand-alone ultra-fast simulation option for the LHCb experiment
main.cpp
1 // (c) Copyright 2022 CERN for the benefit of the LHCb Collaboration.
2 //
3 // This software is distributed under the terms of the GNU General Public
4 // Licence version 3 (GPL Version 3), copied verbatim in the file "LICENCE".
5 //
6 // In applying this licence, CERN does not waive the privileges and immunities
7 // granted to it by virtue of its status as an Intergovernmental Organization
8 // or submit itself to any jurisdiction.
9 
10 #include <string>
11 #include <unordered_map>
12 #include <vector>
13 #include <iostream>
14 #include "SQLamarr/HepMC2DataLoader.h"
15 #include "SQLamarr/PVFinder.h"
16 #include "SQLamarr/db_functions.h"
17 #include "SQLamarr/MCParticleSelector.h"
18 #include "SQLamarr/PVReconstruction.h"
19 #include "SQLamarr/Plugin.h"
20 #include "SQLamarr/GenerativePlugin.h"
21 #include "SQLamarr/GlobalPRNG.h"
22 #include "SQLamarr/TemporaryTable.h"
23 #include "SQLamarr/BlockLib/LbParticleId.h"
24 #include "SQLamarr/CleanEventStore.h"
25 #include <memory>
26 #include <glob.h>
27 
28 #include "sqlite3.h"
29 #include <stdio.h>
30 
31 #include <filesystem>
32 
33 
34 // Helper function to list files in a dir
35 std::vector<std::string> globVector(const std::string& pattern);
36 
37 
38 int main(int argc, char* argv[])
39 {
40  std::string path{"../temporary_data/HepMC2-ascii/DSt_Pi.hepmc2/evt0.mc2"};
41  //std::string path{"/pclhcb06/landerli/lamarrgridvalidation-test/HepMCexporter/DSt_Pi.hepmc2/*.mc2"};
42 
43  std::vector<std::string> file_paths = globVector(path);
44 // {
45 // "/home/lucio/test-data/DSt_Pi.hepmc2/evt0.mc2",
46 // "/home/lucio/test-data/DSt_Pi.hepmc2/evt1.mc2",
47 // "/home/lucio/test-data/DSt_Pi.hepmc2/evt2.mc2",
48 // "/home/lucio/test-data/DSt_Pi.hepmc2/evt3.mc2",
49 // "/home/lucio/test-data/DSt_Pi.hepmc2/evt4.mc2",
50 // "/home/lucio/test-data/DSt_Pi.hepmc2/evt5.mc2"
51 // };
52 //
53  // Create the in-memory database
54  SQLamarr::SQLite3DB db = SQLamarr::make_database(argc > 1 ? argv[1] : ":memory:" );
56 
57 
58  // Define a data loader to take data from HepMC2-Ascii format
59  SQLamarr::HepMC2DataLoader loader (db);
60 
61  // Loads the data to the database
62  size_t evtNumber = 0;
63  size_t runNumber = 456;
64 
65  for (std::string& file_path: file_paths)
66  if (evtNumber < 100)
67  loader.load(file_path, runNumber, evtNumber++);
68 
69  // Runs the PVFinder algorithm
70  SQLamarr::PVFinder pvfinder(db);
71  pvfinder.execute();
72 
74  mcps.execute();
75 
76  SQLamarr::PVReconstruction pv_reco(db,
78  "../temporary_data/PrimaryVertex/PrimaryVertexSmearing.db",
79  "PVSmearing", "2016_pp_MagUp")
80  );
81  pv_reco.execute();
82 
83  SQLamarr::Plugin acceptance_model (db,
84  "../temporary_data/models/lhcb.trk.2016MU.so",
85  "acceptance",
86  R"(
87  SELECT
88  mcparticle_id,
89  ov.x AS mc_x,
90  ov.y AS mc_y,
91  ov.z AS mc_z,
92  log10(norm2(p.px, p.py, p.pz)) AS mc_log10_p,
93  p.px/p.pz AS mc_tx,
94  p.py/p.pz AS mc_ty,
95  pseudorapidity(p.px, p.py, p.pz) AS mc_eta,
96  azimuthal(p.px, p.py, p.pz) AS mc_phi,
97  abs(p.pid) == 11 AS mc_is_e,
98  abs(p.pid) == 13 AS mc_is_mu,
99  (
100  abs(p.pid) == 211 OR abs(p.pid) == 321 OR abs(p.pid) == 2212
101  ) AS mc_is_h,
102  propagation_charge(p.pid) AS mc_charge
103  FROM MCParticles AS p
104  INNER JOIN MCVertices AS ov ON p.production_vertex = ov.mcvertex_id
105  WHERE
106  p.pz > 1.
107  AND
108  propagation_charge(p.pid) <> 0.
109  )",
110  "tmp_acceptance_out", {"acceptance"}, {"mcparticle_id"}
111  );
112 
113  acceptance_model.execute();
114 
115 
116  SQLamarr::Plugin efficiency_model (db,
117  "../temporary_data/models/lhcb.trk.2016MU.so",
118  "efficiency",
119  R"(
120  SELECT
121  mcparticle_id,
122  ov.x AS mc_x,
123  ov.y AS mc_y,
124  ov.z AS mc_z,
125  log10(norm2(p.px, p.py, p.pz)) AS mc_log10_p,
126  p.px/p.pz AS mc_tx,
127  p.py/p.pz AS mc_ty,
128  pseudorapidity(p.px, p.py, p.pz) AS mc_eta,
129  azimuthal(p.px, p.py, p.pz) AS mc_phi,
130  abs(p.pid) == 11 AS mc_is_e,
131  abs(p.pid) == 13 AS mc_is_mu,
132  (
133  abs(p.pid) == 211 OR abs(p.pid) == 321 OR abs(p.pid) == 2212
134  ) AS mc_is_h,
135  propagation_charge(p.pid) AS mc_charge
136  FROM MCParticles AS p
137  INNER JOIN MCVertices AS ov ON p.production_vertex = ov.mcvertex_id
138  WHERE
139  p.pz > 1.
140  AND
141  propagation_charge(p.pid) <> 0.
142  )",
143  "tmp_efficiency_out",
144  {"not_recoed", "long", "upstream", "downstream"},
145  {"mcparticle_id"}
146  );
147 
148  efficiency_model.execute();
149 
150  SQLamarr::TemporaryTable debug_eff (db,
151  "debug_eff",
152  {
153  "mcparticle_id",
154  "mc_x", "mc_y", "mc_z", "mc_log10_p", "mc_tx", "mc_ty", "mc_eta", "mc_phi", "mc_is_e", "mc_is_mu", "mc_is_h", "mc_charge",
155  "not_recoed", "long", "upstream", "downstream"
156  },
157  R"(
158  SELECT
159  p.mcparticle_id AS mcparticle_id,
160  ov.x AS mc_x,
161  ov.y AS mc_y,
162  ov.z AS mc_z,
163  log10(norm2(p.px, p.py, p.pz)) AS mc_log10_p,
164  p.px/p.pz AS mc_tx,
165  p.py/p.pz AS mc_ty,
166  pseudorapidity(p.px, p.py, p.pz) AS mc_eta,
167  azimuthal(p.px, p.py, p.pz) AS mc_phi,
168  abs(p.pid) == 11 AS mc_is_e,
169  abs(p.pid) == 13 AS mc_is_mu,
170  (
171  abs(p.pid) == 211 OR abs(p.pid) == 321 OR abs(p.pid) == 2212
172  ) AS mc_is_h,
173  propagation_charge(p.pid) AS mc_charge,
174  --
175  out.not_recoed AS not_recoed,
176  out.long AS long,
177  out.upstream AS upstream,
178  out.downstream AS downstream
179  FROM MCParticles AS p
180  INNER JOIN MCVertices AS ov ON p.production_vertex = ov.mcvertex_id
181  INNER JOIN tmp_efficiency_out AS out ON out.mcparticle_id == p.mcparticle_id
182  WHERE
183  p.pz > 1.
184  AND
185  propagation_charge(p.pid) <> 0.
186  )",
187  true //make_persistent
188  );
189  debug_eff.execute();
190 
191 
192 
193 
194  SQLamarr::TemporaryTable assign_track_cat (db,
195  "tmp_particles_recoed_as", {"mcparticle_id", "track_type"},
196  R"( SELECT
197  eff.mcparticle_id,
198  random_category(not_recoed, 0, 0, long, upstream) AS track_type
199  FROM tmp_efficiency_out AS eff
200  INNER JOIN tmp_acceptance_out AS acc
201  ON eff.mcparticle_id = acc.mcparticle_id
202  WHERE random_category(1 - acc.acceptance, acc.acceptance) == 1
203  )",
204  /*make_persistent*/ true
205  );
206 
207  assign_track_cat.execute();
208 
209  SQLamarr::TemporaryTable propagate_to_ctb (db,
210  "tmp_closest_to_beam", {"mcparticle_id", "x", "y", "z"},
211  R"( WITH ctb AS (
212  SELECT
213  mcparticle_id,
214  z_closest_to_beam(ov.x, ov.y, ov.z, p.px/p.pz, p.py/p.pz) AS z,
215  ov.x AS x0,
216  ov.y AS y0,
217  ov.z AS z0,
218  p.px/p.pz AS tx,
219  p.py/p.pz AS ty
220  FROM MCParticles AS p
221  INNER JOIN MCVertices AS ov
222  ON p.production_vertex = ov.mcvertex_id
223  )
224  SELECT
225  mcparticle_id,
226  x0 + (z - z0)*tx AS x,
227  y0 + (z - z0)*ty AS y,
228  z AS z
229  FROM ctb;
230  )",
231  /*make_persistent*/ true
232  );
233  propagate_to_ctb.execute();
234 
235 
236  SQLamarr::GenerativePlugin resolution_model (db,
237  "../temporary_data/models/lhcb.trk.2016MU.so",
238  "resolution",
239  R"(
240  SELECT
241  p.mcparticle_id,
242  ctb.x AS mc_x,
243  ctb.y AS mc_y,
244  ctb.z AS mc_z,
245  p.px/p.pz AS mc_tx,
246  p.py/p.pz AS mc_ty,
247  log10(norm2(p.px, p.py, p.pz)) AS mc_log10_p,
248  abs(p.pid) == 11 AS mc_is_e,
249  abs(p.pid) == 13 AS mc_is_mu,
250  (abs(p.pid) = 211 OR abs(p.pid) = 321 OR abs(p.pid) = 2212) AS is_h,
251  (recguess.track_type == 3) AS is_long,
252  (recguess.track_type == 4) AS is_upstream,
253  (recguess.track_type == 5) AS is_downstream
254  FROM MCParticles AS p
255  INNER JOIN MCVertices AS ov ON p.production_vertex = ov.mcvertex_id
256  INNER JOIN tmp_particles_recoed_as AS recguess
257  ON p.mcparticle_id = recguess.mcparticle_id
258  INNER JOIN tmp_closest_to_beam AS ctb
259  ON p.mcparticle_id = ctb.mcparticle_id
260  WHERE
261  recguess.track_type BETWEEN 3 AND 5;
262  )",
263  "tmp_resolution_out",
264  {"dx", "dy", "dz", "dtx", "dty", "dp",
265  "chi2PerDoF", "nDoF_f", "ghostProb"},
266  128,
267  {"mcparticle_id"}
268  );
269 
270  resolution_model.execute();
271 
272 
273  SQLamarr::GenerativePlugin covariance_model (db,
274  "../temporary_data/models/lhcb.trk.2016MU.so",
275  "covariance",
276  R"(
277  SELECT
278  p.mcparticle_id,
279  ctb.x AS mc_x,
280  ctb.y AS mc_y,
281  ctb.z AS mc_z,
282  p.px/p.pz AS mc_tx,
283  p.py/p.pz AS mc_ty,
284  log10(norm2(p.px, p.py, p.pz)) AS mc_log10_p,
285  tmpres.chi2PerDoF AS chi2PerDoF,
286  tmpres.nDoF_f AS nDoF_f,
287  tmpres.ghostProb AS ghostProb,
288  abs(p.pid) == 11 AS mc_is_e,
289  abs(p.pid) == 13 AS mc_is_mu,
290  (abs(p.pid) = 211 OR abs(p.pid) = 321 OR abs(p.pid) = 2212) AS is_h,
291  (recguess.track_type == 3) AS is_long,
292  (recguess.track_type == 4) AS is_upstream,
293  (recguess.track_type == 5) AS is_downstream
294  FROM MCParticles AS p
295  INNER JOIN MCVertices AS ov ON p.production_vertex = ov.mcvertex_id
296  INNER JOIN tmp_particles_recoed_as AS recguess
297  ON p.mcparticle_id = recguess.mcparticle_id
298  INNER JOIN tmp_resolution_out AS tmpres
299  ON p.mcparticle_id = tmpres.mcparticle_id
300  INNER JOIN tmp_closest_to_beam AS ctb
301  ON p.mcparticle_id = ctb.mcparticle_id
302  WHERE
303  recguess.track_type >= 3 AND recguess.track_type <= 5
304  )",
305  "tmp_covariance_out",
306  {
307  "log_cov_ClosestToBeam_0_0",
308  "log_cov_ClosestToBeam_1_1",
309  "log_cov_ClosestToBeam_2_2",
310  "log_cov_ClosestToBeam_3_3",
311  "log_cov_ClosestToBeam_4_4",
312  "corr_ClosestToBeam_0_1",
313  "corr_ClosestToBeam_0_2",
314  "corr_ClosestToBeam_1_2",
315  "corr_ClosestToBeam_0_3",
316  "corr_ClosestToBeam_1_3",
317  "corr_ClosestToBeam_2_3",
318  "corr_ClosestToBeam_0_4",
319  "corr_ClosestToBeam_1_4",
320  "corr_ClosestToBeam_2_4",
321  "corr_ClosestToBeam_3_4"
322  },
323  128,
324  {"mcparticle_id"}
325  );
326 
327  covariance_model.execute();
328 
329  SQLamarr::TemporaryTable covariance (db,
330  "covariance",
331  { "cov00", "cov01", "cov02", "cov03", "cov04",
332  "cov11", "cov12", "cov13", "cov14",
333  "cov22", "cov23", "cov24",
334  "cov33", "cov34",
335  "cov44"
336  },
337  R"(
338  WITH shortcut AS (
339  SELECT
340  sqrt(exp(log_cov_ClosestToBeam_0_0)) AS sig0,
341  sqrt(exp(log_cov_ClosestToBeam_1_1)) AS sig1,
342  sqrt(exp(log_cov_ClosestToBeam_2_2)) AS sig2,
343  sqrt(exp(log_cov_ClosestToBeam_3_3)) AS sig3,
344  sqrt(exp(log_cov_ClosestToBeam_4_4)) AS sig4,
345  corr_ClosestToBeam_0_1 AS cor01,
346  corr_ClosestToBeam_0_2 AS cor02,
347  corr_ClosestToBeam_1_2 AS cor12,
348  corr_ClosestToBeam_0_3 AS cor03,
349  corr_ClosestToBeam_1_3 AS cor13,
350  corr_ClosestToBeam_2_3 AS cor23,
351  corr_ClosestToBeam_0_4 AS cor04,
352  corr_ClosestToBeam_1_4 AS cor14,
353  corr_ClosestToBeam_2_4 AS cor24,
354  corr_ClosestToBeam_3_4 AS cor34
355  FROM tmp_covariance_out
356  )
357  SELECT
358  sig0 * sig0,
359  cor01 * sig0 * sig1,
360  cor02 * sig0 * sig2,
361  cor03 * sig0 * sig3,
362  cor04 * sig0 * sig4,
363  sig1 * sig1,
364  cor12 * sig1 * sig2,
365  cor13 * sig1 * sig3,
366  cor14 * sig1 * sig4,
367  sig2 * sig2,
368  cor23 * sig2 * sig3,
369  cor24 * sig2 * sig4,
370  sig3 * sig3,
371  cor34 * sig3 * sig4,
372  sig4 * sig3
373  FROM shortcut;
374  )", /*make_persistent*/ true);
375 
376  covariance.execute();
377 
378  const std::string pidlib = "../temporary_data/models/lhcb.pid.2016MU-sim.so";
379  const std::string particle_table = "MCParticles";
380  const std::string track_table = "tmp_particles_recoed_as";
381 
382  std::vector<std::unique_ptr<SQLamarr::GenerativePlugin>> pid_algos;
383  pid_algos.push_back(SQLamarr::BlockLib::LbParticleId::make (db, pidlib, "pion_pipe", "tmp_pid_pi",
384  particle_table, track_table, 211));
385  pid_algos.push_back(SQLamarr::BlockLib::LbParticleId::make(db, pidlib, "kaon_pipe", "tmp_pid_k",
386  particle_table, track_table, 321));
387  pid_algos.push_back(SQLamarr::BlockLib::LbParticleId::make(db, pidlib, "proton_pipe", "tmp_pid_p",
388  particle_table, track_table, 2212));
389  pid_algos.push_back(SQLamarr::BlockLib::LbParticleId::make(db, pidlib, "muon_pipe", "tmp_pid_mu",
390  particle_table, track_table, 13));
391  for (auto& pid_algo: pid_algos)
392  pid_algo->execute();
393 
394  SQLamarr::TemporaryTable concat_pid(db,
395  "pid",
397  {
398  "SELECT * FROM tmp_pid_pi",
399  "SELECT * FROM tmp_pid_k",
400  "SELECT * FROM tmp_pid_mu",
401  "SELECT * FROM tmp_pid_p"
402  },
403  /*make_persistent*/ true
404  );
405  concat_pid.execute();
406 
407  SQLamarr::CleanEventStore clean(db);
408  //clean.execute();
409 
410 
411 
412  std::cout << SQLamarr::dump_table(db, "SELECT COUNT(*) as n_files FROM DataSources") << std::endl;
413 // std::cout << SQLamarr::dump_table(db, "SELECT * FROM DataSources LIMIT 10") << std::endl;
414 // std::cout << SQLamarr::dump_table(db, "SELECT * FROM GenEvents LIMIT 10") << std::endl;
415  std::cout << "GenVertices\n" << SQLamarr::dump_table(db, "SELECT * FROM GenVertices LIMIT 10") << std::endl;
416  std::cout << "GenParticles\n" << SQLamarr::dump_table(db, "SELECT * FROM GenParticles") << std::endl;
417 // std::cout << SQLamarr::dump_table(db, "SELECT * FROM GenParticles WHERE status =889 LIMIT 10") << std::endl;
418 // std::cout << SQLamarr::dump_table(db, "SELECT * FROM MCVertices LIMIT 10") << std::endl;
419  std::cout << "MCParticles\n" << SQLamarr::dump_table(db,
420  "SELECT * FROM MCParticles") << std::endl;
421 // std::cout << SQLamarr::dump_table(db,
422 // "SELECT * FROM MCParticles WHERE is_signal == TRUE LIMIT 10") << std::endl;
423 // std::cout << SQLamarr::dump_table(db, R"(
424 // SELECT pid, COUNT(*)
425 // FROM GenParticles
426 // WHERE
427 // status == 889
428 // AND
429 // production_vertex IS NOT NULL
430 // AND
431 // end_vertex IS NOT NULL
432 // AND
433 // production_vertex != end_vertex
434 // GROUP BY pid;
435 // )");
436 //
437 // std::cout << SQLamarr::dump_table(db, "SELECT * FROM Vertices LIMIT 10") << std::endl;
438 //
439 // std::cout << SQLamarr::dump_table(db, R"(
440 // SELECT acc.acceptance AS acc, eff.*,
441 // not_recoed + long + upstream + downstream AS norm,
442 // red.track_type
443 // FROM tmp_efficiency_out as eff
444 // INNER JOIN tmp_acceptance_out as acc
445 // ON eff.mcparticle_id = acc.mcparticle_id
446 // LEFT JOIN tmp_particles_recoed_as AS red
447 // ON eff.mcparticle_id = red.mcparticle_id
448 // LIMIT 10)") << std::endl;
449 //
450 // std::cout << SQLamarr::dump_table(db, R"(
451 // SELECT * FROM tmp_resolution_out LIMIT 10
452 // )");
453 //
454 // std::cout << SQLamarr::dump_table(db, R"(
455 // SELECT * FROM tmp_covariance_out LIMIT 10
456 // )");
457 //
458 // std::cout << SQLamarr::dump_table(db, R"(
459 // SELECT * FROM tmp_closest_to_beam LIMIT 10
460 // )");
461 //
462 
463  return 0;
464 }
465 
466 
467 std::vector<std::string> globVector(const std::string& pattern)
468 {
469  std::vector<std::string> files;
470 
471  glob_t glob_result;
472  glob(pattern.c_str(),GLOB_TILDE,NULL,&glob_result);
473 
474  for(unsigned int i = 0; i < glob_result.gl_pathc; ++i)
475  files.push_back(string(glob_result.gl_pathv[i]));
476 
477  globfree(&glob_result);
478 
479  return files;
480 }
static SmearingParametrization load_parametrization(const std::string file_path, const std::string table_name, const std::string condition)
Instanciate a SmearingParametrization object from an SQLite file.
static PRNG * get_or_create(const sqlite3_context *db, uint64_t seed=no_seed)
Return a pointer to an initialized generator.
Definition: GlobalPRNG.h:99
A database connection handler easying sharing the DB between C++ and Python.
Definition: db_functions.py:24
std::unique_ptr< GenerativePlugin > make(SQLite3DB &db, const std::string &library, const std::string &function_name, const std::string &output_table, const std::string &particle_table, const std::string &track_table, const int abspid)
Initialize a GenerativePlugin specialized for PID pipelines.
std::vector< std::string > get_column_names(bool include_indices=true, bool include_outputs=true)
Provide a vector of the names of the columns of the output table.
SQLite3DB make_database(std::string filename, int flags=SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_URI, std::string init=std::string())
Initialize the database.
std::string dump_table(SQLite3DB &db, const std::string &query)
Dump the result of a query to a string.