PyLamarr
Pythonizations for the ultra-fast simulation option for the LHCb experiment
RemoteResource.py
import hashlib
import logging
import os
from typing import Optional

import requests
from pydantic import BaseModel, PrivateAttr, validator


class RemoteResource(BaseModel):
    """Resource on the Internet, locally cached.

    # Remote Resource mechanism

    Most of the parametrizations Lamarr relies on are committed and maintained
    in remote repositories. The PyLamarr.RemoteResource class implements a
    simple caching mechanism to download the remote parametrizations on demand
    in case they are not available locally. A hash of the URL identifying the
    remote resource is used to represent the local cache of the remote
    resource.
    Note that in case the remote resource is updated without modifying its URL,
    the local cache is not automatically updated.


    ### Example.
    Consider the file `PrimaryVertexSmearing.db` encoding the parametrizations
    for Primary Vertex reconstruction, and made available publicly here:
    `https://github.com/LamarrSim/SQLamarr/raw/master/temporary_data/PrimaryVertex/PrimaryVertexSmearing.db`

    The following snippet of code enables caching the file locally:
    ```python
    from PyLamarr import RemoteResource
    url = ("https://github.com/LamarrSim/SQLamarr/raw/master/temporary_data/"
           "PrimaryVertex/PrimaryVertexSmearing.db")

    pv_params = RemoteResource(url)

    # Now the file might not be available locally, but a lazy download is
    # triggered when accessing its path:

    import sqlite3
    with sqlite3.connect(pv_params.file) as db:
        # ... do something ...
    ```

    Now, in case the remote file is updated, it may be necessary to download
    the updated version. This can be achieved forcing the download with:
    ```python
    pv_params.download(force=True)
    ```

    or, replacing the connection attempt in the previous example:
    ```python
    import sqlite3
    with sqlite3.connect(pv_params.download(force=True).file) as db:
        # ... do something ...
    ```


    ### Accessing local resources
    A local resource can be encapsulated inside `RemoteResource` which is
    the expected format for most of the parametrization data in `PyLamarr`.

    For example, if testing your local version of
    `MyPrimaryVertexSmearing.db`, you can write
    ```python
    pv_params = RemoteResource("file://MyPrimaryVertexSmearing.db")

    # Provided the file exists, its path is returned without any download:

    import sqlite3
    with sqlite3.connect(pv_params.file) as db:
        # ...
    ```

    Note, however, that forcing the download of a local resource would raise
    an Exception.


    ### Implicit conversion from URL
    Most of the parametrizations relying on external dependencies expect an
    instance of `RemoteResource` identifying the file to obtain the
    parametrization from. An implicit cast from string to `RemoteResource`
    enables passing directly a string with a URL (possibly pointing to a local
    file), which gets transparently converted into a `RemoteResource` instance
    and used in the file.
    """
    remote_url: str
    local_filename: Optional[str] = None
    # Path of the local copy; assigned by the constructor.
    _file: Optional[str] = PrivateAttr()

    def __init__(self, *args, **kwargs):
        """@private Constructor performing input validation.

        Accepts `remote_url` and, optionally, `local_filename`, each either
        positionally (in this order) or by keyword.

        Raises `TypeError` on too many positional arguments or on a field
        given both positionally and by keyword, and `NotImplementedError` on
        unsupported protocols or when `local_filename` is combined with a
        `file://` URL.
        """
        field_names = ("remote_url", "local_filename")
        if len(args) > len(field_names):
            raise TypeError(
                f"RemoteResource takes at most {len(field_names)} positional "
                f"arguments ({len(args)} given)"
            )

        # Merge positional and keyword forms so that both
        # RemoteResource(url) and RemoteResource(remote_url=url) work.
        fields = dict(zip(field_names, args))
        for name in field_names:
            if name in kwargs:
                if name in fields:
                    raise TypeError(
                        f"RemoteResource got multiple values for argument "
                        f"'{name}'"
                    )
                fields[name] = kwargs[name]

        super().__init__(**fields)

        file_protocols = ('file://',)
        requests_protocols = ('https://', 'http://')

        if self.remote_url.startswith(file_protocols):
            if self.local_filename is not None:
                raise NotImplementedError(
                    "Copy from local file system unavailable"
                )
            # A local resource is its own "cache": strip the protocol prefix.
            self._file = self.remote_url[len('file://'):]

        elif self.remote_url.startswith(requests_protocols):
            if self.local_filename is not None:
                self._file = self.local_filename
            else:
                # The cache file name is derived from a hash of the URL, so
                # the same URL always maps to the same local file.
                digest = hashlib.sha1(
                    self.remote_url.encode('utf-8')
                ).hexdigest()
                self._file = f"/tmp/lamarr.resource.{digest[:16]}"

        else:
            raise NotImplementedError(
                f"Protocol for {self.remote_url} unknown or not supported. "
                f"Supported protocols: {', '.join(file_protocols+requests_protocols)}"
            )

    # Handles implicit conversion from a string representing an url
    @classmethod
    def __get_validators__(cls):
        """@private Get validators for implicit casting from URL by pydantic"""
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """@private Implement implicit casting.

        Accepts an existing `RemoteResource` (returned unchanged), a URL
        string, or a dictionary with key `remote_url` and, optionally,
        `local_filename`. Anything else raises `ValueError`.
        """
        if isinstance(v, RemoteResource):
            # Return the instance itself, not the class.
            return v

        if isinstance(v, str):
            return RemoteResource(v)

        if isinstance(v, dict) and 'remote_url' in v:
            return RemoteResource(
                remote_url=v['remote_url'],
                local_filename=v.get('local_filename'),
            )

        raise ValueError(f"Unexpected initializer {v} for RemoteResource")

    def download(self, force: bool = False):
        """Download the remote resource if it is not available locally or if
        forced.

        Can raise an exception if the download fails or upon attempts of
        downloading local resources (represented by protocol `file://`).

        @param force: Force the download of the remote resource independently
          of the cache availability

        @return Updated instance of `RemoteResource` (`self`)
        """
        if os.path.exists(self._file) and not force:
            return self

        logger = logging.getLogger(self.__class__.__name__)
        logger.info("Downloading %s to %s", self.remote_url, self._file)

        res = requests.get(self.remote_url, allow_redirects=True)
        # Fail loudly on HTTP errors instead of caching an error page.
        res.raise_for_status()

        with open(self._file, 'wb') as f:
            f.write(res.content)

        return self

    @property
    def file(self):
        """@property Access the local file **path** downloading it if necessary."""
        self.download(force=False)
        logger = logging.getLogger(self.__class__.__name__)
        logger.debug(
            "Accessing %s as cached version of %s", self._file, self.remote_url
        )
        return self._file
175 
176 
if __name__ == '__main__':
    # Manual smoke test: needs network access and writes the cache file.
    pvdb = RemoteResource(
        "https://github.com/LamarrSim/SQLamarr/raw/master/temporary_data/PrimaryVertex/PrimaryVertexSmearing.db"
    )
    print(pvdb.download(force=True).file)

    # Wrap the freshly cached file as a local (file://) resource.
    pvdb2 = RemoteResource(f"file://{pvdb.file}")
    print(pvdb2._file)
    print(pvdb2.file)

    # An unsupported protocol must be rejected at construction time.
    try:
        RemoteResource("ahahah://just-kidding.com")
    except NotImplementedError:
        pass
    else:
        # Raise explicitly: a bare `assert` is stripped under `python -O`.
        raise AssertionError("Failed raising exception on bad protocol")
Resource on the Internet, locally cached.
def download(self, force: bool = False)
Download the remote resource if it is not available locally or if forced.