PyLamarr
Pythonizations for the ultra-fast simulation option for the LHCb experiment
 
Loading...
Searching...
No Matches
RemoteResource.py
1import os
2import logging
3import hashlib
4
5from pydantic import BaseModel, PrivateAttr, validator
6from typing import Optional
7
8import requests
9
10
class RemoteResource (BaseModel):
  """Resource on the Internet, locally cached.

  # Remote Resource mechanism

  Most of the parametrizations Lamarr relies on are committed and maintained in
  remote repositories. The PyLamarr.RemoteResource class implements a simple
  caching mechanism to download the remote parametrizations on demand in case they
  are not available locally. A hash of the URL identifying the remote resource is
  used to represent the local cache of the remote resource.
  Note that in case the remote resource is updated without modifying its URL,
  the local cache is not automatically updated.


  ### Example.
  Consider the file `PrimaryVertexSmearing.db` encoding the parametrizations for
  Primary Vertex reconstruction, and made available publicly here:
  `https://github.com/LamarrSim/SQLamarr/raw/master/temporary_data/PrimaryVertex/PrimaryVertexSmearing.db`

  The following snippet of code enables caching the file locally:
  ```python
  from PyLamarr import RemoteResource
  url = ("https://github.com/LamarrSim/SQLamarr/raw/master/temporary_data/"
         "PrimaryVertex/PrimaryVertexSmearing.db")

  pv_params = RemoteResource(url)

  # Now the file might not be available locally, but a lazy download is triggered
  # when accessing its path:

  import sqlite3
  with sqlite3.connect(pv_params.file) as db:
    # ... do something ...
  ```

  Now, in case the remote file is updated, it may be necessary to download the
  updated version. This can be achieved forcing the download with:
  ```python
  pv_params.download(force=True)
  ```

  or, replacing the connection attempt in the previous example:
  ```python
  import sqlite3
  with sqlite3.connect(pv_params.download(force=True).file) as db:
    # ... do something ...
  ```


  ### Accessing local resources
  A local resource can be encapsulated inside `RemoteResource` which is
  the expected format for most of the parametrization data in `PyLamarr`.

  For example, if testing your local version of `MyPrimaryVertexSmearing.db`,
  you can write
  ```python
  pv_params = RemoteResource("file://MyPrimaryVertexSmearing.db")

  # The local file is used in place: no download is triggered when accessing
  # its path, but an exception is raised if the file does not exist.

  import sqlite3
  with sqlite3.connect(pv_params.file) as db:
    #...
  ```

  Note, however, that forcing the download of a local resource would raise an
  Exception.


  ### Implicit conversion from URL
  Most of the parametrizations relying on external dependencies expect an
  instance of `RemoteResource` identifying the file to obtain the parametrization
  from. An implicit cast from string to `RemoteResource` enables passing directly
  a string with a URL (possibly pointing to a local file), which gets
  transparently converted into a `RemoteResource` instance and used in the file.
  """
  remote_url: str
  local_filename: Optional[str] = None
  # Path of the local copy (cache file, or the file itself for `file://` URLs).
  _file: Optional[str] = PrivateAttr()

  def __init__ (self, *args, **kwargs):
    """@private Constructor performing input validation

    Accepts `remote_url` and, optionally, `local_filename` either positionally
    (in this order) or as keyword arguments.

    Raises `TypeError` on too many or duplicated arguments and
    `NotImplementedError` on unsupported protocols or when `local_filename`
    is combined with a `file://` URL.
    """
    field_names = ('remote_url', 'local_filename')
    if len(args) > len(field_names):
      raise TypeError(
        f"RemoteResource takes at most {len(field_names)} positional "
        f"arguments ({len(args)} given)"
      )

    # Map positional arguments onto field names, then merge keyword arguments
    fields = dict(zip(field_names, args))
    duplicated = sorted(set(fields) & set(kwargs))
    if duplicated:
      raise TypeError(
        f"RemoteResource got multiple values for {', '.join(duplicated)}"
      )
    fields.update(kwargs)

    # Missing or ill-typed fields are reported by the pydantic validation
    super().__init__(**fields)

    file_protocols = ('file://',)
    requests_protocols = ('https://', 'http://')

    if self.remote_url.startswith(file_protocols):
      if self.local_filename is not None:
        raise NotImplementedError("Copy from local file system unavailable")
      # The resource is used in place: strip the protocol to get its path
      self._file = self.remote_url[len('file://'):]

    elif self.remote_url.startswith(requests_protocols):
      if self.local_filename is not None:
        self._file = self.local_filename
      else:
        # The cache file name is derived from a hash of the URL only, hence
        # remote updates that keep the same URL do not invalidate the cache.
        h = hashlib.sha1()
        h.update(self.remote_url.encode('utf-8'))
        self._file = f"/tmp/lamarr.resource.{h.hexdigest()[:16]}"

    else:
      raise NotImplementedError(
        f"Protocol for {self.remote_url} unknown or not supported. "
        f"Supported protocols: {', '.join(file_protocols+requests_protocols)}"
      )

  # Handles implicit conversion from a string representing an url
  @classmethod
  def __get_validators__ (cls):
    """@private Get validators for implicit casting from URL by pydantic"""
    yield cls.validate

  @classmethod
  def validate (cls, v):
    """@private Implement implicit casting

    Accepts an existing `RemoteResource` (returned unchanged), a string with
    the URL, or a dictionary with key `remote_url` and, optionally,
    `local_filename`. Raises `ValueError` for any other initializer.
    """
    if isinstance(v, RemoteResource):
      return v

    if isinstance(v, str):
      return cls(v)

    if isinstance(v, dict) and 'remote_url' in v:
      return cls(
        remote_url=v['remote_url'],
        local_filename=v.get('local_filename')
      )

    raise ValueError(f"Unexpected initializer {v} for RemoteResource")

  def download (self, force: bool = False):
    """Download the remote resource if not available locally or if forced.
    Can raise an exception if the download fails or upon attempts of downloading
    local resources (represented by protocol `file://`)

    @param force: Force the download of the remote resource independently of
      the cache availability

    @return Updated instance of `RemoteResource` (`self`)
    """

    if os.path.exists(self._file) and not force:
      return self

    # Local resources cannot be downloaded: either the file is missing or
    # the download was forced.
    if self.remote_url.startswith("file://"):
      raise FileNotFoundError(f"File {self._file} not found.")

    logger = logging.getLogger(self.__class__.__name__)
    logger.info("Downloading %s to %s", self.remote_url, self._file)

    res = requests.get(self.remote_url, allow_redirects=True)
    # Do not cache the body of an error response
    res.raise_for_status()

    with open(self._file, 'wb') as f:
      f.write(res.content)

    return self

  @property
  def file (self):
    """@property Access the local file **path** downloading it if necessary."""
    self.download(force=False)
    logger = logging.getLogger(self.__class__.__name__)
    logger.debug(
      "Accessing %s as cached version of %s", self._file, self.remote_url
    )
    return self._file
178
179
if __name__ == '__main__':
  # Smoke test: requires network access to download the reference database.
  pvdb = RemoteResource(
    "https://github.com/LamarrSim/SQLamarr/raw/master/temporary_data/PrimaryVertex/PrimaryVertexSmearing.db"
  )
  print(pvdb.download(force=True).file)

  # Wrap the freshly cached file as a local (file://) resource
  pvdb2 = RemoteResource(f"file://{pvdb.file}")
  print(pvdb2._file)
  print(pvdb2.file)

  # Unsupported protocols must be rejected at construction time
  try:
    RemoteResource("ahahah://just-kidding.com")
  except NotImplementedError:
    pass
  else:
    # Explicit raise: an `assert` statement would be stripped by `python -O`
    raise AssertionError("Failed raising exception on bad protocol")
Resource on the Internet, locally cached.
download(self, bool force=False)
Download the remote resource if not available locally or if forced.