Development of an internal social media platform with personalised dashboards for students
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

serialization.py 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461
  1. """
  2. kombu.serialization
  3. ===================
  4. Serialization utilities.
  5. """
  6. from __future__ import absolute_import
  7. import codecs
  8. import os
  9. import sys
  10. import pickle as pypickle
  11. try:
  12. import cPickle as cpickle
  13. except ImportError: # pragma: no cover
  14. cpickle = None # noqa
  15. from collections import namedtuple
  16. from contextlib import contextmanager
  17. from .exceptions import (
  18. ContentDisallowed, DecodeError, EncodeError, SerializerNotInstalled
  19. )
  20. from .five import BytesIO, reraise, text_t
  21. from .utils import entrypoints
  22. from .utils.encoding import str_to_bytes, bytes_t
  23. __all__ = ['pickle', 'loads', 'dumps', 'register', 'unregister']
  24. SKIP_DECODE = frozenset(['binary', 'ascii-8bit'])
  25. TRUSTED_CONTENT = frozenset(['application/data', 'application/text'])
  26. if sys.platform.startswith('java'): # pragma: no cover
  27. def _decode(t, coding):
  28. return codecs.getdecoder(coding)(t)[0]
  29. else:
  30. _decode = codecs.decode
  31. pickle = cpickle or pypickle
  32. pickle_load = pickle.load
  33. #: Kombu requires Python 2.5 or later so we use protocol 2 by default.
  34. #: There's a new protocol (3) but this is only supported by Python 3.
  35. pickle_protocol = int(os.environ.get('PICKLE_PROTOCOL', 2))
  36. codec = namedtuple('codec', ('content_type', 'content_encoding', 'encoder'))
  37. @contextmanager
  38. def _reraise_errors(wrapper,
  39. include=(Exception, ), exclude=(SerializerNotInstalled, )):
  40. try:
  41. yield
  42. except exclude:
  43. raise
  44. except include as exc:
  45. reraise(wrapper, wrapper(exc), sys.exc_info()[2])
  46. def pickle_loads(s, load=pickle_load):
  47. # used to support buffer objects
  48. return load(BytesIO(s))
  49. def parenthesize_alias(first, second):
  50. return '%s (%s)' % (first, second) if first else second
  51. class SerializerRegistry(object):
  52. """The registry keeps track of serialization methods."""
  53. def __init__(self):
  54. self._encoders = {}
  55. self._decoders = {}
  56. self._default_encode = None
  57. self._default_content_type = None
  58. self._default_content_encoding = None
  59. self._disabled_content_types = set()
  60. self.type_to_name = {}
  61. self.name_to_type = {}
  62. def register(self, name, encoder, decoder, content_type,
  63. content_encoding='utf-8'):
  64. if encoder:
  65. self._encoders[name] = codec(
  66. content_type, content_encoding, encoder,
  67. )
  68. if decoder:
  69. self._decoders[content_type] = decoder
  70. self.type_to_name[content_type] = name
  71. self.name_to_type[name] = content_type
  72. def enable(self, name):
  73. if '/' not in name:
  74. name = self.name_to_type[name]
  75. self._disabled_content_types.discard(name)
  76. def disable(self, name):
  77. if '/' not in name:
  78. name = self.name_to_type[name]
  79. self._disabled_content_types.add(name)
  80. def unregister(self, name):
  81. try:
  82. content_type = self.name_to_type[name]
  83. self._decoders.pop(content_type, None)
  84. self._encoders.pop(name, None)
  85. self.type_to_name.pop(content_type, None)
  86. self.name_to_type.pop(name, None)
  87. except KeyError:
  88. raise SerializerNotInstalled(
  89. 'No encoder/decoder installed for {0}'.format(name))
  90. def _set_default_serializer(self, name):
  91. """
  92. Set the default serialization method used by this library.
  93. :param name: The name of the registered serialization method.
  94. For example, `json` (default), `pickle`, `yaml`, `msgpack`,
  95. or any custom methods registered using :meth:`register`.
  96. :raises SerializerNotInstalled: If the serialization method
  97. requested is not available.
  98. """
  99. try:
  100. (self._default_content_type, self._default_content_encoding,
  101. self._default_encode) = self._encoders[name]
  102. except KeyError:
  103. raise SerializerNotInstalled(
  104. 'No encoder installed for {0}'.format(name))
  105. def dumps(self, data, serializer=None):
  106. if serializer == 'raw':
  107. return raw_encode(data)
  108. if serializer and not self._encoders.get(serializer):
  109. raise SerializerNotInstalled(
  110. 'No encoder installed for {0}'.format(serializer))
  111. # If a raw string was sent, assume binary encoding
  112. # (it's likely either ASCII or a raw binary file, and a character
  113. # set of 'binary' will encompass both, even if not ideal.
  114. if not serializer and isinstance(data, bytes_t):
  115. # In Python 3+, this would be "bytes"; allow binary data to be
  116. # sent as a message without getting encoder errors
  117. return 'application/data', 'binary', data
  118. # For Unicode objects, force it into a string
  119. if not serializer and isinstance(data, text_t):
  120. with _reraise_errors(EncodeError, exclude=()):
  121. payload = data.encode('utf-8')
  122. return 'text/plain', 'utf-8', payload
  123. if serializer:
  124. content_type, content_encoding, encoder = \
  125. self._encoders[serializer]
  126. else:
  127. encoder = self._default_encode
  128. content_type = self._default_content_type
  129. content_encoding = self._default_content_encoding
  130. with _reraise_errors(EncodeError):
  131. payload = encoder(data)
  132. return content_type, content_encoding, payload
  133. encode = dumps # XXX compat
  134. def loads(self, data, content_type, content_encoding,
  135. accept=None, force=False, _trusted_content=TRUSTED_CONTENT):
  136. content_type = content_type or 'application/data'
  137. if accept is not None:
  138. if content_type not in _trusted_content \
  139. and content_type not in accept:
  140. raise self._for_untrusted_content(content_type, 'untrusted')
  141. else:
  142. if content_type in self._disabled_content_types and not force:
  143. raise self._for_untrusted_content(content_type, 'disabled')
  144. content_encoding = (content_encoding or 'utf-8').lower()
  145. if data:
  146. decode = self._decoders.get(content_type)
  147. if decode:
  148. with _reraise_errors(DecodeError):
  149. return decode(data)
  150. if content_encoding not in SKIP_DECODE and \
  151. not isinstance(data, text_t):
  152. with _reraise_errors(DecodeError):
  153. return _decode(data, content_encoding)
  154. return data
  155. decode = loads # XXX compat
  156. def _for_untrusted_content(self, ctype, why):
  157. return ContentDisallowed(
  158. 'Refusing to deserialize {0} content of type {1}'.format(
  159. why,
  160. parenthesize_alias(self.type_to_name.get(ctype, ctype), ctype),
  161. ),
  162. )
  163. #: Global registry of serializers/deserializers.
  164. registry = SerializerRegistry()
  165. """
  166. .. function:: dumps(data, serializer=default_serializer)
  167. Serialize a data structure into a string suitable for sending
  168. as an AMQP message body.
  169. :param data: The message data to send. Can be a list,
  170. dictionary or a string.
  171. :keyword serializer: An optional string representing
  172. the serialization method you want the data marshalled
  173. into. (For example, `json`, `raw`, or `pickle`).
  174. If :const:`None` (default), then json will be used, unless
  175. `data` is a :class:`str` or :class:`unicode` object. In this
  176. latter case, no serialization occurs as it would be
  177. unnecessary.
  178. Note that if `serializer` is specified, then that
  179. serialization method will be used even if a :class:`str`
  180. or :class:`unicode` object is passed in.
  181. :returns: A three-item tuple containing the content type
  182. (e.g., `application/json`), content encoding, (e.g.,
  183. `utf-8`) and a string containing the serialized
  184. data.
  185. :raises SerializerNotInstalled: If the serialization method
  186. requested is not available.
  187. """
  188. dumps = encode = registry.encode # XXX encode is a compat alias
  189. """
  190. .. function:: loads(data, content_type, content_encoding):
  191. Deserialize a data stream as serialized using `dumps`
  192. based on `content_type`.
  193. :param data: The message data to deserialize.
  194. :param content_type: The content-type of the data.
  195. (e.g., `application/json`).
  196. :param content_encoding: The content-encoding of the data.
  197. (e.g., `utf-8`, `binary`, or `us-ascii`).
  198. :returns: The unserialized data.
  199. """
  200. loads = decode = registry.decode # XXX decode is a compat alias
  201. """
  202. .. function:: register(name, encoder, decoder, content_type,
  203. content_encoding='utf-8'):
  204. Register a new encoder/decoder.
  205. :param name: A convenience name for the serialization method.
  206. :param encoder: A method that will be passed a python data structure
  207. and should return a string representing the serialized data.
  208. If :const:`None`, then only a decoder will be registered. Encoding
  209. will not be possible.
  210. :param decoder: A method that will be passed a string representing
  211. serialized data and should return a python data structure.
  212. If :const:`None`, then only an encoder will be registered.
  213. Decoding will not be possible.
  214. :param content_type: The mime-type describing the serialized
  215. structure.
  216. :param content_encoding: The content encoding (character set) that
  217. the `decoder` method will be returning. Will usually be
  218. `utf-8`, `us-ascii`, or `binary`.
  219. """
  220. register = registry.register
  221. """
  222. .. function:: unregister(name):
  223. Unregister registered encoder/decoder.
  224. :param name: Registered serialization method name.
  225. """
  226. unregister = registry.unregister
  227. def raw_encode(data):
  228. """Special case serializer."""
  229. content_type = 'application/data'
  230. payload = data
  231. if isinstance(payload, text_t):
  232. content_encoding = 'utf-8'
  233. with _reraise_errors(EncodeError, exclude=()):
  234. payload = payload.encode(content_encoding)
  235. else:
  236. content_encoding = 'binary'
  237. return content_type, content_encoding, payload
  238. def register_json():
  239. """Register a encoder/decoder for JSON serialization."""
  240. from anyjson import loads as json_loads, dumps as json_dumps
  241. def _loads(obj):
  242. if isinstance(obj, bytes_t):
  243. obj = obj.decode('utf-8')
  244. return json_loads(obj)
  245. registry.register('json', json_dumps, _loads,
  246. content_type='application/json',
  247. content_encoding='utf-8')
  248. def register_yaml():
  249. """Register a encoder/decoder for YAML serialization.
  250. It is slower than JSON, but allows for more data types
  251. to be serialized. Useful if you need to send data such as dates"""
  252. try:
  253. import yaml
  254. registry.register('yaml', yaml.safe_dump, yaml.safe_load,
  255. content_type='application/x-yaml',
  256. content_encoding='utf-8')
  257. except ImportError:
  258. def not_available(*args, **kwargs):
  259. """In case a client receives a yaml message, but yaml
  260. isn't installed."""
  261. raise SerializerNotInstalled(
  262. 'No decoder installed for YAML. Install the PyYAML library')
  263. registry.register('yaml', None, not_available, 'application/x-yaml')
  264. if sys.version_info[0] == 3: # pragma: no cover
  265. def unpickle(s):
  266. return pickle_loads(str_to_bytes(s))
  267. else:
  268. unpickle = pickle_loads # noqa
  269. def register_pickle():
  270. """The fastest serialization method, but restricts
  271. you to python clients."""
  272. def pickle_dumps(obj, dumper=pickle.dumps):
  273. return dumper(obj, protocol=pickle_protocol)
  274. registry.register('pickle', pickle_dumps, unpickle,
  275. content_type='application/x-python-serialize',
  276. content_encoding='binary')
  277. def register_msgpack():
  278. """See http://msgpack.sourceforge.net/"""
  279. pack = unpack = None
  280. try:
  281. import msgpack
  282. if msgpack.version >= (0, 4):
  283. from msgpack import packb, unpackb
  284. def pack(s):
  285. return packb(s, use_bin_type=True)
  286. def unpack(s):
  287. return unpackb(s, encoding='utf-8')
  288. else:
  289. def version_mismatch(*args, **kwargs):
  290. raise SerializerNotInstalled(
  291. 'msgpack requires msgpack-python >= 0.4.0')
  292. pack = unpack = version_mismatch
  293. except (ImportError, ValueError):
  294. def not_available(*args, **kwargs):
  295. raise SerializerNotInstalled(
  296. 'No decoder installed for msgpack. '
  297. 'Please install the msgpack-python library')
  298. pack = unpack = not_available
  299. registry.register(
  300. 'msgpack', pack, unpack,
  301. content_type='application/x-msgpack',
  302. content_encoding='binary',
  303. )
  304. # Register the base serialization methods.
  305. register_json()
  306. register_pickle()
  307. register_yaml()
  308. register_msgpack()
  309. # Default serializer is 'json'
  310. registry._set_default_serializer('json')
  311. _setupfuns = {
  312. 'json': register_json,
  313. 'pickle': register_pickle,
  314. 'yaml': register_yaml,
  315. 'msgpack': register_msgpack,
  316. 'application/json': register_json,
  317. 'application/x-yaml': register_yaml,
  318. 'application/x-python-serialize': register_pickle,
  319. 'application/x-msgpack': register_msgpack,
  320. }
  321. def enable_insecure_serializers(choices=['pickle', 'yaml', 'msgpack']):
  322. """Enable serializers that are considered to be unsafe.
  323. Will enable ``pickle``, ``yaml`` and ``msgpack`` by default,
  324. but you can also specify a list of serializers (by name or content type)
  325. to enable.
  326. """
  327. for choice in choices:
  328. try:
  329. registry.enable(choice)
  330. except KeyError:
  331. pass
  332. def disable_insecure_serializers(allowed=['json']):
  333. """Disable untrusted serializers.
  334. Will disable all serializers except ``json``
  335. or you can specify a list of deserializers to allow.
  336. .. note::
  337. Producers will still be able to serialize data
  338. in these formats, but consumers will not accept
  339. incoming data using the untrusted content types.
  340. """
  341. for name in registry._decoders:
  342. registry.disable(name)
  343. if allowed is not None:
  344. for name in allowed:
  345. registry.enable(name)
  346. # Insecure serializers are disabled by default since v3.0
  347. disable_insecure_serializers()
  348. # Load entrypoints from installed extensions
  349. for ep, args in entrypoints('kombu.serializers'): # pragma: no cover
  350. register(ep.name, *args)
  351. def prepare_accept_content(l, name_to_type=registry.name_to_type):
  352. if l is not None:
  353. return set(n if '/' in n else name_to_type[n] for n in l)
  354. return l