Development of an internal social media platform with personalised dashboards for students
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

xml_serializer.py 15KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384
  1. """
  2. XML serializer.
  3. """
  4. from xml.dom import pulldom
  5. from xml.sax import handler
  6. from xml.sax.expatreader import ExpatParser as _ExpatParser
  7. from django.apps import apps
  8. from django.conf import settings
  9. from django.core.serializers import base
  10. from django.db import DEFAULT_DB_ALIAS, models
  11. from django.utils.xmlutils import (
  12. SimplerXMLGenerator, UnserializableContentError,
  13. )
  14. class Serializer(base.Serializer):
  15. """Serialize a QuerySet to XML."""
  16. def indent(self, level):
  17. if self.options.get('indent') is not None:
  18. self.xml.ignorableWhitespace('\n' + ' ' * self.options.get('indent') * level)
  19. def start_serialization(self):
  20. """
  21. Start serialization -- open the XML document and the root element.
  22. """
  23. self.xml = SimplerXMLGenerator(self.stream, self.options.get("encoding", settings.DEFAULT_CHARSET))
  24. self.xml.startDocument()
  25. self.xml.startElement("django-objects", {"version": "1.0"})
  26. def end_serialization(self):
  27. """
  28. End serialization -- end the document.
  29. """
  30. self.indent(0)
  31. self.xml.endElement("django-objects")
  32. self.xml.endDocument()
  33. def start_object(self, obj):
  34. """
  35. Called as each object is handled.
  36. """
  37. if not hasattr(obj, "_meta"):
  38. raise base.SerializationError("Non-model object (%s) encountered during serialization" % type(obj))
  39. self.indent(1)
  40. attrs = {'model': str(obj._meta)}
  41. if not self.use_natural_primary_keys or not hasattr(obj, 'natural_key'):
  42. obj_pk = obj.pk
  43. if obj_pk is not None:
  44. attrs['pk'] = str(obj_pk)
  45. self.xml.startElement("object", attrs)
  46. def end_object(self, obj):
  47. """
  48. Called after handling all fields for an object.
  49. """
  50. self.indent(1)
  51. self.xml.endElement("object")
  52. def handle_field(self, obj, field):
  53. """
  54. Handle each field on an object (except for ForeignKeys and
  55. ManyToManyFields).
  56. """
  57. self.indent(2)
  58. self.xml.startElement('field', {
  59. 'name': field.name,
  60. 'type': field.get_internal_type(),
  61. })
  62. # Get a "string version" of the object's data.
  63. if getattr(obj, field.name) is not None:
  64. try:
  65. self.xml.characters(field.value_to_string(obj))
  66. except UnserializableContentError:
  67. raise ValueError("%s.%s (pk:%s) contains unserializable characters" % (
  68. obj.__class__.__name__, field.name, obj.pk))
  69. else:
  70. self.xml.addQuickElement("None")
  71. self.xml.endElement("field")
  72. def handle_fk_field(self, obj, field):
  73. """
  74. Handle a ForeignKey (they need to be treated slightly
  75. differently from regular fields).
  76. """
  77. self._start_relational_field(field)
  78. related_att = getattr(obj, field.get_attname())
  79. if related_att is not None:
  80. if self.use_natural_foreign_keys and hasattr(field.remote_field.model, 'natural_key'):
  81. related = getattr(obj, field.name)
  82. # If related object has a natural key, use it
  83. related = related.natural_key()
  84. # Iterable natural keys are rolled out as subelements
  85. for key_value in related:
  86. self.xml.startElement("natural", {})
  87. self.xml.characters(str(key_value))
  88. self.xml.endElement("natural")
  89. else:
  90. self.xml.characters(str(related_att))
  91. else:
  92. self.xml.addQuickElement("None")
  93. self.xml.endElement("field")
  94. def handle_m2m_field(self, obj, field):
  95. """
  96. Handle a ManyToManyField. Related objects are only serialized as
  97. references to the object's PK (i.e. the related *data* is not dumped,
  98. just the relation).
  99. """
  100. if field.remote_field.through._meta.auto_created:
  101. self._start_relational_field(field)
  102. if self.use_natural_foreign_keys and hasattr(field.remote_field.model, 'natural_key'):
  103. # If the objects in the m2m have a natural key, use it
  104. def handle_m2m(value):
  105. natural = value.natural_key()
  106. # Iterable natural keys are rolled out as subelements
  107. self.xml.startElement("object", {})
  108. for key_value in natural:
  109. self.xml.startElement("natural", {})
  110. self.xml.characters(str(key_value))
  111. self.xml.endElement("natural")
  112. self.xml.endElement("object")
  113. else:
  114. def handle_m2m(value):
  115. self.xml.addQuickElement("object", attrs={
  116. 'pk': str(value.pk)
  117. })
  118. for relobj in getattr(obj, field.name).iterator():
  119. handle_m2m(relobj)
  120. self.xml.endElement("field")
  121. def _start_relational_field(self, field):
  122. """Output the <field> element for relational fields."""
  123. self.indent(2)
  124. self.xml.startElement('field', {
  125. 'name': field.name,
  126. 'rel': field.remote_field.__class__.__name__,
  127. 'to': str(field.remote_field.model._meta),
  128. })
  129. class Deserializer(base.Deserializer):
  130. """Deserialize XML."""
  131. def __init__(self, stream_or_string, *, using=DEFAULT_DB_ALIAS, ignorenonexistent=False, **options):
  132. super().__init__(stream_or_string, **options)
  133. self.event_stream = pulldom.parse(self.stream, self._make_parser())
  134. self.db = using
  135. self.ignore = ignorenonexistent
  136. def _make_parser(self):
  137. """Create a hardened XML parser (no custom/external entities)."""
  138. return DefusedExpatParser()
  139. def __next__(self):
  140. for event, node in self.event_stream:
  141. if event == "START_ELEMENT" and node.nodeName == "object":
  142. self.event_stream.expandNode(node)
  143. return self._handle_object(node)
  144. raise StopIteration
  145. def _handle_object(self, node):
  146. """Convert an <object> node to a DeserializedObject."""
  147. # Look up the model using the model loading mechanism. If this fails,
  148. # bail.
  149. Model = self._get_model_from_node(node, "model")
  150. # Start building a data dictionary from the object.
  151. data = {}
  152. if node.hasAttribute('pk'):
  153. data[Model._meta.pk.attname] = Model._meta.pk.to_python(
  154. node.getAttribute('pk'))
  155. # Also start building a dict of m2m data (this is saved as
  156. # {m2m_accessor_attribute : [list_of_related_objects]})
  157. m2m_data = {}
  158. field_names = {f.name for f in Model._meta.get_fields()}
  159. # Deserialize each field.
  160. for field_node in node.getElementsByTagName("field"):
  161. # If the field is missing the name attribute, bail (are you
  162. # sensing a pattern here?)
  163. field_name = field_node.getAttribute("name")
  164. if not field_name:
  165. raise base.DeserializationError("<field> node is missing the 'name' attribute")
  166. # Get the field from the Model. This will raise a
  167. # FieldDoesNotExist if, well, the field doesn't exist, which will
  168. # be propagated correctly unless ignorenonexistent=True is used.
  169. if self.ignore and field_name not in field_names:
  170. continue
  171. field = Model._meta.get_field(field_name)
  172. # As is usually the case, relation fields get the special treatment.
  173. if field.remote_field and isinstance(field.remote_field, models.ManyToManyRel):
  174. m2m_data[field.name] = self._handle_m2m_field_node(field_node, field)
  175. elif field.remote_field and isinstance(field.remote_field, models.ManyToOneRel):
  176. data[field.attname] = self._handle_fk_field_node(field_node, field)
  177. else:
  178. if field_node.getElementsByTagName('None'):
  179. value = None
  180. else:
  181. value = field.to_python(getInnerText(field_node).strip())
  182. data[field.name] = value
  183. obj = base.build_instance(Model, data, self.db)
  184. # Return a DeserializedObject so that the m2m data has a place to live.
  185. return base.DeserializedObject(obj, m2m_data)
  186. def _handle_fk_field_node(self, node, field):
  187. """
  188. Handle a <field> node for a ForeignKey
  189. """
  190. # Check if there is a child node named 'None', returning None if so.
  191. if node.getElementsByTagName('None'):
  192. return None
  193. else:
  194. model = field.remote_field.model
  195. if hasattr(model._default_manager, 'get_by_natural_key'):
  196. keys = node.getElementsByTagName('natural')
  197. if keys:
  198. # If there are 'natural' subelements, it must be a natural key
  199. field_value = [getInnerText(k).strip() for k in keys]
  200. obj = model._default_manager.db_manager(self.db).get_by_natural_key(*field_value)
  201. obj_pk = getattr(obj, field.remote_field.field_name)
  202. # If this is a natural foreign key to an object that
  203. # has a FK/O2O as the foreign key, use the FK value
  204. if field.remote_field.model._meta.pk.remote_field:
  205. obj_pk = obj_pk.pk
  206. else:
  207. # Otherwise, treat like a normal PK
  208. field_value = getInnerText(node).strip()
  209. obj_pk = model._meta.get_field(field.remote_field.field_name).to_python(field_value)
  210. return obj_pk
  211. else:
  212. field_value = getInnerText(node).strip()
  213. return model._meta.get_field(field.remote_field.field_name).to_python(field_value)
  214. def _handle_m2m_field_node(self, node, field):
  215. """
  216. Handle a <field> node for a ManyToManyField.
  217. """
  218. model = field.remote_field.model
  219. default_manager = model._default_manager
  220. if hasattr(default_manager, 'get_by_natural_key'):
  221. def m2m_convert(n):
  222. keys = n.getElementsByTagName('natural')
  223. if keys:
  224. # If there are 'natural' subelements, it must be a natural key
  225. field_value = [getInnerText(k).strip() for k in keys]
  226. obj_pk = default_manager.db_manager(self.db).get_by_natural_key(*field_value).pk
  227. else:
  228. # Otherwise, treat like a normal PK value.
  229. obj_pk = model._meta.pk.to_python(n.getAttribute('pk'))
  230. return obj_pk
  231. else:
  232. def m2m_convert(n):
  233. return model._meta.pk.to_python(n.getAttribute('pk'))
  234. return [m2m_convert(c) for c in node.getElementsByTagName("object")]
  235. def _get_model_from_node(self, node, attr):
  236. """
  237. Look up a model from a <object model=...> or a <field rel=... to=...>
  238. node.
  239. """
  240. model_identifier = node.getAttribute(attr)
  241. if not model_identifier:
  242. raise base.DeserializationError(
  243. "<%s> node is missing the required '%s' attribute"
  244. % (node.nodeName, attr))
  245. try:
  246. return apps.get_model(model_identifier)
  247. except (LookupError, TypeError):
  248. raise base.DeserializationError(
  249. "<%s> node has invalid model identifier: '%s'"
  250. % (node.nodeName, model_identifier))
  251. def getInnerText(node):
  252. """Get all the inner text of a DOM node (recursively)."""
  253. # inspired by http://mail.python.org/pipermail/xml-sig/2005-March/011022.html
  254. inner_text = []
  255. for child in node.childNodes:
  256. if child.nodeType == child.TEXT_NODE or child.nodeType == child.CDATA_SECTION_NODE:
  257. inner_text.append(child.data)
  258. elif child.nodeType == child.ELEMENT_NODE:
  259. inner_text.extend(getInnerText(child))
  260. else:
  261. pass
  262. return "".join(inner_text)
  263. # Below code based on Christian Heimes' defusedxml
  264. class DefusedExpatParser(_ExpatParser):
  265. """
  266. An expat parser hardened against XML bomb attacks.
  267. Forbid DTDs, external entity references
  268. """
  269. def __init__(self, *args, **kwargs):
  270. super().__init__(*args, **kwargs)
  271. self.setFeature(handler.feature_external_ges, False)
  272. self.setFeature(handler.feature_external_pes, False)
  273. def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
  274. raise DTDForbidden(name, sysid, pubid)
  275. def entity_decl(self, name, is_parameter_entity, value, base,
  276. sysid, pubid, notation_name):
  277. raise EntitiesForbidden(name, value, base, sysid, pubid, notation_name)
  278. def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
  279. # expat 1.2
  280. raise EntitiesForbidden(name, None, base, sysid, pubid, notation_name)
  281. def external_entity_ref_handler(self, context, base, sysid, pubid):
  282. raise ExternalReferenceForbidden(context, base, sysid, pubid)
  283. def reset(self):
  284. _ExpatParser.reset(self)
  285. parser = self._parser
  286. parser.StartDoctypeDeclHandler = self.start_doctype_decl
  287. parser.EntityDeclHandler = self.entity_decl
  288. parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
  289. parser.ExternalEntityRefHandler = self.external_entity_ref_handler
  290. class DefusedXmlException(ValueError):
  291. """Base exception."""
  292. def __repr__(self):
  293. return str(self)
  294. class DTDForbidden(DefusedXmlException):
  295. """Document type definition is forbidden."""
  296. def __init__(self, name, sysid, pubid):
  297. super().__init__()
  298. self.name = name
  299. self.sysid = sysid
  300. self.pubid = pubid
  301. def __str__(self):
  302. tpl = "DTDForbidden(name='{}', system_id={!r}, public_id={!r})"
  303. return tpl.format(self.name, self.sysid, self.pubid)
  304. class EntitiesForbidden(DefusedXmlException):
  305. """Entity definition is forbidden."""
  306. def __init__(self, name, value, base, sysid, pubid, notation_name):
  307. super().__init__()
  308. self.name = name
  309. self.value = value
  310. self.base = base
  311. self.sysid = sysid
  312. self.pubid = pubid
  313. self.notation_name = notation_name
  314. def __str__(self):
  315. tpl = "EntitiesForbidden(name='{}', system_id={!r}, public_id={!r})"
  316. return tpl.format(self.name, self.sysid, self.pubid)
  317. class ExternalReferenceForbidden(DefusedXmlException):
  318. """Resolving an external reference is forbidden."""
  319. def __init__(self, context, base, sysid, pubid):
  320. super().__init__()
  321. self.context = context
  322. self.base = base
  323. self.sysid = sysid
  324. self.pubid = pubid
  325. def __str__(self):
  326. tpl = "ExternalReferenceForbidden(system_id='{}', public_id={})"
  327. return tpl.format(self.sysid, self.pubid)