123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508 |
- from collections import Counter, defaultdict
- from functools import partial
- from itertools import chain
- from operator import attrgetter
-
- from django.db import IntegrityError, connections, transaction
- from django.db.models import query_utils, signals, sql
-
-
class ProtectedError(IntegrityError):
    """Raised when a deletion is blocked by a protected foreign key.

    The blocking instances are stored on ``protected_objects`` and are also
    forwarded to the base exception as its second argument.
    """

    def __init__(self, msg, protected_objects):
        self.protected_objects = protected_objects
        super().__init__(msg, protected_objects)
-
-
class RestrictedError(IntegrityError):
    """Raised when a deletion is blocked by a restricted foreign key.

    The blocking instances are stored on ``restricted_objects`` and are also
    forwarded to the base exception as its second argument.
    """

    def __init__(self, msg, restricted_objects):
        self.restricted_objects = restricted_objects
        super().__init__(msg, restricted_objects)
-
-
def CASCADE(collector, field, sub_objs, using):
    """on_delete handler: collect ``sub_objs`` for deletion as well.

    ``collector`` is asked to collect the referencing objects, with the
    referenced model as the cascade source. Restricted-object checking is
    deferred to the top-level ``collect()`` call (``fail_on_restricted=False``).

    If the foreign key is nullable and the connection selected by ``using``
    cannot defer constraint checks, the field is additionally scheduled to be
    set to ``None`` on ``sub_objs``.
    """
    collector.collect(
        sub_objs,
        source=field.remote_field.model,
        source_attr=field.name,
        nullable=field.null,
        fail_on_restricted=False,
    )
    # ``connections`` is only consulted for nullable fields (short-circuit).
    if field.null and not connections[using].features.can_defer_constraint_checks:
        collector.add_field_update(field, None, sub_objs)
-
-
def PROTECT(collector, field, sub_objs, using):
    """on_delete handler: refuse the deletion.

    Always raises ProtectedError. The message names the referenced model and
    the ``Model.field`` through which ``sub_objs`` reference it; ``sub_objs``
    is attached to the exception as the protected objects. ``collector`` and
    ``using`` are unused.
    """
    raise ProtectedError(
        "Cannot delete some instances of model '%s' because they are "
        "referenced through a protected foreign key: '%s.%s'"
        % (
            field.remote_field.model.__name__,
            sub_objs[0].__class__.__name__,
            field.name,
        ),
        sub_objs,
    )
-
-
def RESTRICT(collector, field, sub_objs, using):
    """on_delete handler: record ``sub_objs`` as restricted.

    The objects are registered on ``collector`` under ``field`` and a deletion
    dependency from the referenced model to the referencing model is added.
    No error is raised here. ``using`` is unused.
    """
    collector.add_restricted_objects(field, sub_objs)
    collector.add_dependency(field.remote_field.model, field.model)
-
-
def SET(value):
    """Build an on_delete handler that sets the foreign key to ``value``.

    If ``value`` is callable it is called each time the handler runs and its
    result is used; otherwise ``value`` itself is used. The returned handler
    carries a ``deconstruct`` attribute returning
    ``("django.db.models.SET", (value,), {})``.
    """
    if callable(value):

        def set_on_delete(collector, field, sub_objs, using):
            # Evaluate lazily so each deletion gets a fresh value.
            collector.add_field_update(field, value(), sub_objs)

    else:

        def set_on_delete(collector, field, sub_objs, using):
            collector.add_field_update(field, value, sub_objs)

    set_on_delete.deconstruct = lambda: ("django.db.models.SET", (value,), {})
    return set_on_delete
-
-
def SET_NULL(collector, field, sub_objs, using):
    """on_delete handler: schedule ``field`` to be set to ``None`` on ``sub_objs``.

    ``using`` is unused.
    """
    collector.add_field_update(field, None, sub_objs)
-
-
def SET_DEFAULT(collector, field, sub_objs, using):
    """on_delete handler: schedule ``field`` to be reset to its default.

    The default is obtained from ``field.get_default()`` when the handler
    runs. ``using`` is unused.
    """
    collector.add_field_update(field, field.get_default(), sub_objs)
-
-
def DO_NOTHING(collector, field, sub_objs, using):
    """on_delete handler: take no action for ``sub_objs``."""
    pass
-
-
def get_candidate_relations_to_delete(opts):
    """Yield the reverse relations of ``opts`` that deletion must consider.

    The candidate relations are the ones that come from N-1 and 1-1 relations.
    N-N (i.e., many-to-many) relations aren't candidates for deletion.

    Hidden fields are included. A field qualifies when it is auto-created,
    not concrete, and either one-to-one or one-to-many. Returns a generator.
    """
    return (
        f
        for f in opts.get_fields(include_hidden=True)
        if f.auto_created and not f.concrete and (f.one_to_one or f.one_to_many)
    )
-
-
class Collector:
    """Gather everything a deletion affects, then perform the deletion.

    ``collect()`` walks reverse relations from the given objects and invokes
    each relation's on_delete handler, filling the bookkeeping structures
    created in ``__init__``. ``delete()`` then sends signals, runs the
    queued fast deletes and field updates, and deletes the collected rows.
    """

    def __init__(self, using, origin=None):
        """Create an empty collector bound to the database alias ``using``."""
        self.using = using
        # A Model or QuerySet object.
        self.origin = origin
        # Initially, {model: {instances}}, later values become lists.
        self.data = defaultdict(set)
        # {model: {(field, value): {instances}}}
        self.field_updates = defaultdict(partial(defaultdict, set))
        # {model: {field: {instances}}}
        self.restricted_objects = defaultdict(partial(defaultdict, set))
        # fast_deletes is a list of queryset-likes that can be deleted without
        # fetching the objects into memory.
        self.fast_deletes = []

        # Tracks deletion-order dependency for databases without transactions
        # or ability to defer constraint checks. Only concrete model classes
        # should be included, as the dependencies exist only between actual
        # database tables; proxy models are represented here by their concrete
        # parent.
        self.dependencies = defaultdict(set)  # {model: {models}}

    def add(self, objs, source=None, nullable=False, reverse_dependency=False):
        """
        Add 'objs' to the collection of objects to be deleted. If the call is
        the result of a cascade, 'source' should be the model that caused it,
        and 'nullable' should be set to True if the relation can be null.

        Return a list of all objects that were not already collected.
        """
        if not objs:
            return []
        model = objs[0].__class__
        instances = self.data[model]
        # Membership is tested against the state before this call, so the
        # returned list keeps the order (and any duplicates) of 'objs'.
        new_objs = [obj for obj in objs if obj not in instances]
        instances.update(new_objs)
        # Nullable relationships can be ignored -- they are nulled out before
        # deleting, and therefore do not affect the order in which objects have
        # to be deleted.
        if source is not None and not nullable:
            self.add_dependency(source, model, reverse_dependency=reverse_dependency)
        return new_objs

    def add_dependency(self, model, dependency, reverse_dependency=False):
        """
        Record that 'model' depends on 'dependency' for deletion ordering
        (the roles are swapped when 'reverse_dependency' is True). The
        dependency is stored between concrete models, and 'dependency' gets
        an (initially empty) entry in self.data so that sort() sees it.
        """
        if reverse_dependency:
            model, dependency = dependency, model
        self.dependencies[model._meta.concrete_model].add(
            dependency._meta.concrete_model
        )
        self.data.setdefault(dependency, self.data.default_factory())

    def add_field_update(self, field, value, objs):
        """
        Schedule a field update. 'objs' must be a homogeneous iterable
        collection of model instances (e.g. a QuerySet).
        """
        if not objs:
            return
        model = objs[0].__class__
        self.field_updates[model][field, value].update(objs)

    def add_restricted_objects(self, field, objs):
        """
        Register 'objs' as restricted through 'field', keyed by the class of
        the first object. Do nothing if 'objs' is empty.
        """
        if objs:
            model = objs[0].__class__
            self.restricted_objects[model][field].update(objs)

    def clear_restricted_objects_from_set(self, model, objs):
        """
        Remove the instances in the set 'objs' from every restricted-object
        entry recorded for 'model'. Fields are kept even if their set of
        instances becomes empty.
        """
        if model in self.restricted_objects:
            self.restricted_objects[model] = {
                field: items - objs
                for field, items in self.restricted_objects[model].items()
            }

    def clear_restricted_objects_from_queryset(self, model, qs):
        """
        Remove from the restricted objects of 'model' those that are matched
        by the queryset 'qs' (looked up by primary key).
        """
        if model in self.restricted_objects:
            restricted_pks = [
                obj.pk
                for restricted in self.restricted_objects[model].values()
                for obj in restricted
            ]
            objs = set(qs.filter(pk__in=restricted_pks))
            self.clear_restricted_objects_from_set(model, objs)

    def _has_signal_listeners(self, model):
        """Return True if pre_delete or post_delete has listeners for 'model'."""
        return signals.pre_delete.has_listeners(
            model
        ) or signals.post_delete.has_listeners(model)

    def can_fast_delete(self, objs, from_field=None):
        """
        Determine if the objects in the given queryset-like or single object
        can be fast-deleted. This can be done if there are no cascades, no
        parents and no signal listeners for the object class.

        The 'from_field' tells where we are coming from - we need this to
        determine if the objects are in fact to be deleted. Allow also
        skipping parent -> child -> parent chain preventing fast delete of
        the child.
        """
        if from_field and from_field.remote_field.on_delete is not CASCADE:
            return False
        if hasattr(objs, "_meta"):
            model = objs._meta.model
        elif hasattr(objs, "model") and hasattr(objs, "_raw_delete"):
            model = objs.model
        else:
            # Neither a model (class or instance) nor a queryset-like.
            return False
        if self._has_signal_listeners(model):
            return False
        # The use of from_field comes from the need to avoid cascade back to
        # parent when parent delete is cascading to child.
        opts = model._meta
        return (
            all(
                link == from_field
                for link in opts.concrete_model._meta.parents.values()
            )
            and
            # Foreign keys pointing to this model.
            all(
                related.field.remote_field.on_delete is DO_NOTHING
                for related in get_candidate_relations_to_delete(opts)
            )
            and (
                # Something like generic foreign key.
                not any(
                    hasattr(field, "bulk_related_objects")
                    for field in opts.private_fields
                )
            )
        )

    def get_del_batches(self, objs, fields):
        """
        Return the objs in suitably sized batches for the used connection.
        """
        field_names = [field.name for field in fields]
        # Never let the backend's answer drop the batch size below one.
        conn_batch_size = max(
            connections[self.using].ops.bulk_batch_size(field_names, objs), 1
        )
        if len(objs) > conn_batch_size:
            return [
                objs[i : i + conn_batch_size]
                for i in range(0, len(objs), conn_batch_size)
            ]
        else:
            return [objs]

    def collect(
        self,
        objs,
        source=None,
        nullable=False,
        collect_related=True,
        source_attr=None,
        reverse_dependency=False,
        keep_parents=False,
        fail_on_restricted=True,
    ):
        """
        Add 'objs' to the collection of objects to be deleted as well as all
        parent instances. 'objs' must be a homogeneous iterable collection of
        model instances (e.g. a QuerySet). If 'collect_related' is True,
        related objects will be handled by their respective on_delete handler.

        If the call is the result of a cascade, 'source' should be the model
        that caused it and 'nullable' should be set to True, if the relation
        can be null.

        'source_attr' is accepted for interface compatibility; this
        implementation does not read it.

        If 'reverse_dependency' is True, 'source' will be deleted before the
        current model, rather than after. (Needed for cascading to parent
        models, the one case in which the cascade follows the forwards
        direction of an FK rather than the reverse direction.)

        If 'keep_parents' is True, data of parent models will not be deleted.

        If 'fail_on_restricted' is False, no error is raised even if deleting
        such objects is prohibited by RESTRICT. This defers restricted-object
        checking in recursive calls, where the top-level call may need to
        collect more objects to determine whether restricted ones can be
        deleted.

        Raise ProtectedError if any handler reported protected objects, and
        RestrictedError if 'fail_on_restricted' is True and restricted objects
        remain that are not themselves collected for deletion.
        """
        if self.can_fast_delete(objs):
            self.fast_deletes.append(objs)
            return
        new_objs = self.add(
            objs, source, nullable, reverse_dependency=reverse_dependency
        )
        if not new_objs:
            return

        model = new_objs[0].__class__

        if not keep_parents:
            # Recursively collect concrete model's parent models, but not their
            # related objects. These will be found by meta.get_fields()
            concrete_model = model._meta.concrete_model
            for ptr in concrete_model._meta.parents.values():
                if ptr:
                    parent_objs = [getattr(obj, ptr.name) for obj in new_objs]
                    self.collect(
                        parent_objs,
                        source=model,
                        source_attr=ptr.remote_field.related_name,
                        collect_related=False,
                        reverse_dependency=True,
                        fail_on_restricted=False,
                    )
        if not collect_related:
            return

        if keep_parents:
            parents = set(model._meta.get_parent_list())
        model_fast_deletes = defaultdict(list)
        protected_objects = defaultdict(list)
        for related in get_candidate_relations_to_delete(model._meta):
            # Preserve parent reverse relationships if keep_parents=True.
            if keep_parents and related.model in parents:
                continue
            field = related.field
            if field.remote_field.on_delete == DO_NOTHING:
                continue
            related_model = related.related_model
            if self.can_fast_delete(related_model, from_field=field):
                # Queue the field; the querysets are built after the loop so
                # several fields to the same model share one query per batch.
                model_fast_deletes[related_model].append(field)
                continue
            batches = self.get_del_batches(new_objs, [field])
            for batch in batches:
                sub_objs = self.related_objects(related_model, [field], batch)
                # Non-referenced fields can be deferred if no signal receivers
                # are connected for the related model as they'll never be
                # exposed to the user. Skip field deferring when some
                # relationships are select_related as interactions between both
                # features are hard to get right. This should only happen in
                # the rare cases where .related_objects is overridden anyway.
                if not (
                    sub_objs.query.select_related
                    or self._has_signal_listeners(related_model)
                ):
                    referenced_fields = set(
                        chain.from_iterable(
                            (rf.attname for rf in rel.field.foreign_related_fields)
                            for rel in get_candidate_relations_to_delete(
                                related_model._meta
                            )
                        )
                    )
                    sub_objs = sub_objs.only(*tuple(referenced_fields))
                if sub_objs:
                    try:
                        field.remote_field.on_delete(self, field, sub_objs, self.using)
                    except ProtectedError as error:
                        # Keep going so that one error can report every
                        # protected relation at once.
                        key = "'%s.%s'" % (field.model.__name__, field.name)
                        protected_objects[key] += error.protected_objects
        if protected_objects:
            raise ProtectedError(
                "Cannot delete some instances of model %r because they are "
                "referenced through protected foreign keys: %s."
                % (
                    model.__name__,
                    ", ".join(protected_objects),
                ),
                set(chain.from_iterable(protected_objects.values())),
            )
        for related_model, related_fields in model_fast_deletes.items():
            batches = self.get_del_batches(new_objs, related_fields)
            for batch in batches:
                sub_objs = self.related_objects(related_model, related_fields, batch)
                self.fast_deletes.append(sub_objs)
        for field in model._meta.private_fields:
            if hasattr(field, "bulk_related_objects"):
                # It's something like generic foreign key.
                sub_objs = field.bulk_related_objects(new_objs, self.using)
                self.collect(
                    sub_objs, source=model, nullable=True, fail_on_restricted=False
                )

        if fail_on_restricted:
            # Raise an error if collected restricted objects (RESTRICT) aren't
            # candidates for deletion also collected via CASCADE.
            for related_model, instances in self.data.items():
                self.clear_restricted_objects_from_set(related_model, instances)
            for qs in self.fast_deletes:
                self.clear_restricted_objects_from_queryset(qs.model, qs)
            if self.restricted_objects.values():
                restricted_objects = defaultdict(list)
                for related_model, fields in self.restricted_objects.items():
                    for field, objs in fields.items():
                        if objs:
                            key = "'%s.%s'" % (related_model.__name__, field.name)
                            restricted_objects[key] += objs
                if restricted_objects:
                    raise RestrictedError(
                        "Cannot delete some instances of model %r because "
                        "they are referenced through restricted foreign keys: "
                        "%s."
                        % (
                            model.__name__,
                            ", ".join(restricted_objects),
                        ),
                        set(chain.from_iterable(restricted_objects.values())),
                    )

    def related_objects(self, related_model, related_fields, objs):
        """
        Get a QuerySet of the related model to objs via related fields.
        """
        # OR together one "<field>__in" condition per related field.
        predicate = query_utils.Q(
            *((f"{related_field.name}__in", objs) for related_field in related_fields),
            _connector=query_utils.Q.OR,
        )
        return related_model._base_manager.using(self.using).filter(predicate)

    def instances_with_model(self):
        """Yield a (model, instance) pair for every collected instance."""
        for model, instances in self.data.items():
            for obj in instances:
                yield model, obj

    def sort(self):
        """
        Reorder self.data so that each model comes after the models it
        depends on (per self.dependencies, compared by concrete model).

        If no such order exists (a dependency cycle), leave self.data
        untouched. On success self.data is replaced by a plain dict.
        """
        sorted_models = []
        # Mirrors sorted_models for O(1) membership tests.
        placed = set()
        concrete_models = set()
        models = list(self.data)
        while len(sorted_models) < len(models):
            found = False
            for model in models:
                if model in placed:
                    continue
                dependencies = self.dependencies.get(model._meta.concrete_model)
                if not (dependencies and dependencies.difference(concrete_models)):
                    sorted_models.append(model)
                    placed.add(model)
                    concrete_models.add(model._meta.concrete_model)
                    found = True
            if not found:
                # A full pass placed nothing: the rest form a cycle.
                return
        self.data = {model: self.data[model] for model in sorted_models}

    def delete(self):
        """
        Delete everything that was collected.

        Return a tuple (total, per_model) where per_model maps each model
        label to the number of rows deleted for it and total is their sum.
        Afterwards the scheduled field values are applied to the in-memory
        instances and the primary key attribute of every deleted instance is
        set to None.
        """
        # sort instance collections
        for model, instances in self.data.items():
            self.data[model] = sorted(instances, key=attrgetter("pk"))

        # if possible, bring the models in an order suitable for databases that
        # don't support transactions or cannot defer constraint checks until the
        # end of a transaction.
        self.sort()
        # number of objects deleted for each model label
        deleted_counter = Counter()

        # Optimize for the case with a single obj and no dependencies
        if len(self.data) == 1:
            ((model, instances),) = self.data.items()
            if len(instances) == 1:
                instance = instances[0]
                if self.can_fast_delete(instance):
                    with transaction.mark_for_rollback_on_error(self.using):
                        count = sql.DeleteQuery(model).delete_batch(
                            [instance.pk], self.using
                        )
                    setattr(instance, model._meta.pk.attname, None)
                    return count, {model._meta.label: count}

        with transaction.atomic(using=self.using, savepoint=False):
            # send pre_delete signals
            for model, obj in self.instances_with_model():
                if not model._meta.auto_created:
                    signals.pre_delete.send(
                        sender=model,
                        instance=obj,
                        using=self.using,
                        origin=self.origin,
                    )

            # fast deletes
            for qs in self.fast_deletes:
                count = qs._raw_delete(using=self.using)
                if count:
                    deleted_counter[qs.model._meta.label] += count

            # update fields
            for model, instances_for_fieldvalues in self.field_updates.items():
                for (field, value), instances in instances_for_fieldvalues.items():
                    query = sql.UpdateQuery(model)
                    query.update_batch(
                        [obj.pk for obj in instances], {field.name: value}, self.using
                    )

            # reverse instance collections
            for instances in self.data.values():
                instances.reverse()

            # delete instances
            for model, instances in self.data.items():
                query = sql.DeleteQuery(model)
                pk_list = [obj.pk for obj in instances]
                count = query.delete_batch(pk_list, self.using)
                if count:
                    deleted_counter[model._meta.label] += count

                if not model._meta.auto_created:
                    for obj in instances:
                        signals.post_delete.send(
                            sender=model,
                            instance=obj,
                            using=self.using,
                            origin=self.origin,
                        )

        # update collected instances
        for instances_for_fieldvalues in self.field_updates.values():
            for (field, value), instances in instances_for_fieldvalues.items():
                for obj in instances:
                    setattr(obj, field.attname, value)
        for model, instances in self.data.items():
            for instance in instances:
                setattr(instance, model._meta.pk.attname, None)
        return sum(deleted_counter.values()), dict(deleted_counter)
|