Development of an internal social media platform with personalised dashboards for students
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

job.py 21KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595
  1. # -*- coding: utf-8 -*-
  2. """
  3. celery.worker.job
  4. ~~~~~~~~~~~~~~~~~
  5. This module defines the :class:`Request` class,
  6. which specifies how tasks are executed.
  7. """
  8. from __future__ import absolute_import, unicode_literals
  9. import logging
  10. import socket
  11. import sys
  12. from billiard.einfo import ExceptionInfo
  13. from datetime import datetime
  14. from weakref import ref
  15. from kombu.utils import kwdict, reprcall
  16. from kombu.utils.encoding import safe_repr, safe_str
  17. from celery import signals
  18. from celery.app.trace import trace_task, trace_task_ret
  19. from celery.exceptions import (
  20. Ignore, TaskRevokedError, InvalidTaskError,
  21. SoftTimeLimitExceeded, TimeLimitExceeded,
  22. WorkerLostError, Terminated, Retry, Reject,
  23. )
  24. from celery.five import items, monotonic, string, string_t
  25. from celery.platforms import signals as _signals
  26. from celery.utils import fun_takes_kwargs
  27. from celery.utils.functional import noop
  28. from celery.utils.log import get_logger
  29. from celery.utils.serialization import get_pickled_exception
  30. from celery.utils.text import truncate
  31. from celery.utils.timeutils import maybe_iso8601, timezone, maybe_make_aware
  32. from . import state
  33. __all__ = ['Request']
  34. IS_PYPY = hasattr(sys, 'pypy_version_info')
  35. logger = get_logger(__name__)
  36. debug, info, warn, error = (logger.debug, logger.info,
  37. logger.warning, logger.error)
  38. _does_info = False
  39. _does_debug = False
  40. #: Max length of result representation
  41. RESULT_MAXLEN = 128
  42. def __optimize__():
  43. # this is also called by celery.app.trace.setup_worker_optimizations
  44. global _does_debug
  45. global _does_info
  46. _does_debug = logger.isEnabledFor(logging.DEBUG)
  47. _does_info = logger.isEnabledFor(logging.INFO)
  48. __optimize__()
  49. # Localize
  50. tz_utc = timezone.utc
  51. tz_or_local = timezone.tz_or_local
  52. send_revoked = signals.task_revoked.send
  53. task_accepted = state.task_accepted
  54. task_ready = state.task_ready
  55. revoked_tasks = state.revoked
  56. NEEDS_KWDICT = sys.version_info <= (2, 6)
  57. #: Use when no message object passed to :class:`Request`.
  58. DEFAULT_FIELDS = {
  59. 'headers': None,
  60. 'reply_to': None,
  61. 'correlation_id': None,
  62. 'delivery_info': {
  63. 'exchange': None,
  64. 'routing_key': None,
  65. 'priority': 0,
  66. 'redelivered': False,
  67. },
  68. }
  69. class Request(object):
  70. """A request for task execution."""
  71. if not IS_PYPY: # pragma: no cover
  72. __slots__ = (
  73. 'app', 'name', 'id', 'args', 'kwargs', 'on_ack',
  74. 'hostname', 'eventer', 'connection_errors', 'task', 'eta',
  75. 'expires', 'request_dict', 'acknowledged', 'on_reject',
  76. 'utc', 'time_start', 'worker_pid', '_already_revoked',
  77. '_terminate_on_ack', '_apply_result',
  78. '_tzlocal', '__weakref__', '__dict__',
  79. )
  80. #: Format string used to log task success.
  81. success_msg = """\
  82. Task %(name)s[%(id)s] succeeded in %(runtime)ss: %(return_value)s
  83. """
  84. #: Format string used to log task failure.
  85. error_msg = """\
  86. Task %(name)s[%(id)s] %(description)s: %(exc)s
  87. """
  88. #: Format string used to log internal error.
  89. internal_error_msg = """\
  90. Task %(name)s[%(id)s] %(description)s: %(exc)s
  91. """
  92. ignored_msg = """\
  93. Task %(name)s[%(id)s] %(description)s
  94. """
  95. rejected_msg = """\
  96. Task %(name)s[%(id)s] %(exc)s
  97. """
  98. #: Format string used to log task retry.
  99. retry_msg = """Task %(name)s[%(id)s] retry: %(exc)s"""
  100. def __init__(self, body, on_ack=noop,
  101. hostname=None, eventer=None, app=None,
  102. connection_errors=None, request_dict=None,
  103. message=None, task=None, on_reject=noop, **opts):
  104. self.app = app
  105. name = self.name = body['task']
  106. self.id = body['id']
  107. self.args = body.get('args', [])
  108. self.kwargs = body.get('kwargs', {})
  109. try:
  110. self.kwargs.items
  111. except AttributeError:
  112. raise InvalidTaskError(
  113. 'Task keyword arguments is not a mapping')
  114. if NEEDS_KWDICT:
  115. self.kwargs = kwdict(self.kwargs)
  116. eta = body.get('eta')
  117. expires = body.get('expires')
  118. utc = self.utc = body.get('utc', False)
  119. self.on_ack = on_ack
  120. self.on_reject = on_reject
  121. self.hostname = hostname or socket.gethostname()
  122. self.eventer = eventer
  123. self.connection_errors = connection_errors or ()
  124. self.task = task or self.app.tasks[name]
  125. self.acknowledged = self._already_revoked = False
  126. self.time_start = self.worker_pid = self._terminate_on_ack = None
  127. self._apply_result = None
  128. self._tzlocal = None
  129. # timezone means the message is timezone-aware, and the only timezone
  130. # supported at this point is UTC.
  131. if eta is not None:
  132. try:
  133. self.eta = maybe_iso8601(eta)
  134. except (AttributeError, ValueError, TypeError) as exc:
  135. raise InvalidTaskError(
  136. 'invalid eta value {0!r}: {1}'.format(eta, exc))
  137. if utc:
  138. self.eta = maybe_make_aware(self.eta, self.tzlocal)
  139. else:
  140. self.eta = None
  141. if expires is not None:
  142. try:
  143. self.expires = maybe_iso8601(expires)
  144. except (AttributeError, ValueError, TypeError) as exc:
  145. raise InvalidTaskError(
  146. 'invalid expires value {0!r}: {1}'.format(expires, exc))
  147. if utc:
  148. self.expires = maybe_make_aware(self.expires, self.tzlocal)
  149. else:
  150. self.expires = None
  151. if message:
  152. delivery_info = message.delivery_info or {}
  153. properties = message.properties or {}
  154. body.update({
  155. 'headers': message.headers,
  156. 'reply_to': properties.get('reply_to'),
  157. 'correlation_id': properties.get('correlation_id'),
  158. 'delivery_info': {
  159. 'exchange': delivery_info.get('exchange'),
  160. 'routing_key': delivery_info.get('routing_key'),
  161. 'priority': properties.get(
  162. 'priority', delivery_info.get('priority')),
  163. 'redelivered': delivery_info.get('redelivered'),
  164. }
  165. })
  166. else:
  167. body.update(DEFAULT_FIELDS)
  168. self.request_dict = body
  169. @property
  170. def delivery_info(self):
  171. return self.request_dict['delivery_info']
  172. def extend_with_default_kwargs(self):
  173. """Extend the tasks keyword arguments with standard task arguments.
  174. Currently these are `logfile`, `loglevel`, `task_id`,
  175. `task_name`, `task_retries`, and `delivery_info`.
  176. See :meth:`celery.task.base.Task.run` for more information.
  177. Magic keyword arguments are deprecated and will be removed
  178. in version 4.0.
  179. """
  180. kwargs = dict(self.kwargs)
  181. default_kwargs = {'logfile': None, # deprecated
  182. 'loglevel': None, # deprecated
  183. 'task_id': self.id,
  184. 'task_name': self.name,
  185. 'task_retries': self.request_dict.get('retries', 0),
  186. 'task_is_eager': False,
  187. 'delivery_info': self.delivery_info}
  188. fun = self.task.run
  189. supported_keys = fun_takes_kwargs(fun, default_kwargs)
  190. extend_with = dict((key, val) for key, val in items(default_kwargs)
  191. if key in supported_keys)
  192. kwargs.update(extend_with)
  193. return kwargs
  194. def execute_using_pool(self, pool, **kwargs):
  195. """Used by the worker to send this task to the pool.
  196. :param pool: A :class:`celery.concurrency.base.TaskPool` instance.
  197. :raises celery.exceptions.TaskRevokedError: if the task was revoked
  198. and ignored.
  199. """
  200. uuid = self.id
  201. task = self.task
  202. if self.revoked():
  203. raise TaskRevokedError(uuid)
  204. hostname = self.hostname
  205. kwargs = self.kwargs
  206. if task.accept_magic_kwargs:
  207. kwargs = self.extend_with_default_kwargs()
  208. request = self.request_dict
  209. request.update({'hostname': hostname, 'is_eager': False,
  210. 'delivery_info': self.delivery_info,
  211. 'group': self.request_dict.get('taskset')})
  212. timeout, soft_timeout = request.get('timelimit', (None, None))
  213. timeout = timeout or task.time_limit
  214. soft_timeout = soft_timeout or task.soft_time_limit
  215. result = pool.apply_async(
  216. trace_task_ret,
  217. args=(self.name, uuid, self.args, kwargs, request),
  218. accept_callback=self.on_accepted,
  219. timeout_callback=self.on_timeout,
  220. callback=self.on_success,
  221. error_callback=self.on_failure,
  222. soft_timeout=soft_timeout,
  223. timeout=timeout,
  224. correlation_id=uuid,
  225. )
  226. # cannot create weakref to None
  227. self._apply_result = ref(result) if result is not None else result
  228. return result
  229. def execute(self, loglevel=None, logfile=None):
  230. """Execute the task in a :func:`~celery.app.trace.trace_task`.
  231. :keyword loglevel: The loglevel used by the task.
  232. :keyword logfile: The logfile used by the task.
  233. """
  234. if self.revoked():
  235. return
  236. # acknowledge task as being processed.
  237. if not self.task.acks_late:
  238. self.acknowledge()
  239. kwargs = self.kwargs
  240. if self.task.accept_magic_kwargs:
  241. kwargs = self.extend_with_default_kwargs()
  242. request = self.request_dict
  243. request.update({'loglevel': loglevel, 'logfile': logfile,
  244. 'hostname': self.hostname, 'is_eager': False,
  245. 'delivery_info': self.delivery_info})
  246. retval = trace_task(self.task, self.id, self.args, kwargs, request,
  247. hostname=self.hostname, loader=self.app.loader,
  248. app=self.app)
  249. self.acknowledge()
  250. return retval
  251. def maybe_expire(self):
  252. """If expired, mark the task as revoked."""
  253. if self.expires:
  254. now = datetime.now(self.expires.tzinfo)
  255. if now > self.expires:
  256. revoked_tasks.add(self.id)
  257. return True
  258. def terminate(self, pool, signal=None):
  259. signal = _signals.signum(signal or 'TERM')
  260. if self.time_start:
  261. pool.terminate_job(self.worker_pid, signal)
  262. self._announce_revoked('terminated', True, signal, False)
  263. else:
  264. self._terminate_on_ack = pool, signal
  265. if self._apply_result is not None:
  266. obj = self._apply_result() # is a weakref
  267. if obj is not None:
  268. obj.terminate(signal)
  269. def _announce_revoked(self, reason, terminated, signum, expired):
  270. task_ready(self)
  271. self.send_event('task-revoked',
  272. terminated=terminated, signum=signum, expired=expired)
  273. if self.store_errors:
  274. self.task.backend.mark_as_revoked(self.id, reason, request=self)
  275. self.acknowledge()
  276. self._already_revoked = True
  277. send_revoked(self.task, request=self,
  278. terminated=terminated, signum=signum, expired=expired)
  279. def revoked(self):
  280. """If revoked, skip task and mark state."""
  281. expired = False
  282. if self._already_revoked:
  283. return True
  284. if self.expires:
  285. expired = self.maybe_expire()
  286. if self.id in revoked_tasks:
  287. info('Discarding revoked task: %s[%s]', self.name, self.id)
  288. self._announce_revoked(
  289. 'expired' if expired else 'revoked', False, None, expired,
  290. )
  291. return True
  292. return False
  293. def send_event(self, type, **fields):
  294. if self.eventer and self.eventer.enabled and self.task.send_events:
  295. self.eventer.send(type, uuid=self.id, **fields)
  296. def on_accepted(self, pid, time_accepted):
  297. """Handler called when task is accepted by worker pool."""
  298. self.worker_pid = pid
  299. self.time_start = time_accepted
  300. task_accepted(self)
  301. if not self.task.acks_late:
  302. self.acknowledge()
  303. self.send_event('task-started')
  304. if _does_debug:
  305. debug('Task accepted: %s[%s] pid:%r', self.name, self.id, pid)
  306. if self._terminate_on_ack is not None:
  307. self.terminate(*self._terminate_on_ack)
  308. def on_timeout(self, soft, timeout):
  309. """Handler called if the task times out."""
  310. task_ready(self)
  311. if soft:
  312. warn('Soft time limit (%ss) exceeded for %s[%s]',
  313. timeout, self.name, self.id)
  314. exc = SoftTimeLimitExceeded(timeout)
  315. else:
  316. error('Hard time limit (%ss) exceeded for %s[%s]',
  317. timeout, self.name, self.id)
  318. exc = TimeLimitExceeded(timeout)
  319. if self.store_errors:
  320. self.task.backend.mark_as_failure(self.id, exc, request=self)
  321. if self.task.acks_late:
  322. self.acknowledge()
  323. def on_success(self, ret_value, now=None, nowfun=monotonic):
  324. """Handler called if the task was successfully processed."""
  325. if isinstance(ret_value, ExceptionInfo):
  326. if isinstance(ret_value.exception, (
  327. SystemExit, KeyboardInterrupt)):
  328. raise ret_value.exception
  329. return self.on_failure(ret_value)
  330. task_ready(self)
  331. if self.task.acks_late:
  332. self.acknowledge()
  333. if self.eventer and self.eventer.enabled:
  334. now = nowfun()
  335. runtime = self.time_start and (now - self.time_start) or 0
  336. self.send_event('task-succeeded',
  337. result=safe_repr(ret_value), runtime=runtime)
  338. if _does_info:
  339. now = now or nowfun()
  340. runtime = self.time_start and (now - self.time_start) or 0
  341. info(self.success_msg.strip(), {
  342. 'id': self.id, 'name': self.name,
  343. 'return_value': self.repr_result(ret_value),
  344. 'runtime': runtime})
  345. def on_retry(self, exc_info):
  346. """Handler called if the task should be retried."""
  347. if self.task.acks_late:
  348. self.acknowledge()
  349. self.send_event('task-retried',
  350. exception=safe_repr(exc_info.exception.exc),
  351. traceback=safe_str(exc_info.traceback))
  352. if _does_info:
  353. info(self.retry_msg.strip(),
  354. {'id': self.id, 'name': self.name,
  355. 'exc': exc_info.exception})
  356. def on_failure(self, exc_info):
  357. """Handler called if the task raised an exception."""
  358. task_ready(self)
  359. send_failed_event = True
  360. if not exc_info.internal:
  361. exc = exc_info.exception
  362. if isinstance(exc, Retry):
  363. return self.on_retry(exc_info)
  364. # These are special cases where the process would not have had
  365. # time to write the result.
  366. if self.store_errors:
  367. if isinstance(exc, WorkerLostError):
  368. self.task.backend.mark_as_failure(
  369. self.id, exc, request=self,
  370. )
  371. elif isinstance(exc, Terminated):
  372. self._announce_revoked(
  373. 'terminated', True, string(exc), False)
  374. send_failed_event = False # already sent revoked event
  375. # (acks_late) acknowledge after result stored.
  376. if self.task.acks_late:
  377. self.acknowledge()
  378. self._log_error(exc_info, send_failed_event=send_failed_event)
  379. def _log_error(self, einfo, send_failed_event=True):
  380. einfo.exception = get_pickled_exception(einfo.exception)
  381. eobj = einfo.exception
  382. exception, traceback, exc_info, internal, sargs, skwargs = (
  383. safe_repr(eobj),
  384. safe_str(einfo.traceback),
  385. einfo.exc_info,
  386. einfo.internal,
  387. safe_repr(self.args),
  388. safe_repr(self.kwargs),
  389. )
  390. task = self.task
  391. if task.throws and isinstance(eobj, task.throws):
  392. do_send_mail, severity, exc_info, description = (
  393. False, logging.INFO, None, 'raised expected',
  394. )
  395. else:
  396. do_send_mail, severity, description = (
  397. True, logging.ERROR, 'raised unexpected',
  398. )
  399. format = self.error_msg
  400. if internal:
  401. if isinstance(einfo.exception, MemoryError):
  402. raise MemoryError('Process got: %s' % (einfo.exception, ))
  403. elif isinstance(einfo.exception, Reject):
  404. format = self.rejected_msg
  405. description = 'rejected'
  406. severity = logging.WARN
  407. send_failed_event = False
  408. self.reject(requeue=einfo.exception.requeue)
  409. elif isinstance(einfo.exception, Ignore):
  410. format = self.ignored_msg
  411. description = 'ignored'
  412. severity = logging.INFO
  413. exc_info = None
  414. send_failed_event = False
  415. self.acknowledge()
  416. else:
  417. format = self.internal_error_msg
  418. description = 'INTERNAL ERROR'
  419. severity = logging.CRITICAL
  420. if send_failed_event:
  421. self.send_event(
  422. 'task-failed', exception=exception, traceback=traceback,
  423. )
  424. context = {
  425. 'hostname': self.hostname,
  426. 'id': self.id,
  427. 'name': self.name,
  428. 'exc': exception,
  429. 'traceback': traceback,
  430. 'args': sargs,
  431. 'kwargs': skwargs,
  432. 'description': description,
  433. }
  434. logger.log(severity, format.strip(), context,
  435. exc_info=exc_info,
  436. extra={'data': {'id': self.id,
  437. 'name': self.name,
  438. 'args': sargs,
  439. 'kwargs': skwargs,
  440. 'hostname': self.hostname,
  441. 'internal': internal}})
  442. if do_send_mail:
  443. task.send_error_email(context, einfo.exception)
  444. def acknowledge(self):
  445. """Acknowledge task."""
  446. if not self.acknowledged:
  447. self.on_ack(logger, self.connection_errors)
  448. self.acknowledged = True
  449. def reject(self, requeue=False):
  450. if not self.acknowledged:
  451. self.on_reject(logger, self.connection_errors, requeue)
  452. self.acknowledged = True
  453. def repr_result(self, result, maxlen=RESULT_MAXLEN):
  454. # 46 is the length needed to fit
  455. # 'the quick brown fox jumps over the lazy dog' :)
  456. if not isinstance(result, string_t):
  457. result = safe_repr(result)
  458. return truncate(result) if len(result) > maxlen else result
  459. def info(self, safe=False):
  460. return {'id': self.id,
  461. 'name': self.name,
  462. 'args': self.args if safe else safe_repr(self.args),
  463. 'kwargs': self.kwargs if safe else safe_repr(self.kwargs),
  464. 'hostname': self.hostname,
  465. 'time_start': self.time_start,
  466. 'acknowledged': self.acknowledged,
  467. 'delivery_info': self.delivery_info,
  468. 'worker_pid': self.worker_pid}
  469. def __str__(self):
  470. return '{0.name}[{0.id}]{1}{2}'.format(
  471. self,
  472. ' eta:[{0}]'.format(self.eta) if self.eta else '',
  473. ' expires:[{0}]'.format(self.expires) if self.expires else '',
  474. )
  475. shortinfo = __str__
  476. def __repr__(self):
  477. return '<{0} {1}: {2}>'.format(
  478. type(self).__name__, self.id,
  479. reprcall(self.name, self.args, self.kwargs))
  480. @property
  481. def tzlocal(self):
  482. if self._tzlocal is None:
  483. self._tzlocal = self.app.conf.CELERY_TIMEZONE
  484. return self._tzlocal
  485. @property
  486. def store_errors(self):
  487. return (not self.task.ignore_result or
  488. self.task.store_errors_even_if_ignored)
  489. @property
  490. def task_id(self):
  491. # XXX compat
  492. return self.id
  493. @task_id.setter # noqa
  494. def task_id(self, value):
  495. self.id = value
  496. @property
  497. def task_name(self):
  498. # XXX compat
  499. return self.name
  500. @task_name.setter # noqa
  501. def task_name(self, value):
  502. self.name = value
  503. @property
  504. def reply_to(self):
  505. # used by rpc backend when failures reported by parent process
  506. return self.request_dict['reply_to']
  507. @property
  508. def correlation_id(self):
  509. # used similarly to reply_to
  510. return self.request_dict['correlation_id']