Development of an internal social media platform with personalised dashboards for students

beat.py 19KB

# -*- coding: utf-8 -*-
"""
celery.beat
~~~~~~~~~~~

The periodic task scheduler.

"""
from __future__ import absolute_import

import errno
import os
import time
import shelve
import sys
import traceback

from threading import Event, Thread

from billiard import ensure_multiprocessing
from billiard.process import Process
from billiard.common import reset_signals

from kombu.utils import cached_property, reprcall
from kombu.utils.functional import maybe_evaluate

from . import __version__
from . import platforms
from . import signals
from .five import items, reraise, values, monotonic
from .schedules import maybe_schedule, crontab
from .utils.imports import instantiate
from .utils.timeutils import humanize_seconds
from .utils.log import get_logger, iter_open_logger_fds

__all__ = ['SchedulingError', 'ScheduleEntry', 'Scheduler',
           'PersistentScheduler', 'Service', 'EmbeddedService']

logger = get_logger(__name__)
debug, info, error, warning = (logger.debug, logger.info,
                               logger.error, logger.warning)

DEFAULT_MAX_INTERVAL = 300  # 5 minutes


class SchedulingError(Exception):
    """An error occurred while scheduling a task."""


class ScheduleEntry(object):
    """An entry in the scheduler.

    :keyword name: see :attr:`name`.
    :keyword schedule: see :attr:`schedule`.
    :keyword args: see :attr:`args`.
    :keyword kwargs: see :attr:`kwargs`.
    :keyword options: see :attr:`options`.
    :keyword last_run_at: see :attr:`last_run_at`.
    :keyword total_run_count: see :attr:`total_run_count`.
    :keyword relative: Is the time relative to when the server starts?

    """

    #: The task name
    name = None

    #: The schedule (run_every/crontab)
    schedule = None

    #: Positional arguments to apply.
    args = None

    #: Keyword arguments to apply.
    kwargs = None

    #: Task execution options.
    options = None

    #: The time and date of when this task was last scheduled.
    last_run_at = None

    #: Total number of times this task has been scheduled.
    total_run_count = 0

    def __init__(self, name=None, task=None, last_run_at=None,
                 total_run_count=None, schedule=None, args=(), kwargs={},
                 options={}, relative=False, app=None):
        self.app = app
        self.name = name
        self.task = task
        self.args = args
        self.kwargs = kwargs
        self.options = options
        self.schedule = maybe_schedule(schedule, relative, app=self.app)
        self.last_run_at = last_run_at or self._default_now()
        self.total_run_count = total_run_count or 0

    def _default_now(self):
        return self.schedule.now() if self.schedule else self.app.now()

    def _next_instance(self, last_run_at=None):
        """Return a new instance of the same class, but with
        its date and count fields updated."""
        return self.__class__(**dict(
            self,
            last_run_at=last_run_at or self._default_now(),
            total_run_count=self.total_run_count + 1,
        ))
    __next__ = next = _next_instance  # for 2to3

    def __reduce__(self):
        return self.__class__, (
            self.name, self.task, self.last_run_at, self.total_run_count,
            self.schedule, self.args, self.kwargs, self.options,
        )

    def update(self, other):
        """Update values from another entry.

        Only updates the "editable" fields (task, schedule, args, kwargs,
        options).

        """
        self.__dict__.update({'task': other.task, 'schedule': other.schedule,
                              'args': other.args, 'kwargs': other.kwargs,
                              'options': other.options})

    def is_due(self):
        """See :meth:`~celery.schedules.schedule.is_due`."""
        return self.schedule.is_due(self.last_run_at)

    def __iter__(self):
        return iter(items(vars(self)))

    def __repr__(self):
        return '<Entry: {0.name} {call} {0.schedule}>'.format(
            self,
            call=reprcall(self.task, self.args or (), self.kwargs or {}),
        )
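

# Example (sketch): building a single entry by hand.  The ``app`` instance is
# assumed to exist already and 'tasks.add' is a hypothetical task name.
#
#     from celery.schedules import crontab
#
#     entry = ScheduleEntry(
#         name='add-every-morning',
#         task='tasks.add',                      # hypothetical task
#         schedule=crontab(minute=0, hour=7),
#         args=(2, 2),
#         app=app,
#     )
#     is_due, next_check = entry.is_due()        # (bool, seconds until next check)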


class Scheduler(object):
    """Scheduler for periodic tasks.

    The :program:`celery beat` program may instantiate this class
    multiple times for introspection purposes, but then with the
    ``lazy`` argument set.  It is important for subclasses to
    be idempotent when this argument is set.

    :keyword schedule: see :attr:`schedule`.
    :keyword max_interval: see :attr:`max_interval`.
    :keyword lazy: Do not set up the schedule.

    """
    Entry = ScheduleEntry

    #: The schedule dict/shelve.
    schedule = None

    #: Maximum time to sleep between re-checking the schedule.
    max_interval = DEFAULT_MAX_INTERVAL

    #: How often to sync the schedule (3 minutes by default)
    sync_every = 3 * 60

    #: How many tasks can be called before a sync is forced.
    sync_every_tasks = None

    _last_sync = None
    _tasks_since_sync = 0

    logger = logger  # compat

    def __init__(self, app, schedule=None, max_interval=None,
                 Publisher=None, lazy=False, sync_every_tasks=None, **kwargs):
        self.app = app
        self.data = maybe_evaluate({} if schedule is None else schedule)
        self.max_interval = (max_interval or
                             app.conf.CELERYBEAT_MAX_LOOP_INTERVAL or
                             self.max_interval)
        self.sync_every_tasks = (
            app.conf.CELERYBEAT_SYNC_EVERY if sync_every_tasks is None
            else sync_every_tasks)
        self.Publisher = Publisher or app.amqp.TaskProducer
        if not lazy:
            self.setup_schedule()

    def install_default_entries(self, data):
        entries = {}
        if self.app.conf.CELERY_TASK_RESULT_EXPIRES and \
                not self.app.backend.supports_autoexpire:
            if 'celery.backend_cleanup' not in data:
                entries['celery.backend_cleanup'] = {
                    'task': 'celery.backend_cleanup',
                    'schedule': crontab('0', '4', '*'),
                    'options': {'expires': 12 * 3600}}
        self.update_from_dict(entries)

    def maybe_due(self, entry, publisher=None):
        is_due, next_time_to_run = entry.is_due()

        if is_due:
            info('Scheduler: Sending due task %s (%s)', entry.name, entry.task)
            try:
                result = self.apply_async(entry, publisher=publisher)
            except Exception as exc:
                error('Message Error: %s\n%s',
                      exc, traceback.format_stack(), exc_info=True)
            else:
                debug('%s sent. id->%s', entry.task, result.id)
        return next_time_to_run

    def tick(self):
        """Run a tick, that is one iteration of the scheduler.

        Executes all due tasks.

        """
        remaining_times = []
        try:
            for entry in values(self.schedule):
                next_time_to_run = self.maybe_due(entry, self.publisher)
                if next_time_to_run:
                    remaining_times.append(next_time_to_run)
        except RuntimeError:
            pass

        return min(remaining_times + [self.max_interval])

    def should_sync(self):
        return (
            (not self._last_sync or
             (monotonic() - self._last_sync) > self.sync_every) or
            (self.sync_every_tasks and
             self._tasks_since_sync >= self.sync_every_tasks)
        )

    def reserve(self, entry):
        new_entry = self.schedule[entry.name] = next(entry)
        return new_entry

    def apply_async(self, entry, publisher=None, **kwargs):
        # Update timestamps and run counts before we actually execute,
        # so we have that done if an exception is raised (doesn't schedule
        # forever.)
        entry = self.reserve(entry)
        task = self.app.tasks.get(entry.task)

        try:
            if task:
                result = task.apply_async(entry.args, entry.kwargs,
                                          publisher=publisher,
                                          **entry.options)
            else:
                result = self.send_task(entry.task, entry.args, entry.kwargs,
                                        publisher=publisher,
                                        **entry.options)
        except Exception as exc:
            reraise(SchedulingError, SchedulingError(
                "Couldn't apply scheduled task {0.name}: {exc}".format(
                    entry, exc=exc)), sys.exc_info()[2])
        finally:
            self._tasks_since_sync += 1
            if self.should_sync():
                self._do_sync()
        return result

    def send_task(self, *args, **kwargs):
        return self.app.send_task(*args, **kwargs)

    def setup_schedule(self):
        self.install_default_entries(self.data)

    def _do_sync(self):
        try:
            debug('beat: Synchronizing schedule...')
            self.sync()
        finally:
            self._last_sync = monotonic()
            self._tasks_since_sync = 0

    def sync(self):
        pass

    def close(self):
        self.sync()

    def add(self, **kwargs):
        entry = self.Entry(app=self.app, **kwargs)
        self.schedule[entry.name] = entry
        return entry

    def _maybe_entry(self, name, entry):
        if isinstance(entry, self.Entry):
            entry.app = self.app
            return entry
        return self.Entry(**dict(entry, name=name, app=self.app))

    def update_from_dict(self, dict_):
        self.schedule.update(dict(
            (name, self._maybe_entry(name, entry))
            for name, entry in items(dict_)))

    def merge_inplace(self, b):
        schedule = self.schedule
        A, B = set(schedule), set(b)

        # Remove items from disk not in the schedule anymore.
        for key in A ^ B:
            schedule.pop(key, None)

        # Update and add new items in the schedule
        for key in B:
            entry = self.Entry(**dict(b[key], name=key, app=self.app))
            if schedule.get(key):
                schedule[key].update(entry)
            else:
                schedule[key] = entry

    def _ensure_connected(self):
        # callback called for each retry while the connection
        # can't be established.
        def _error_handler(exc, interval):
            error('beat: Connection error: %s. '
                  'Trying again in %s seconds...', exc, interval)

        return self.connection.ensure_connection(
            _error_handler, self.app.conf.BROKER_CONNECTION_MAX_RETRIES
        )

    def get_schedule(self):
        return self.data

    def set_schedule(self, schedule):
        self.data = schedule
    schedule = property(get_schedule, set_schedule)

    @cached_property
    def connection(self):
        return self.app.connection()

    @cached_property
    def publisher(self):
        return self.Publisher(self._ensure_connected())

    @property
    def info(self):
        return ''
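

# Example (sketch): the mapping consumed by ``update_from_dict`` and
# ``merge_inplace`` has the same shape as the CELERYBEAT_SCHEDULE setting.
# Task names and values below are hypothetical placeholders.
#
#     CELERYBEAT_SCHEDULE = {
#         'cleanup-every-night': {
#             'task': 'tasks.cleanup',           # hypothetical task
#             'schedule': crontab(minute=0, hour=4),
#             'options': {'expires': 12 * 3600},
#         },
#         'poll-every-30s': {
#             'task': 'tasks.poll',              # hypothetical task
#             'schedule': 30.0,                  # seconds
#             'args': ('feed',),
#         },
#     }
#
#     scheduler = Scheduler(app, lazy=True)      # lazy: skip setup_schedule()
#     scheduler.update_from_dict(CELERYBEAT_SCHEDULE)
#     scheduler.tick()                           # send due tasks, return sleep time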


class PersistentScheduler(Scheduler):
    persistence = shelve
    known_suffixes = ('', '.db', '.dat', '.bak', '.dir')

    _store = None

    def __init__(self, *args, **kwargs):
        self.schedule_filename = kwargs.get('schedule_filename')
        Scheduler.__init__(self, *args, **kwargs)

    def _remove_db(self):
        for suffix in self.known_suffixes:
            with platforms.ignore_errno(errno.ENOENT):
                os.remove(self.schedule_filename + suffix)

    def _open_schedule(self):
        return self.persistence.open(self.schedule_filename, writeback=True)

    def _destroy_open_corrupted_schedule(self, exc):
        error('Removing corrupted schedule file %r: %r',
              self.schedule_filename, exc, exc_info=True)
        self._remove_db()
        return self._open_schedule()

    def setup_schedule(self):
        try:
            self._store = self._open_schedule()
            # In some cases there may be different errors from a storage
            # backend for corrupted files.  Example - DBPageNotFoundError
            # exception from bsddb.  In such case the file will be
            # successfully opened but the error will be raised on first key
            # retrieving.
            self._store.keys()
        except Exception as exc:
            self._store = self._destroy_open_corrupted_schedule(exc)

        for _ in (1, 2):
            try:
                self._store['entries']
            except KeyError:
                # new schedule db
                try:
                    self._store['entries'] = {}
                except KeyError as exc:
                    self._store = self._destroy_open_corrupted_schedule(exc)
                    continue
            else:
                if '__version__' not in self._store:
                    warning('DB Reset: Account for new __version__ field')
                    self._store.clear()   # remove schedule at 2.2.2 upgrade.
                elif 'tz' not in self._store:
                    warning('DB Reset: Account for new tz field')
                    self._store.clear()   # remove schedule at 3.0.8 upgrade
                elif 'utc_enabled' not in self._store:
                    warning('DB Reset: Account for new utc_enabled field')
                    self._store.clear()   # remove schedule at 3.0.9 upgrade
            break

        tz = self.app.conf.CELERY_TIMEZONE
        stored_tz = self._store.get('tz')
        if stored_tz is not None and stored_tz != tz:
            warning('Reset: Timezone changed from %r to %r', stored_tz, tz)
            self._store.clear()   # Timezone changed, reset db!
        utc = self.app.conf.CELERY_ENABLE_UTC
        stored_utc = self._store.get('utc_enabled')
        if stored_utc is not None and stored_utc != utc:
            choices = {True: 'enabled', False: 'disabled'}
            warning('Reset: UTC changed from %s to %s',
                    choices[stored_utc], choices[utc])
            self._store.clear()   # UTC setting changed, reset db!
        entries = self._store.setdefault('entries', {})
        self.merge_inplace(self.app.conf.CELERYBEAT_SCHEDULE)
        self.install_default_entries(self.schedule)
        self._store.update(__version__=__version__, tz=tz, utc_enabled=utc)
        self.sync()
        debug('Current schedule:\n' + '\n'.join(
            repr(entry) for entry in values(entries)))

    def get_schedule(self):
        return self._store['entries']

    def set_schedule(self, schedule):
        self._store['entries'] = schedule
    schedule = property(get_schedule, set_schedule)

    def sync(self):
        if self._store is not None:
            self._store.sync()

    def close(self):
        self.sync()
        self._store.close()

    @property
    def info(self):
        return ' . db -> {self.schedule_filename}'.format(self=self)
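

# Example (sketch): the persistent scheduler keeps its state in a shelve
# database on disk.  The filename below is a placeholder; the beat service
# normally takes it from CELERYBEAT_SCHEDULE_FILENAME.
#
#     scheduler = PersistentScheduler(app, lazy=True,
#                                     schedule_filename='celerybeat-schedule')
#     scheduler.setup_schedule()       # open the shelve, merge the app schedule
#     print(scheduler.info)            # ' . db -> celerybeat-schedule'
#     scheduler.close()                # sync and close the shelve file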


class Service(object):
    scheduler_cls = PersistentScheduler

    def __init__(self, app, max_interval=None, schedule_filename=None,
                 scheduler_cls=None):
        self.app = app
        self.max_interval = (max_interval or
                             app.conf.CELERYBEAT_MAX_LOOP_INTERVAL)
        self.scheduler_cls = scheduler_cls or self.scheduler_cls
        self.schedule_filename = (
            schedule_filename or app.conf.CELERYBEAT_SCHEDULE_FILENAME)

        self._is_shutdown = Event()
        self._is_stopped = Event()

    def __reduce__(self):
        # Argument order matches ``__init__`` (app comes first).
        return self.__class__, (self.app, self.max_interval,
                                self.schedule_filename, self.scheduler_cls)

    def start(self, embedded_process=False, drift=-0.010):
        info('beat: Starting...')
        debug('beat: Ticking with max interval->%s',
              humanize_seconds(self.scheduler.max_interval))

        signals.beat_init.send(sender=self)
        if embedded_process:
            signals.beat_embedded_init.send(sender=self)
            platforms.set_process_title('celery beat')

        try:
            while not self._is_shutdown.is_set():
                interval = self.scheduler.tick()
                interval = interval + drift if interval else interval
                if interval and interval > 0:
                    debug('beat: Waking up %s.',
                          humanize_seconds(interval, prefix='in '))
                    time.sleep(interval)
                    if self.scheduler.should_sync():
                        self.scheduler._do_sync()
        except (KeyboardInterrupt, SystemExit):
            self._is_shutdown.set()
        finally:
            self.sync()

    def sync(self):
        self.scheduler.close()
        self._is_stopped.set()

    def stop(self, wait=False):
        info('beat: Shutting down...')
        self._is_shutdown.set()
        wait and self._is_stopped.wait()  # block until shutdown done.

    def get_scheduler(self, lazy=False):
        filename = self.schedule_filename
        scheduler = instantiate(self.scheduler_cls,
                                app=self.app,
                                schedule_filename=filename,
                                max_interval=self.max_interval,
                                lazy=lazy)
        return scheduler

    @cached_property
    def scheduler(self):
        return self.get_scheduler()
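

# Example (sketch): running the beat service directly, assuming an existing
# Celery ``app``.  ``start()`` blocks the calling thread, looping over
# ``scheduler.tick()`` and sleeping until the next entry is due; ``stop()``
# (called from another thread or a signal handler) requests shutdown.
#
#     service = Service(app, max_interval=5)
#     service.start()                  # blocks until stop() / interrupt
#     # elsewhere: service.stop(wait=True)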


class _Threaded(Thread):
    """Embedded task scheduler using threading."""

    def __init__(self, app, **kwargs):
        super(_Threaded, self).__init__()
        self.app = app
        self.service = Service(app, **kwargs)
        self.daemon = True
        self.name = 'Beat'

    def run(self):
        self.app.set_current()
        self.service.start()

    def stop(self):
        self.service.stop(wait=True)


try:
    ensure_multiprocessing()
except NotImplementedError:     # pragma: no cover
    _Process = None
else:
    class _Process(Process):    # noqa

        def __init__(self, app, **kwargs):
            super(_Process, self).__init__()
            self.app = app
            self.service = Service(app, **kwargs)
            self.name = 'Beat'

        def run(self):
            reset_signals(full=False)
            platforms.close_open_fds([
                sys.__stdin__, sys.__stdout__, sys.__stderr__,
            ] + list(iter_open_logger_fds()))
            self.app.set_default()
            self.app.set_current()
            self.service.start(embedded_process=True)

        def stop(self):
            self.service.stop()
            self.terminate()


def EmbeddedService(app, max_interval=None, **kwargs):
    """Return embedded clock service.

    :keyword thread: Run threaded instead of as a separate process.
        Uses :mod:`multiprocessing` by default, if available.

    """
    if kwargs.pop('thread', False) or _Process is None:
        # Need short max interval to be able to stop thread
        # in reasonable time.
        return _Threaded(app, max_interval=1, **kwargs)
    return _Process(app, max_interval=max_interval, **kwargs)
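

# Example (sketch): embedding beat in another program, assuming an existing
# Celery ``app``.  ``thread=True`` returns a daemon _Threaded instance (and
# forces max_interval=1 so the loop can be stopped quickly); otherwise a
# _Process child is returned when multiprocessing is available.
#
#     beat = EmbeddedService(app, thread=True)
#     beat.start()                     # Thread.start() -> run() -> Service.start()
#     # ... do other work ...
#     beat.stop()                      # Service.stop(wait=True)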